1. Input Data

Data yang akan digunakan adalah data loan yang bersumber dari Kaggle.

# Read the loan data set from "loan.csv" (path relative to the working
# directory) into a data frame.
loan <- read.csv(file = "loan.csv")

2. Membuat Peubah Baru

# Add a running row index as a new column using `$` assignment.
# seq_len(nrow(loan)) follows the actual number of rows, so the assignment
# no longer fails when the file does not contain exactly 61 records.
loan$Var_baru <- seq_len(nrow(loan))
head(loan, 3)
##   age gender occupation education_level marital_status income credit_score
## 1  32   Male   Engineer      Bachelor's        Married  85000          720
## 2  45 Female    Teacher        Master's         Single  62000          680
## 3  28   Male    Student     High School         Single  25000          590
##   loan_status Var_baru
## 1    Approved        1
## 2    Approved        2
## 3      Denied        3
# Same new column, this time created with `[row, column]` assignment.
# The index is derived from nrow(loan) rather than a hard-coded 61 so the
# code keeps working if the number of rows changes.
loan[, "Var_baru"] <- seq_len(nrow(loan))
head(loan, 3)
##   age gender occupation education_level marital_status income credit_score
## 1  32   Male   Engineer      Bachelor's        Married  85000          720
## 2  45 Female    Teacher        Master's         Single  62000          680
## 3  28   Male    Student     High School         Single  25000          590
##   loan_status Var_baru
## 1    Approved        1
## 2    Approved        2
## 3      Denied        3
# Keep only the first five columns; all later columns are dropped from `loan`.
loan <- loan[, seq_len(5)]
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# dplyr version: add a row index column named `Urutan` and preview it.
# row_number() numbers the rows of whatever data reaches mutate(), so no
# hard-coded row count (previously 1:61) is needed.
loan %>%
  mutate(Urutan = row_number()) %>%
  head(3)
##   age gender occupation education_level marital_status Urutan
## 1  32   Male   Engineer      Bachelor's        Married      1
## 2  45 Female    Teacher        Master's         Single      2
## 3  28   Male    Student     High School         Single      3

3. Subset Data

# Base R row subset: keep the rows whose marital_status equals "Married",
# then preview the first ten of them.
loan_marital_status_base <- loan[loan[["marital_status"]] == "Married", ]
head(loan_marital_status_base, n = 10)
##    age gender occupation education_level marital_status
## 1   32   Male   Engineer      Bachelor's        Married
## 4   51 Female    Manager      Bachelor's        Married
## 5   36   Male Accountant      Bachelor's        Married
## 7   42   Male     Lawyer        Doctoral        Married
## 9   37   Male         IT        Master's        Married
## 10  48 Female     Doctor        Doctoral        Married
## 11  55   Male Consultant        Master's        Married
## 14  39 Female  Marketing      Bachelor's        Married
## 15  44   Male  Architect        Master's        Married
## 17  34   Male   Engineer      Bachelor's        Married
# dplyr row subset: keep rows with marital_status "Single" and show the
# first four.
loan %>%
  filter(marital_status == "Single") %>%
  head(n = 4)
##   age gender occupation education_level marital_status
## 1  45 Female    Teacher        Master's         Single
## 2  28   Male    Student     High School         Single
## 3  24 Female      Nurse     Associate's         Single
## 4  29 Female     Artist      Bachelor's         Single
# Column subset by position: columns 2, 4 and 5, first three rows.
head(loan[, c(2L, 4L, 5L)], n = 3)
##   gender education_level marital_status
## 1   Male      Bachelor's        Married
## 2 Female        Master's         Single
## 3   Male     High School         Single
# Column subset by name: the same three columns, selected explicitly.
head(
  loan[, c("gender", "education_level", "marital_status")],
  n = 3
)
##   gender education_level marital_status
## 1   Male      Bachelor's        Married
## 2 Female        Master's         Single
## 3   Male     High School         Single
# dplyr column subset: pick three columns by name and show the first
# three rows.
loan %>%
  select(gender, education_level, marital_status) %>%
  head(n = 3)
##   gender education_level marital_status
## 1   Male      Bachelor's        Married
## 2 Female        Master's         Single
## 3   Male     High School         Single
# Base R subset on two conditions: female teachers only.
# with() evaluates the condition inside `loan`, so columns are named directly.
loan[with(loan, occupation == "Teacher" & gender == "Female"), ]
##    age gender occupation education_level marital_status
## 2   45 Female    Teacher        Master's         Single
## 42  30 Female    Teacher        Master's         Single
# dplyr subset on two conditions: female teachers only.
# Columns are referenced by bare name so the condition is evaluated on the
# data flowing through the pipe. The earlier `loan$gender` reached back to
# the global object, which only works while the piped data has the same
# rows in the same order as `loan`.
loan %>%
  filter(occupation == "Teacher" & gender == "Female") %>%
  head(2)
##   age gender occupation education_level marital_status
## 1  45 Female    Teacher        Master's         Single
## 2  30 Female    Teacher        Master's         Single
# Base R subset: Bachelor's degree holders older than 20.
loan[loan[["education_level"]] == "Bachelor's" & loan[["age"]] > 20, ]
##    age gender occupation education_level marital_status
## 1   32   Male   Engineer      Bachelor's        Married
## 4   51 Female    Manager      Bachelor's        Married
## 5   36   Male Accountant      Bachelor's        Married
## 8   29 Female     Artist      Bachelor's         Single
## 12  31 Female    Analyst      Bachelor's         Single
## 14  39 Female  Marketing      Bachelor's        Married
## 16  27 Female   Designer      Bachelor's         Single
## 17  34   Male   Engineer      Bachelor's        Married
## 21  38   Male      Pilot      Bachelor's        Married
## 24  33 Female     Writer      Bachelor's         Single
## 27  35   Male      Sales      Bachelor's        Married
## 30  49 Female    Realtor      Bachelor's        Married
## 33  32   Male Programmer      Bachelor's        Married
## 35  29   Male   Musician      Bachelor's         Single
## 39  40   Male   Software      Bachelor's        Married
## 41  38   Male Accountant      Bachelor's        Married
## 43  45   Male    Manager      Bachelor's        Married
## 47  31   Male    Analyst      Bachelor's         Single
## 49  34   Male         IT      Bachelor's        Married
## 50  27 Female     Artist      Bachelor's         Single
## 52  29 Female   Designer      Bachelor's         Single
## 53  37   Male   Engineer      Bachelor's        Married
## 59  43   Male     Banker      Bachelor's        Married
# dplyr subset: engineers with a Bachelor's degree.
# Comma-separated conditions in filter() are combined with AND.
loan %>%
  filter(education_level == "Bachelor's", occupation == "Engineer") %>%
  head(n = 6)
##   age gender occupation education_level marital_status
## 1  32   Male   Engineer      Bachelor's        Married
## 2  34   Male   Engineer      Bachelor's        Married
## 3  37   Male   Engineer      Bachelor's        Married
# dplyr subset: single applicants with a Bachelor's degree (first six rows).
# Comma-separated conditions in filter() are combined with AND.
loan %>%
  filter(education_level == "Bachelor's", marital_status == "Single") %>%
  head(n = 6)
##   age gender occupation education_level marital_status
## 1  29 Female     Artist      Bachelor's         Single
## 2  31 Female    Analyst      Bachelor's         Single
## 3  27 Female   Designer      Bachelor's         Single
## 4  33 Female     Writer      Bachelor's         Single
## 5  29   Male   Musician      Bachelor's         Single
## 6  31   Male    Analyst      Bachelor's         Single
# Logical mask marking rows whose age is one of 33, 37 or 44;
# is.element() is the function form of %in%.
idx <- is.element(loan$age, c(33, 37, 44))
# Extract just the matching ages as a plain vector.
loan$age[idx]
## [1] 37 44 33 33 37 44
# dplyr equivalent: keep the full rows whose age is 33, 37 or 44.
loan %>%
  filter(is.element(age, c(33, 37, 44)))
##   age gender occupation education_level marital_status
## 1  37   Male         IT        Master's        Married
## 2  44   Male  Architect        Master's        Married
## 3  33 Female     Writer      Bachelor's         Single
## 4  33 Female    Stylist     Associate's         Single
## 5  37   Male   Engineer      Bachelor's        Married
## 6  44 Female  Marketing        Master's        Married

4. Sorting Data

Sorting data berfungsi untuk mengurutkan data sesuai dengan variabel tertentu dalam data frame.

* Secara ascending (terkecil ke terbesar) menggunakan syntax base

# Base R ascending sort: order() returns the permutation that sorts age from
# smallest to largest; show the five smallest values.
head(loan$age[order(loan$age)], n = 5)
## [1] 24 25 25 26 26
# dplyr ascending sort: arrange() orders rows by age, smallest first.
loan %>%
  arrange(age) %>%
  head(n = 5)
##   age gender   occupation education_level marital_status
## 1  24 Female        Nurse     Associate's         Single
## 2  25 Female Receptionist     High School         Single
## 3  25 Female Receptionist     High School         Single
## 4  26   Male     Salesman     High School         Single
## 5  26 Female       Server     High School         Single
# Base R descending sort: show the five largest ages.
head(loan$age[order(loan$age, decreasing = TRUE)], n = 5)
## [1] 55 54 53 52 51
# dplyr descending sort: desc() reverses the ordering, so the oldest
# applicants come first.
loan %>%
  arrange(desc(age)) %>%
  head(n = 5)
##   age gender   occupation education_level marital_status
## 1  55   Male   Consultant        Master's        Married
## 2  54 Female       Editor        Master's        Married
## 3  53 Female    Professor        Doctoral        Married
## 4  52 Female Psychologist        Doctoral        Married
## 5  51 Female      Manager      Bachelor's        Married

4.1 Menggunakan Ascending dan Descending Bersamaan

# Two-pass sort in base R.
# Pass 1: order the rows by gender, descending.
idx1 <- order(loan[["gender"]], decreasing = TRUE)
loan2 <- loan[idx1, ]

# Pass 2: re-order the result by occupation, ascending. order() leaves ties
# in their existing order, so occupation ends up as the primary key and the
# descending gender order from pass 1 only decides rows within the same
# occupation.
idx2 <- order(loan2[["occupation"]])
loan3 <- loan2[idx2, ]
head(loan3[, c("gender", "occupation")], n = 6)
##    gender occupation
## 5    Male Accountant
## 41   Male Accountant
## 47   Male    Analyst
## 12 Female    Analyst
## 15   Male  Architect
## 57   Male  Architect
# dplyr counterpart of the base R two-pass sort: occupation ascending is the
# primary key and gender descending breaks ties within an occupation.
# The previous key order (desc(gender), occupation) sorted by gender first,
# so it did not reproduce the base R result it is shown next to.
loan %>%
  arrange(occupation, desc(gender)) %>%
  select(gender, occupation) %>%
  head(6)
##   gender occupation
## 1   Male Accountant
## 2   Male Accountant
## 3   Male    Analyst
## 4 Female    Analyst
## 5   Male  Architect
## 6   Male  Architect