Data yang digunakan adalah data loan yang bersumber dari Kaggle.
# Load the loan dataset from the working directory.
loan <- read.csv("loan.csv")
# Add a running row index with `$` assignment. seq_len(nrow()) follows the
# actual number of rows instead of assuming the file has exactly 61 records,
# so the assignment cannot fail or misalign when the data changes.
loan$Var_baru <- seq_len(nrow(loan))
head(loan, 3)
## age gender occupation education_level marital_status income credit_score
## 1 32 Male Engineer Bachelor's Married 85000 720
## 2 45 Female Teacher Master's Single 62000 680
## 3 28 Male Student High School Single 25000 590
## loan_status Var_baru
## 1 Approved 1
## 2 Approved 2
## 3 Denied 3
# Same index column, this time assigned through bracket notation.
# The length is derived from the data rather than hard-coded to 61 rows.
loan[, "Var_baru"] <- seq_len(nrow(loan))
head(loan, 3)
## age gender occupation education_level marital_status income credit_score
## 1 32 Male Engineer Bachelor's Married 85000 720
## 2 45 Female Teacher Master's Single 62000 680
## 3 28 Male Student High School Single 25000 590
## loan_status Var_baru
## 1 Approved 1
## 2 Approved 2
## 3 Denied 3
# Keep only the first five columns for the remaining examples.
loan <- loan[, seq_len(5), drop = FALSE]
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# dplyr way of adding a running index column. row_number() yields 1..n for
# whatever number of rows the data has, replacing the hard-coded 1:61.
loan %>%
  mutate(Urutan = row_number()) %>%
  head(3)
## age gender occupation education_level marital_status Urutan
## 1 32 Male Engineer Bachelor's Married 1
## 2 45 Female Teacher Master's Single 2
## 3 28 Male Student High School Single 3
# Base-R logical indexing: keep the rows whose marital_status is "Married".
loan_marital_status_base <- loan[loan[["marital_status"]] == "Married", ]
head(loan_marital_status_base, n = 10)
## age gender occupation education_level marital_status
## 1 32 Male Engineer Bachelor's Married
## 4 51 Female Manager Bachelor's Married
## 5 36 Male Accountant Bachelor's Married
## 7 42 Male Lawyer Doctoral Married
## 9 37 Male IT Master's Married
## 10 48 Female Doctor Doctoral Married
## 11 55 Male Consultant Master's Married
## 14 39 Female Marketing Bachelor's Married
## 15 44 Male Architect Master's Married
## 17 34 Male Engineer Bachelor's Married
# dplyr row filter: show the first four rows with marital_status "Single".
head(filter(loan, marital_status == "Single"), 4)
## age gender occupation education_level marital_status
## 1 45 Female Teacher Master's Single
## 2 28 Male Student High School Single
## 3 24 Female Nurse Associate's Single
## 4 29 Female Artist Bachelor's Single
# Base-R column selection by position (columns 2, 4 and 5).
head(
  loan[, c(2, 4, 5)],
  n = 3
)
## gender education_level marital_status
## 1 Male Bachelor's Married
## 2 Female Master's Single
## 3 Male High School Single
# Base-R column selection by name.
head(
  loan[, c("gender", "education_level", "marital_status")],
  n = 3
)
## gender education_level marital_status
## 1 Male Bachelor's Married
## 2 Female Master's Single
## 3 Male High School Single
# dplyr column selection of the same three columns.
head(select(loan, gender, education_level, marital_status), 3)
## gender education_level marital_status
## 1 Male Bachelor's Married
## 2 Female Master's Single
## 3 Male High School Single
# Base-R filter on two conditions combined element-wise with `&`.
loan[
  loan[["occupation"]] == "Teacher" & loan[["gender"]] == "Female",
]
## age gender occupation education_level marital_status
## 2 45 Female Teacher Master's Single
## 42 30 Female Teacher Master's Single
# dplyr filter on two conditions. Both columns are referenced through data
# masking (bare names); the previous `loan$gender` read the global data frame
# instead of the piped data, which breaks as soon as the input to filter()
# is not the full, ungrouped `loan` object.
loan %>%
  filter(occupation == "Teacher", gender == "Female") %>%
  head(2)
## age gender occupation education_level marital_status
## 1 45 Female Teacher Master's Single
## 2 30 Female Teacher Master's Single
# Base-R filter: Bachelor's degree holders older than 20.
loan[
  loan[["education_level"]] == "Bachelor's" & loan[["age"]] > 20,
]
## age gender occupation education_level marital_status
## 1 32 Male Engineer Bachelor's Married
## 4 51 Female Manager Bachelor's Married
## 5 36 Male Accountant Bachelor's Married
## 8 29 Female Artist Bachelor's Single
## 12 31 Female Analyst Bachelor's Single
## 14 39 Female Marketing Bachelor's Married
## 16 27 Female Designer Bachelor's Single
## 17 34 Male Engineer Bachelor's Married
## 21 38 Male Pilot Bachelor's Married
## 24 33 Female Writer Bachelor's Single
## 27 35 Male Sales Bachelor's Married
## 30 49 Female Realtor Bachelor's Married
## 33 32 Male Programmer Bachelor's Married
## 35 29 Male Musician Bachelor's Single
## 39 40 Male Software Bachelor's Married
## 41 38 Male Accountant Bachelor's Married
## 43 45 Male Manager Bachelor's Married
## 47 31 Male Analyst Bachelor's Single
## 49 34 Male IT Bachelor's Married
## 50 27 Female Artist Bachelor's Single
## 52 29 Female Designer Bachelor's Single
## 53 37 Male Engineer Bachelor's Married
## 59 43 Male Banker Bachelor's Married
# dplyr filter: rows with a Bachelor's degree and occupation "Engineer".
# Comma-separated conditions in filter() are combined with a logical AND.
head(
  filter(loan, education_level == "Bachelor's", occupation == "Engineer")
)
## age gender occupation education_level marital_status
## 1 32 Male Engineer Bachelor's Married
## 2 34 Male Engineer Bachelor's Married
## 3 37 Male Engineer Bachelor's Married
# dplyr filter: rows with a Bachelor's degree and marital_status "Single".
head(
  filter(loan, education_level == "Bachelor's", marital_status == "Single")
)
## age gender occupation education_level marital_status
## 1 29 Female Artist Bachelor's Single
## 2 31 Female Analyst Bachelor's Single
## 3 27 Female Designer Bachelor's Single
## 4 33 Female Writer Bachelor's Single
## 5 29 Male Musician Bachelor's Single
## 6 31 Male Analyst Bachelor's Single
# Base-R set membership: flag the rows whose age is 33, 37 or 44,
# then pull the matching ages.
idx <- is.element(loan$age, c(33, 37, 44))
loan$age[idx]
## [1] 37 44 33 33 37 44
# dplyr set membership: keep the rows whose age is 33, 37 or 44.
filter(loan, age %in% c(33, 37, 44))
## age gender occupation education_level marital_status
## 1 37 Male IT Master's Married
## 2 44 Male Architect Master's Married
## 3 33 Female Writer Bachelor's Single
## 4 33 Female Stylist Associate's Single
## 5 37 Male Engineer Bachelor's Married
## 6 44 Female Marketing Master's Married
Sorting data berfungsi untuk mengurutkan baris data berdasarkan variabel tertentu dalam data frame.

* Secara ascending (terkecil ke terbesar), menggunakan sintaks base R:
# Base-R ascending sort: order the rows by age and show the five smallest ages.
head(
  loan[order(loan[["age"]]), "age"],
  n = 5
)
## [1] 24 25 25 26 26
# dplyr ascending sort by age; show the first five rows.
head(arrange(loan, age), 5)
## age gender occupation education_level marital_status
## 1 24 Female Nurse Associate's Single
## 2 25 Female Receptionist High School Single
## 3 25 Female Receptionist High School Single
## 4 26 Male Salesman High School Single
## 5 26 Female Server High School Single
# Base-R descending sort: negating the numeric key reverses the order.
head(
  loan[order(-loan[["age"]]), "age"],
  n = 5
)
## [1] 55 54 53 52 51
# dplyr descending sort by age; show the first five rows.
head(arrange(loan, desc(age)), 5)
## age gender occupation education_level marital_status
## 1 55 Male Consultant Master's Married
## 2 54 Female Editor Master's Married
## 3 53 Female Professor Doctoral Married
## 4 52 Female Psychologist Doctoral Married
## 5 51 Female Manager Bachelor's Married
# Two-pass base-R sort. It depends on order() leaving tied rows in their
# existing relative order, so the statement order below matters.
# Pass 1: order rows by gender, descending.
idx1 <- order(loan$gender, decreasing=TRUE)
loan2 <- loan[idx1, ]
# Pass 2: re-order the pass-1 result by occupation, ascending. Because it runs
# last, occupation is the primary key; the gender ordering from pass 1 only
# survives among rows that share the same occupation.
idx2 <- order(loan2$occupation)
loan3 <- loan2[idx2, ]
# NOTE(review): the result is occupation-major, which is not the same as a
# gender-major sort such as arrange(desc(gender), occupation) -- confirm which
# ordering is intended.
head(loan3[, c("gender", "occupation")])
## gender occupation
## 5 Male Accountant
## 41 Male Accountant
## 47 Male Analyst
## 12 Female Analyst
## 15 Male Architect
## 57 Male Architect
# dplyr multi-key sort: gender descending first, then occupation ascending
# within each gender; keep only the two sort columns and show six rows.
head(
  select(arrange(loan, desc(gender), occupation), gender, occupation),
  6
)
## gender occupation
## 1 Male Accountant
## 2 Male Accountant
## 3 Male Analyst
## 4 Male Architect
## 5 Male Architect
## 6 Male Banker