Loading Libraries:
# Global knitr chunk options: hide warnings and messages, cache chunk results,
# and render figures centered at 85% of the output width.
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  cache = TRUE,
  fig.align = "center",
  out.width = "85%"
)
library(openintro)
library(tidyverse)
library(feather)
Lab 1: Read a CSV file from your disk into a tibble.
# Read the bank-churn CSV from its location on disk via an explicit path
# instead of calling setwd(): the session's working directory is left
# untouched, so relative paths elsewhere are no longer redirected to ~/Desktop.
bank_csv_path <- path.expand(file.path("~", "Desktop", "BankChurners.csv"))
bank_data <- read_csv(bank_csv_path)
# Compact overview of the tibble: one line per column with its type.
glimpse(bank_data)
## Rows: 10,127
## Columns: 23
## $ CLIENTNUM <dbl> …
## $ Attrition_Flag <chr> …
## $ Customer_Age <dbl> …
## $ Gender <chr> …
## $ Dependent_count <dbl> …
## $ Education_Level <chr> …
## $ Marital_Status <chr> …
## $ Income_Category <chr> …
## $ Card_Category <chr> …
## $ Months_on_book <dbl> …
## $ Total_Relationship_Count <dbl> …
## $ Months_Inactive_12_mon <dbl> …
## $ Contacts_Count_12_mon <dbl> …
## $ Credit_Limit <dbl> …
## $ Total_Revolving_Bal <dbl> …
## $ Avg_Open_To_Buy <dbl> …
## $ Total_Amt_Chng_Q4_Q1 <dbl> …
## $ Total_Trans_Amt <dbl> …
## $ Total_Trans_Ct <dbl> …
## $ Total_Ct_Chng_Q4_Q1 <dbl> …
## $ Avg_Utilization_Ratio <dbl> …
## $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1 <dbl> …
## $ Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2 <dbl> …
Lab 2: “John Smith WA 418-Y11-4111
Mary Hartford CA 319-Z19-4341
Evan Nolan IL 219-532-c301”
1. Try to use read_csv to read the following text. What do
you get?
# Parse the three records as inline CSV text. I() marks the string explicitly
# as literal data rather than a file path, and col_names = FALSE (spelled out,
# since `F` can be reassigned) keeps the first record as data instead of
# treating it as a header row.
read_csv(
  I("John Smith WA 418-Y11-4111
Mary Hartford CA 319-Z19-4341
Evan Nolan IL 219-532-c301"),
  col_names = FALSE
)
## # A tibble: 3 × 1
## X1
## <chr>
## 1 John Smith WA 418-Y11-4111
## 2 Mary Hartford CA 319-Z19-4341
## 3 Evan Nolan IL 219-532-c301
2. Try to use read_delim to read the same text. What do you
get?
# Hand the same three records to read_delim() with no delimiter and no
# column-name setting supplied, leaving both to the function's defaults.
read_delim(
  file = "John Smith WA 418-Y11-4111
Mary Hartford CA 319-Z19-4341
Evan Nolan IL 219-532-c301"
)
## # A tibble: 2 × 20
## John Smith ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10 ...11 WA ...13
## <chr> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <chr> <lgl> <lgl> <chr> <lgl>
## 1 Mary Hartf… NA NA NA NA NA NA CA NA NA <NA> NA
## 2 Evan Nolan NA NA NA NA NA NA <NA> NA NA IL NA
## # ℹ 7 more variables: ...14 <lgl>, ...15 <lgl>, ...16 <lgl>, ...17 <chr>,
## # ...18 <lgl>, ...19 <lgl>, `418-Y11-4111` <chr>
Lab 3: Divide the bank customer data into two parts - data
for female customers only and for male customers only. Save them into
two different files “female_bank_data.rds” and
“male_bank_data.rds”.
# Keep only the rows whose Gender is "F", save them as an RDS file, then read
# the file back to confirm the round trip.
Bank_F <- filter(bank_data, Gender == "F")
write_rds(Bank_F, "female_bank_data.rds")
read_rds("female_bank_data.rds")
## # A tibble: 5,358 × 23
## CLIENTNUM Attrition_Flag Customer_Age Gender Dependent_count Education_Level
## <dbl> <chr> <dbl> <chr> <dbl> <chr>
## 1 818770008 Existing Custo… 49 F 5 Graduate
## 2 769911858 Existing Custo… 40 F 4 High School
## 3 712396908 Existing Custo… 57 F 2 Graduate
## 4 709327383 Existing Custo… 45 F 2 Graduate
## 5 708508758 Attrited Custo… 62 F 0 Graduate
## 6 811604133 Existing Custo… 47 F 4 Unknown
## 7 771071958 Existing Custo… 41 F 3 Graduate
## 8 718813833 Existing Custo… 44 F 3 Uneducated
## 9 788658483 Existing Custo… 53 F 2 College
## 10 715318008 Existing Custo… 55 F 1 College
## # ℹ 5,348 more rows
## # ℹ 17 more variables: Marital_Status <chr>, Income_Category <chr>,
## # Card_Category <chr>, Months_on_book <dbl>, Total_Relationship_Count <dbl>,
## # Months_Inactive_12_mon <dbl>, Contacts_Count_12_mon <dbl>,
## # Credit_Limit <dbl>, Total_Revolving_Bal <dbl>, Avg_Open_To_Buy <dbl>,
## # Total_Amt_Chng_Q4_Q1 <dbl>, Total_Trans_Amt <dbl>, Total_Trans_Ct <dbl>,
## # Total_Ct_Chng_Q4_Q1 <dbl>, Avg_Utilization_Ratio <dbl>, …
# Keep only the rows whose Gender is "M", save them as an RDS file, then read
# the file back to confirm the round trip.
Bank_M <- filter(bank_data, Gender == "M")
write_rds(Bank_M, "male_bank_data.rds")
read_rds("male_bank_data.rds")
## # A tibble: 4,769 × 23
## CLIENTNUM Attrition_Flag Customer_Age Gender Dependent_count Education_Level
## <dbl> <chr> <dbl> <chr> <dbl> <chr>
## 1 768805383 Existing Custo… 45 M 3 High School
## 2 713982108 Existing Custo… 51 M 3 Graduate
## 3 709106358 Existing Custo… 40 M 3 Uneducated
## 4 713061558 Existing Custo… 44 M 2 Graduate
## 5 810347208 Existing Custo… 51 M 4 Unknown
## 6 818906208 Existing Custo… 32 M 0 High School
## 7 710930508 Existing Custo… 37 M 3 Uneducated
## 8 719661558 Existing Custo… 48 M 2 Graduate
## 9 708790833 Existing Custo… 42 M 5 Uneducated
## 10 710821833 Existing Custo… 65 M 1 Unknown
## # ℹ 4,759 more rows
## # ℹ 17 more variables: Marital_Status <chr>, Income_Category <chr>,
## # Card_Category <chr>, Months_on_book <dbl>, Total_Relationship_Count <dbl>,
## # Months_Inactive_12_mon <dbl>, Contacts_Count_12_mon <dbl>,
## # Credit_Limit <dbl>, Total_Revolving_Bal <dbl>, Avg_Open_To_Buy <dbl>,
## # Total_Amt_Chng_Q4_Q1 <dbl>, Total_Trans_Amt <dbl>, Total_Trans_Ct <dbl>,
## # Total_Ct_Chng_Q4_Q1 <dbl>, Avg_Utilization_Ratio <dbl>, …