library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
setwd("~/Data Study/Data 101 MC/Week6")
telecom <- read_csv("telecom.csv")
## Rows: 10 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): customerID, TotalCharges, PaymentMethod, Churn
## dbl (1): MonthlyCharges
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Take a quick look around the data (using functions str, dim, etc.)
str(telecom)
## spec_tbl_df [10 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ customerID : chr [1:10] "7590-VHVEG" "5575-GNVDE" "3668-QPYBK" "7795-CFOCW" ...
## $ MonthlyCharges: num [1:10] 29.9 57 NA 42.3 70.7 ...
## $ TotalCharges : chr [1:10] "109.9" "na" "108.15" "1840.75" ...
## $ PaymentMethod : chr [1:10] "Electronic check" "Mailed check" "--" "Bank transfer" ...
## $ Churn : chr [1:10] "yes" "yes" "yes" "no" ...
## - attr(*, "spec")=
## .. cols(
## .. customerID = col_character(),
## .. MonthlyCharges = col_double(),
## .. TotalCharges = col_character(),
## .. PaymentMethod = col_character(),
## .. Churn = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
dim(telecom)
## [1] 10 5
Review question: print out the table of value counts for the Chrun variable.
count(telecom,Churn)
## # A tibble: 2 × 2
## Churn n
## <chr> <int>
## 1 no 5
## 2 yes 5
Print out to the screen (do not save to a new data frame) the rows where MonthlyCharges > 55
filter(telecom, MonthlyCharges>55)
## # A tibble: 4 × 5
## customerID MonthlyCharges TotalCharges PaymentMethod Churn
## <chr> <dbl> <chr> <chr> <chr>
## 1 5575-GNVDE 57.0 na Mailed check yes
## 2 9237-HQITU 70.7 <NA> Electronic check no
## 3 1452-KIOVK 89.1 1949.4 Credit card no
## 4 7892-POOKP 105. 3046.05 Electronic check no
Change the data types of variables customerID and TotalCharges to appropriate types
telecom %>%
mutate_at(c("TotalCharges"), as.numeric)
## Warning in mask$eval_all_mutate(quo): 강제형변환에 의해 생성된 NA 입니다
## # A tibble: 10 × 5
## customerID MonthlyCharges TotalCharges PaymentMethod Churn
## <chr> <dbl> <dbl> <chr> <chr>
## 1 7590-VHVEG 29.8 110. Electronic check yes
## 2 5575-GNVDE 57.0 NA Mailed check yes
## 3 3668-QPYBK NA 108. -- yes
## 4 7795-CFOCW 42.3 1841. Bank transfer no
## 5 9237-HQITU 70.7 NA Electronic check no
## 6 9305-CDSKC NaN 820. -- yes
## 7 1452-KIOVK 89.1 1949. Credit card no
## 8 6713-OKOMC NA NA <NA> yes
## 9 7892-POOKP 105. 3046. Electronic check no
## 10 8451-AJOMK 54.1 355. Electronic check no
Change all of the missing or not available data to “NA”
telecom <- telecom %>%
mutate_all(na_if,"--")
telecom
## # A tibble: 10 × 5
## customerID MonthlyCharges TotalCharges PaymentMethod Churn
## <chr> <dbl> <chr> <chr> <chr>
## 1 7590-VHVEG 29.8 109.9 Electronic check yes
## 2 5575-GNVDE 57.0 na Mailed check yes
## 3 3668-QPYBK NA 108.15 <NA> yes
## 4 7795-CFOCW 42.3 1840.75 Bank transfer no
## 5 9237-HQITU 70.7 <NA> Electronic check no
## 6 9305-CDSKC NaN 820.5 <NA> yes
## 7 1452-KIOVK 89.1 1949.4 Credit card no
## 8 6713-OKOMC NA N/A <NA> yes
## 9 7892-POOKP 105. 3046.05 Electronic check no
## 10 8451-AJOMK 54.1 354.95 Electronic check no
Print out to the screen (do not save tot a new data frame) the data with any rows containing “NA” removed
telecom[rowSums(is.na(telecom))>0,]
## # A tibble: 4 × 5
## customerID MonthlyCharges TotalCharges PaymentMethod Churn
## <chr> <dbl> <chr> <chr> <chr>
## 1 3668-QPYBK NA 108.15 <NA> yes
## 2 9237-HQITU 70.7 <NA> Electronic check no
## 3 9305-CDSKC NaN 820.5 <NA> yes
## 4 6713-OKOMC NA N/A <NA> yes
Drop the Churn column from the data frame
telecomNoChurn <- select(telecom,-Churn)
telecomNoChurn
## # A tibble: 10 × 4
## customerID MonthlyCharges TotalCharges PaymentMethod
## <chr> <dbl> <chr> <chr>
## 1 7590-VHVEG 29.8 109.9 Electronic check
## 2 5575-GNVDE 57.0 na Mailed check
## 3 3668-QPYBK NA 108.15 <NA>
## 4 7795-CFOCW 42.3 1840.75 Bank transfer
## 5 9237-HQITU 70.7 <NA> Electronic check
## 6 9305-CDSKC NaN 820.5 <NA>
## 7 1452-KIOVK 89.1 1949.4 Credit card
## 8 6713-OKOMC NA N/A <NA>
## 9 7892-POOKP 105. 3046.05 Electronic check
## 10 8451-AJOMK 54.1 354.95 Electronic check