Day 4 HW

telecom_data_dictionary    <- read.csv("telecom_data_dictionary.csv",    stringsAsFactors = FALSE)
telecom_customer_churn     <- read.csv("telecom_customer_churn.csv",     stringsAsFactors = FALSE)
telecom_zipcode_population <- read.csv("telecom_zipcode_population.csv", stringsAsFactors = FALSE)
names(telecom_customer_churn)
 [1] "Customer.ID"                       "Gender"                           
 [3] "Age"                               "Married"                          
 [5] "Number.of.Dependents"              "City"                             
 [7] "Zip.code"                          "Latitude"                         
 [9] "Longitude"                         "Number.of.Referrals"              
[11] "Tenure.in.Months"                  "Offer"                            
[13] "Phone.Service"                     "Avg.Monthly.Long.Distance.Charges"
[15] "Multiple.Lines"                    "Internet.Service"                 
[17] "Internet.Type"                     "Avg.Monthly.GB.Download"          
[19] "Online.Security"                   "Online.Backup"                    
[21] "Device.Protection.Plan"            "Premium.Tech.Support"             
[23] "Streaming.TV"                      "Streaming.Movies"                 
[25] "Streaming.Music"                   "Unlimited.Data"                   
[27] "Contract"                          "Paperless.Billing"                
[29] "Payment.Method"                    "Monthly.Charge"                   
[31] "Total.Charges"                     "Total.Refunds"                    
[33] "Total.Extra.Data.Charges"          "Total.Long.Distance.Charges"      
[35] "Total.Revenue"                     "Customer.Status"                  
[37] "Churn.Category"                    "Churn.Reason"                     
names(telecom_zipcode_population)
[1] "Zip.Code"   "Population"
common_key <- intersect(
  names(telecom_customer_churn),
  names(telecom_zipcode_population)
)

telecom_merged <- merge(
  x  = telecom_customer_churn,
  y  = telecom_zipcode_population,
  by = common_key,
  all.x = TRUE
)
summary(telecom_merged)
 Customer.ID           Gender               Age          Married         
 Length:11768853    Length:11768853    Min.   :19.00   Length:11768853   
 Class :character   Class :character   1st Qu.:32.00   Class :character  
 Mode  :character   Mode  :character   Median :46.00   Mode  :character  
                                       Mean   :46.51                     
                                       3rd Qu.:60.00                     
                                       Max.   :80.00                     
                                                                         
 Number.of.Dependents     City              Zip.code        Latitude    
 Min.   :0.0000       Length:11768853    Min.   :90001   Min.   :32.56  
 1st Qu.:0.0000       Class :character   1st Qu.:92101   1st Qu.:33.99  
 Median :0.0000       Mode  :character   Median :93518   Median :36.21  
 Mean   :0.4687                          Mean   :93486   Mean   :36.20  
 3rd Qu.:0.0000                          3rd Qu.:95329   3rd Qu.:38.16  
 Max.   :9.0000                          Max.   :96150   Max.   :41.96  
                                                                        
   Longitude      Number.of.Referrals Tenure.in.Months    Offer          
 Min.   :-124.3   Min.   : 0.000      Min.   : 1.00    Length:11768853   
 1st Qu.:-121.8   1st Qu.: 0.000      1st Qu.: 9.00    Class :character  
 Median :-119.6   Median : 0.000      Median :29.00    Mode  :character  
 Mean   :-119.8   Mean   : 1.952      Mean   :32.39                      
 3rd Qu.:-118.0   3rd Qu.: 3.000      3rd Qu.:55.00                      
 Max.   :-114.2   Max.   :11.000      Max.   :72.00                      
                                                                         
 Phone.Service      Avg.Monthly.Long.Distance.Charges Multiple.Lines    
 Length:11768853    Min.   : 1.01                     Length:11768853   
 Class :character   1st Qu.:13.05                     Class :character  
 Mode  :character   Median :25.69                     Mode  :character  
                    Mean   :25.42                                       
                    3rd Qu.:37.68                                       
                    Max.   :49.99                                       
                    NA's   :1139622                                     
 Internet.Service   Internet.Type      Avg.Monthly.GB.Download
 Length:11768853    Length:11768853    Min.   : 2.00          
 Class :character   Class :character   1st Qu.:13.00          
 Mode  :character   Mode  :character   Median :21.00          
                                       Mean   :26.19          
                                       3rd Qu.:30.00          
                                       Max.   :85.00          
                                       NA's   :2549946        
 Online.Security    Online.Backup      Device.Protection.Plan
 Length:11768853    Length:11768853    Length:11768853       
 Class :character   Class :character   Class :character      
 Mode  :character   Mode  :character   Mode  :character      
                                                             
                                                             
                                                             
                                                             
 Premium.Tech.Support Streaming.TV       Streaming.Movies   Streaming.Music   
 Length:11768853      Length:11768853    Length:11768853    Length:11768853   
 Class :character     Class :character   Class :character   Class :character  
 Mode  :character     Mode  :character   Mode  :character   Mode  :character  
                                                                              
                                                                              
                                                                              
                                                                              
 Unlimited.Data       Contract         Paperless.Billing  Payment.Method    
 Length:11768853    Length:11768853    Length:11768853    Length:11768853   
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
 Monthly.Charge   Total.Charges    Total.Refunds    Total.Extra.Data.Charges
 Min.   :-10.00   Min.   :  18.8   Min.   : 0.000   Min.   :  0.000         
 1st Qu.: 30.40   1st Qu.: 400.0   1st Qu.: 0.000   1st Qu.:  0.000         
 Median : 70.05   Median :1394.5   Median : 0.000   Median :  0.000         
 Mean   : 63.60   Mean   :2280.4   Mean   : 1.962   Mean   :  6.861         
 3rd Qu.: 89.75   3rd Qu.:3789.2   3rd Qu.: 0.000   3rd Qu.:  0.000         
 Max.   :118.75   Max.   :8684.8   Max.   :49.790   Max.   :150.000         
                                                                            
 Total.Long.Distance.Charges Total.Revenue      Customer.Status   
 Min.   :   0.0              Min.   :   21.36   Length:11768853   
 1st Qu.:  70.5              1st Qu.:  605.47   Class :character  
 Median : 401.4              Median : 2108.64   Mode  :character  
 Mean   : 749.1              Mean   : 3034.38                     
 3rd Qu.:1191.5              3rd Qu.: 4801.93                     
 Max.   :3564.7              Max.   :11979.34                     
                                                                  
 Churn.Category     Churn.Reason          Zip.Code       Population    
 Length:11768853    Length:11768853    Min.   :90001   Min.   :    11  
 Class :character   Class :character   1st Qu.:92268   1st Qu.:  1785  
 Mode  :character   Mode  :character   Median :93664   Median : 14239  
                                       Mean   :93679   Mean   : 20276  
                                       3rd Qu.:95409   3rd Qu.: 32984  
                                       Max.   :96161   Max.   :105285  
                                                                       
avg_table <- telecom_merged %>%
  group_by(Zip.Code) %>%
  summarise(
    Avg_Population = mean(Population, na.rm = TRUE),
    Avg_Age        = mean(Age,        na.rm = TRUE)
  ) %>%
  arrange(Zip.Code)

print(avg_table)
# A tibble: 1,671 × 3
   Zip.Code Avg_Population Avg_Age
      <int>          <dbl>   <dbl>
 1    90001          54492    46.5
 2    90002          44586    46.5
 3    90003          58198    46.5
 4    90004          67852    46.5
 5    90005          43019    46.5
 6    90006          62784    46.5
 7    90007          45025    46.5
 8    90008          30852    46.5
 9    90010           1957    46.5
10    90011         101215    46.5
# ℹ 1,661 more rows