remove(list = ls()) # clear environment
zipcode_population <- telecom_zipcode_population <- read.csv("~/Downloads/telecom_zipcode_population.csv")
customer_churn <- telecom_customer_churn <- read.csv("~/Downloads/telecom_customer_churn.csv")
data_dictionary <- read.csv("~/Downloads/telecom_data_dictionary.csv", comment.char="#")

# importing all the datasets
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
head(data_dictionary)
##            Table                Field
## 1 Customer Churn           CustomerID
## 2 Customer Churn               Gender
## 3 Customer Churn                  Age
## 4 Customer Churn              Married
## 5 Customer Churn Number of Dependents
## 6 Customer Churn                 City
##                                                                                                                  Description
## 1                                                                                  A unique ID that identifies each customer
## 2                                                                                     The customer\x92s gender: Male, Female
## 3                                    The customer\x92s current age, in years, at the time the fiscal quarter ended (Q2 2022)
## 4                                                                              Indicates if the customer is married: Yes, No
## 5 Indicates the number of dependents that live with the customer (dependents could be children, parents, grandparents, etc.)
## 6                                                              The city of the customer\x92s primary residence in California
head(customer_churn)
##   Customer.ID Gender Age Married Number.of.Dependents         City Zip.code
## 1  0002-ORFBO Female  37     Yes                    0 Frazier Park    93225
## 2  0003-MKNFE   Male  46      No                    0     Glendale    91206
## 3  0004-TLHLJ   Male  50      No                    0   Costa Mesa    92627
## 4  0011-IGKFF   Male  78     Yes                    0     Martinez    94553
## 5  0013-EXCHZ Female  75     Yes                    0    Camarillo    93010
## 6  0013-MHZWF Female  23      No                    3     Midpines    95345
##   Latitude Longitude Number.of.Referrals Tenure.in.Months   Offer Phone.Service
## 1 34.82766 -118.9991                   2                9    None           Yes
## 2 34.16251 -118.2039                   0                9    None           Yes
## 3 33.64567 -117.9226                   0                4 Offer E           Yes
## 4 38.01446 -122.1154                   1               13 Offer D           Yes
## 5 34.22785 -119.0799                   3                3    None           Yes
## 6 37.58150 -119.9728                   0                9 Offer E           Yes
##   Avg.Monthly.Long.Distance.Charges Multiple.Lines Internet.Service
## 1                             42.39             No              Yes
## 2                             10.69            Yes              Yes
## 3                             33.65             No              Yes
## 4                             27.82             No              Yes
## 5                              7.38             No              Yes
## 6                             16.77             No              Yes
##   Internet.Type Avg.Monthly.GB.Download Online.Security Online.Backup
## 1         Cable                      16              No           Yes
## 2         Cable                      10              No            No
## 3   Fiber Optic                      30              No            No
## 4   Fiber Optic                       4              No           Yes
## 5   Fiber Optic                      11              No            No
## 6         Cable                      73              No            No
##   Device.Protection.Plan Premium.Tech.Support Streaming.TV Streaming.Movies
## 1                     No                  Yes          Yes               No
## 2                     No                   No           No              Yes
## 3                    Yes                   No           No               No
## 4                    Yes                   No          Yes              Yes
## 5                     No                  Yes          Yes               No
## 6                     No                  Yes          Yes              Yes
##   Streaming.Music Unlimited.Data       Contract Paperless.Billing
## 1              No            Yes       One Year               Yes
## 2             Yes             No Month-to-Month                No
## 3              No            Yes Month-to-Month               Yes
## 4              No            Yes Month-to-Month               Yes
## 5              No            Yes Month-to-Month               Yes
## 6             Yes            Yes Month-to-Month               Yes
##    Payment.Method Monthly.Charge Total.Charges Total.Refunds
## 1     Credit Card           65.6        593.30          0.00
## 2     Credit Card           -4.0        542.40         38.33
## 3 Bank Withdrawal           73.9        280.85          0.00
## 4 Bank Withdrawal           98.0       1237.85          0.00
## 5     Credit Card           83.9        267.40          0.00
## 6     Credit Card           69.4        571.45          0.00
##   Total.Extra.Data.Charges Total.Long.Distance.Charges Total.Revenue
## 1                        0                      381.51        974.81
## 2                       10                       96.21        610.28
## 3                        0                      134.60        415.45
## 4                        0                      361.66       1599.51
## 5                        0                       22.14        289.54
## 6                        0                      150.93        722.38
##   Customer.Status  Churn.Category                  Churn.Reason
## 1          Stayed                                              
## 2          Stayed                                              
## 3         Churned      Competitor Competitor had better devices
## 4         Churned Dissatisfaction       Product dissatisfaction
## 5         Churned Dissatisfaction           Network reliability
## 6          Stayed
head(zipcode_population)
##   Zip.Code Population
## 1    90001      54492
## 2    90002      44586
## 3    90003      58198
## 4    90004      67852
## 5    90005      43019
## 6    90006      62784
# merged data into summary_stats

summary_stats <- customer_churn |>
  left_join(zipcode_population, by = c("Zip.code" = "Zip.Code")) |>
  summary()
print(summary_stats)
##  Customer.ID           Gender               Age          Married         
##  Length:7043        Length:7043        Min.   :19.00   Length:7043       
##  Class :character   Class :character   1st Qu.:32.00   Class :character  
##  Mode  :character   Mode  :character   Median :46.00   Mode  :character  
##                                        Mean   :46.51                     
##                                        3rd Qu.:60.00                     
##                                        Max.   :80.00                     
##                                                                          
##  Number.of.Dependents     City              Zip.code        Latitude    
##  Min.   :0.0000       Length:7043        Min.   :90001   Min.   :32.56  
##  1st Qu.:0.0000       Class :character   1st Qu.:92101   1st Qu.:33.99  
##  Median :0.0000       Mode  :character   Median :93518   Median :36.21  
##  Mean   :0.4687                          Mean   :93486   Mean   :36.20  
##  3rd Qu.:0.0000                          3rd Qu.:95329   3rd Qu.:38.16  
##  Max.   :9.0000                          Max.   :96150   Max.   :41.96  
##                                                                         
##    Longitude      Number.of.Referrals Tenure.in.Months    Offer          
##  Min.   :-124.3   Min.   : 0.000      Min.   : 1.00    Length:7043       
##  1st Qu.:-121.8   1st Qu.: 0.000      1st Qu.: 9.00    Class :character  
##  Median :-119.6   Median : 0.000      Median :29.00    Mode  :character  
##  Mean   :-119.8   Mean   : 1.952      Mean   :32.39                      
##  3rd Qu.:-118.0   3rd Qu.: 3.000      3rd Qu.:55.00                      
##  Max.   :-114.2   Max.   :11.000      Max.   :72.00                      
##                                                                          
##  Phone.Service      Avg.Monthly.Long.Distance.Charges Multiple.Lines    
##  Length:7043        Min.   : 1.01                     Length:7043       
##  Class :character   1st Qu.:13.05                     Class :character  
##  Mode  :character   Median :25.69                     Mode  :character  
##                     Mean   :25.42                                       
##                     3rd Qu.:37.68                                       
##                     Max.   :49.99                                       
##                     NA's   :682                                         
##  Internet.Service   Internet.Type      Avg.Monthly.GB.Download
##  Length:7043        Length:7043        Min.   : 2.00          
##  Class :character   Class :character   1st Qu.:13.00          
##  Mode  :character   Mode  :character   Median :21.00          
##                                        Mean   :26.19          
##                                        3rd Qu.:30.00          
##                                        Max.   :85.00          
##                                        NA's   :1526           
##  Online.Security    Online.Backup      Device.Protection.Plan
##  Length:7043        Length:7043        Length:7043           
##  Class :character   Class :character   Class :character      
##  Mode  :character   Mode  :character   Mode  :character      
##                                                              
##                                                              
##                                                              
##                                                              
##  Premium.Tech.Support Streaming.TV       Streaming.Movies   Streaming.Music   
##  Length:7043          Length:7043        Length:7043        Length:7043       
##  Class :character     Class :character   Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character   Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  Unlimited.Data       Contract         Paperless.Billing  Payment.Method    
##  Length:7043        Length:7043        Length:7043        Length:7043       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Monthly.Charge   Total.Charges    Total.Refunds    Total.Extra.Data.Charges
##  Min.   :-10.00   Min.   :  18.8   Min.   : 0.000   Min.   :  0.000         
##  1st Qu.: 30.40   1st Qu.: 400.1   1st Qu.: 0.000   1st Qu.:  0.000         
##  Median : 70.05   Median :1394.5   Median : 0.000   Median :  0.000         
##  Mean   : 63.60   Mean   :2280.4   Mean   : 1.962   Mean   :  6.861         
##  3rd Qu.: 89.75   3rd Qu.:3786.6   3rd Qu.: 0.000   3rd Qu.:  0.000         
##  Max.   :118.75   Max.   :8684.8   Max.   :49.790   Max.   :150.000         
##                                                                             
##  Total.Long.Distance.Charges Total.Revenue      Customer.Status   
##  Min.   :   0.00             Min.   :   21.36   Length:7043       
##  1st Qu.:  70.55             1st Qu.:  605.61   Class :character  
##  Median : 401.44             Median : 2108.64   Mode  :character  
##  Mean   : 749.10             Mean   : 3034.38                     
##  3rd Qu.:1191.10             3rd Qu.: 4801.15                     
##  Max.   :3564.72             Max.   :11979.34                     
##                                                                   
##  Churn.Category     Churn.Reason         Population    
##  Length:7043        Length:7043        Min.   :    11  
##  Class :character   Class :character   1st Qu.:  2344  
##  Mode  :character   Mode  :character   Median : 17554  
##                                        Mean   : 22140  
##                                        3rd Qu.: 36125  
##                                        Max.   :105285  
## 

Creating a table for average age and average population for each zipcode

library(dplyr)

str(summary_stats)
##  'table' chr [1:7, 1:39] "Length:7043       " "Class :character  " ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:7] "" "" "" "" ...
##   ..$ : chr [1:39] "Customer.ID" "   Gender" "     Age" "  Married" ...
summary_stats <- read.csv("~/Downloads/telecom_customer_churn.csv")

summary_stats$Age <- as.numeric(summary_stats$Age)

summary_stats$Zip.code <- as.numeric(summary_stats$Zip.code)

population <- read.csv("~/Downloads/telecom_zipcode_population.csv")

zipcode_population$Population <- as.numeric(zipcode_population$Population)

telecom_customer_churn$Age <- as.numeric(telecom_customer_churn$Age)

zipcode_population$Zip.Code <- as.numeric(as.character(zipcode_population$Zip.Code))

# converting all the necessary data types to numeric

rm(telecom_zipcode_population) # removing unnecessary data
rm(population)
rm(data_dictionary)
rm(customer_churn)

?dplyr

zipcode_population <- zipcode_population |>
  rename(Zip.code = Zip.Code)

# Renamed the two variables to match

merged_data <- summary_stats |>
  left_join(zipcode_population, by = "Zip.code")

# merged data again

merged_data |>
group_by(Zip.code) |>
  summarise(Avg_Population = mean(Population, na.rm = TRUE),
            Avg_Age = median(Age, na.rm = TRUE)
            )
## # A tibble: 1,626 × 3
##    Zip.code Avg_Population Avg_Age
##       <dbl>          <dbl>   <dbl>
##  1    90001          54492    51  
##  2    90002          44586    51  
##  3    90003          58198    49  
##  4    90004          67852    44  
##  5    90005          43019    42.5
##  6    90006          62784    41  
##  7    90007          45025    42  
##  8    90008          30852    42  
##  9    90010           1957    26  
## 10    90011         101215    30  
## # ℹ 1,616 more rows
# form the table