Set Up Working Directory

getwd()
## [1] "/Users/jadensavoy/Desktop/MKTG 3P98/Excel Files"
setwd("/Users/jadensavoy/Desktop/MKTG 3P98/Excel Files")

Load in Packages

library(tinytex)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Load in the Datasets

Load Car1 Data File Using read.csv

Car1<-read.csv("Car_Survey_1a.csv")

Load Car2 Data File Using read.csv

Car2<-read.csv("Car_Survey_2a.csv")

Display Each Dataset

Display The Structure of Car1 (data frame)

str(Car1)
## 'data.frame':    1180 obs. of  23 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
head(Car1,n=5)
##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X
## 1         2        2  Collision   Male  18         2 NA
## 2         2        2  Collision   Male  18         2 NA
## 3         1        2  Collision   Male  19         2 NA
## 4         2        2  Collision   Male  19         2 NA
## 5         1        2  Collision Female  19         2 NA

Display The Structure of Car2 (data frame)

str(Car2)
## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...
head(Car2,n=5)
##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5

Create a Master Dataset (combined Car1 and Car2)

Rename Unique ID in Car 2 to match with Car1

names(Car2)[1]<-c("Resp")

Check to See if it was changed

head(Car2,n=1)
##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4

Merge Car1 and Car2 into one dataset

Car_Total<-merge(Car1,Car2, by="Resp")
str(Car_Total)
## 'data.frame':    1049 obs. of  31 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...
head(Car_Total)
##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
## 6 Res1002     3     1     4     3         5         6         6     2     6
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
## 6          6          6            5            4            5            5
##   Residence Pay_Meth Insur_Type Gender Age Education  X   Region
## 1         2        2  Collision   Male  18         2 NA European
## 2         1        2  Collision   Male  21         2 NA European
## 3         2        1  Collision Female  32         1 NA American
## 4         2        3  Liability Female  24         2 NA    Asian
## 5         1        3  Liability Female  24         2 NA    Asian
## 6         1        3  Liability Female  25         2 NA    Asian
##             Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Ford Expedition  15   8  5.5      16     14          4
## 2 Ford Expedition  15   8  5.5      16     14          5
## 3     Toyota Rav4  24   4  8.2      10      8          4
## 4  Toyota Corolla  26   4  8.0       7      6          6
## 5  Toyota Corolla  26   4  8.0       7      6          5
## 6  Toyota Corolla  26   4  8.0       7      6          6

Save the merged data (Car_Total) to a file

write.csv(Car_Total, "Car_Total", row.names = FALSE)

Using readxl package

Load the package

library(readxl)

Replace Missing Values

Check the Master Dataset

summary(Car_Total)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :6.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                     NA's   :4                       NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education        X              Region             Model          
##  Min.   :1.000   Mode:logical   Length:1049        Length:1049       
##  1st Qu.:2.000   NA's:1049      Class :character   Class :character  
##  Median :2.000                  Mode  :character   Mode  :character  
##  Mean   :1.989                                                       
##  3rd Qu.:2.000                                                       
##  Max.   :3.000                                                       
##                                                                      
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
## 

Replace Missing Values in Att_1

Calculate the Mean of Att_1

mean(Car_Total$Att_1)
## [1] NA

Find Rows with NA in Att_1

na_rows<-Car_Total[is.na(Car_Total$Att_1),]
                   print(na_rows)
##       Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 109 Res151    NA     2    NA     2         3        NA         2     2    NA
## 110 Res152    NA     5     5     4         5         6         4     5     6
## 112 Res154    NA     6     6     6         5         4         5     6     6
## 127 Res168    NA     3     3     3         6         5         3     5     6
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 109          2          2           NA            2            2            3
## 110          6          6            6            2            4            5
## 112          5          6            5            6            5            5
## 127          6          7            5            3            5            4
##     Residence Pay_Meth    Insur_Type Gender Age Education  X   Region
## 109         1        2 Comprehensive Female  21         2 NA American
## 110         2        1 Comprehensive Female  21         2 NA American
## 112         2        2 Comprehensive Female  23         2 NA American
## 127         1        2     Collision   Male  29         2 NA    Asian
##                 Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 109     Chrysler Jeep  18   6  3.6      12   10.0          4
## 110     Chrysler Jeep  18   6  3.6      12   10.0          6
## 112     Chrysler Jeep  18   6  3.6      12   10.0          6
## 127 Toyota Highlander  20   6  7.2      10    8.5          6

Find mean of Att_1 without NA values

meanAtt_1<-mean(Car_Total$Att_1,na.rm = TRUE)
print(meanAtt_1)
## [1] 4.882297

Replace NA values with calculated mean of Att_1

Car_Total[is.na(Car_Total$Att_1),"Att_1"] <-meanAtt_1

Check to see if NA rows are replaced with calculated mean

Car_Total[c(rownames(na_rows)),]
##       Resp    Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 109 Res151 4.882297     2    NA     2         3        NA         2     2    NA
## 110 Res152 4.882297     5     5     4         5         6         4     5     6
## 112 Res154 4.882297     6     6     6         5         4         5     6     6
## 127 Res168 4.882297     3     3     3         6         5         3     5     6
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 109          2          2           NA            2            2            3
## 110          6          6            6            2            4            5
## 112          5          6            5            6            5            5
## 127          6          7            5            3            5            4
##     Residence Pay_Meth    Insur_Type Gender Age Education  X   Region
## 109         1        2 Comprehensive Female  21         2 NA American
## 110         2        1 Comprehensive Female  21         2 NA American
## 112         2        2 Comprehensive Female  23         2 NA American
## 127         1        2     Collision   Male  29         2 NA    Asian
##                 Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 109     Chrysler Jeep  18   6  3.6      12   10.0          4
## 110     Chrysler Jeep  18   6  3.6      12   10.0          6
## 112     Chrysler Jeep  18   6  3.6      12   10.0          6
## 127 Toyota Highlander  20   6  7.2      10    8.5          6

Replace Missing Values in Futu_Pur_1

Calculate the Mean of Futu_Pur_1

mean(Car_Total$Futu_Pur_1)
## [1] NA

Find Rows with NA in Futu_Pur_1

na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_1),]
                   print(na_rows)
##       Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62  Res109     6     6     6     5         5         6        NA    NA     4
## 120 Res161     6     6     6     3         5         1         4     5     5
## 181 Res216     5     6     6     7         7         4         4     5     5
## 873  Res84     6     6     6     6        NA        NA         5     5    NA
## 917  Res88     7     7    NA    NA         7         7         5     7     6
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62          NA         NA           NA           NA           NA           NA
## 120         NA          5            5            6            5            5
## 181         NA          6            5            5            5            5
## 873         NA          6            6            5            5            6
## 917         NA          5            5            4            5            5
##     Residence Pay_Meth    Insur_Type Gender Age Education  X         Region
## 62          1        3     Liability   Male  36         2 NA       American
## 120         1        3 Comprehensive   Male  24         3 NA Middle Eastern
## 181         2        1     Liability   Male  19         3 NA       American
## 873         2        1     Collision   Male  23         2 NA       American
## 917         1        2     Collision Female  24         1 NA       American
##             Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62    Toyota Rav4  24   4  8.2      10      8          3
## 120 Chrysler Jeep  18   6  3.6      12     10          4
## 181   Toyota Rav4  24   4  8.2      10      8          6
## 873     Honda CRV  26   4  8.5       8      7          4
## 917     Honda CRV  26   4  8.5       8      7          4

Find mean of Futu_Pur_1 without NA values

meanFut_Pur_1<-mean(Car_Total$Futu_Pur_1,na.rm = TRUE)
print(meanFut_Pur_1)
## [1] 5.320881

Replace NA values with calculated mean of Futu_Pur_1

Car_Total[is.na(Car_Total$Futu_Pur_1),"Futu_Pur_1"] <-meanFut_Pur_1

Check to see if NA rows are replaced with calculated mean

Car_Total[c(rownames(na_rows)),]
##       Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62  Res109     6     6     6     5         5         6        NA    NA     4
## 120 Res161     6     6     6     3         5         1         4     5     5
## 181 Res216     5     6     6     7         7         4         4     5     5
## 873  Res84     6     6     6     6        NA        NA         5     5    NA
## 917  Res88     7     7    NA    NA         7         7         5     7     6
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62    5.320881         NA           NA           NA           NA           NA
## 120   5.320881          5            5            6            5            5
## 181   5.320881          6            5            5            5            5
## 873   5.320881          6            6            5            5            6
## 917   5.320881          5            5            4            5            5
##     Residence Pay_Meth    Insur_Type Gender Age Education  X         Region
## 62          1        3     Liability   Male  36         2 NA       American
## 120         1        3 Comprehensive   Male  24         3 NA Middle Eastern
## 181         2        1     Liability   Male  19         3 NA       American
## 873         2        1     Collision   Male  23         2 NA       American
## 917         1        2     Collision Female  24         1 NA       American
##             Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62    Toyota Rav4  24   4  8.2      10      8          3
## 120 Chrysler Jeep  18   6  3.6      12     10          4
## 181   Toyota Rav4  24   4  8.2      10      8          6
## 873     Honda CRV  26   4  8.5       8      7          4
## 917     Honda CRV  26   4  8.5       8      7          4

Replace Missing Values in Futu_Pur_2

Calculate the Mean of Futu_Pur_2

mean(Car_Total$Futu_Pur_2)
## [1] NA

Find Rows with NA in Futu_Pur_2

na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_2),]
                   print(na_rows)
##       Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62  Res109     6     6     6     5         5         6        NA    NA     4
## 507  Res51     4     4     4     1         2         3         3     3     3
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62    5.320881         NA           NA           NA           NA           NA
## 507   2.000000         NA            2            2            2            6
##     Residence Pay_Meth    Insur_Type Gender Age Education  X   Region
## 62          1        3     Liability   Male  36         2 NA American
## 507         2        1 Comprehensive   Male  48         2 NA American
##               Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62      Toyota Rav4  24   4  8.2      10      8          3
## 507 Ford Expedition  15   8  5.5      16     14          4

Find mean of Futu_Pur_2 without NA values

meanFut_Pur_2<-mean(Car_Total$Futu_Pur_2,na.rm = TRUE)
print(meanFut_Pur_2)
## [1] 5.370583

Replace NA values with calculated mean of Futu_Pur_2

Car_Total[is.na(Car_Total$Futu_Pur_2),"Futu_Pur_2"] <-meanFut_Pur_2

Check to see if NA rows are replaced with calculated mean

Car_Total[c(rownames(na_rows)),]
##       Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62  Res109     6     6     6     5         5         6        NA    NA     4
## 507  Res51     4     4     4     1         2         3         3     3     3
##     Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62    5.320881   5.370583           NA           NA           NA           NA
## 507   2.000000   5.370583            2            2            2            6
##     Residence Pay_Meth    Insur_Type Gender Age Education  X   Region
## 62          1        3     Liability   Male  36         2 NA American
## 507         2        1 Comprehensive   Male  48         2 NA American
##               Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62      Toyota Rav4  24   4  8.2      10      8          3
## 507 Ford Expedition  15   8  5.5      16     14          4

Create a new column (variable) to calculate mean of Att_1 and Att_2

Car_Total$Att_Mean=(Car_Total$Att_1+
                      Car_Total$Att_2)/2
View(Car_Total[c("Att_1","Att_2", "Att_Mean")]) 

Check for NA values in Att_Mean

na_rows<-Car_Total[is.na(Car_Total$Att_Mean),]
print(na_rows)
##  [1] Resp         Att_1        Att_2        Enj_1        Enj_2       
##  [6] Perform_1    Perform_2    Perform_3    WOM_1        WOM_2       
## [11] Futu_Pur_1   Futu_Pur_2   Valu_Percp_1 Valu_Percp_2 Pur_Proces_1
## [16] Pur_Proces_2 Residence    Pay_Meth     Insur_Type   Gender      
## [21] Age          Education    X            Region       Model       
## [26] MPG          Cyl          acc1         C_cost.      H_Cost      
## [31] Post.Satis   Att_Mean    
## <0 rows> (or 0-length row.names)

Create a new column (variable) to calculate mean of Futu_Pur_1 and Fut_Pur_2

Car_Total$Futu_Pur_Mean=(Car_Total$Futu_Pur_1+
                      Car_Total$Futu_Pur_2)/2
View(Car_Total[c("Futu_Pur_1","Futu_Pur_2", "Futu_Pur_Mean")]) 

Check for NA values in Futu_Pur_Mean

na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_Mean),]
print(na_rows)
##  [1] Resp          Att_1         Att_2         Enj_1         Enj_2        
##  [6] Perform_1     Perform_2     Perform_3     WOM_1         WOM_2        
## [11] Futu_Pur_1    Futu_Pur_2    Valu_Percp_1  Valu_Percp_2  Pur_Proces_1 
## [16] Pur_Proces_2  Residence     Pay_Meth      Insur_Type    Gender       
## [21] Age           Education     X             Region        Model        
## [26] MPG           Cyl           acc1          C_cost.       H_Cost       
## [31] Post.Satis    Att_Mean      Futu_Pur_Mean
## <0 rows> (or 0-length row.names)

Load in Librarys

Load in ggplot

library(ggplot2) 

Load in Stringr

library(stringr) 

Seperate Make and Model into two Columns

Car_Total[c("Make","Model_v1")] <-str_split_fixed(Car_Total$Model," ", 2)
View(Car_Total)

Number of Cars per Make by Region

Graph

ggplot(Car_Total,aes(x=Region,fill = Make))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars per Make by Region")

Create a Contingengy Table For Region by Model

xtabs(~Region + Model, Car_Total)
##                 Model
## Region           Buick Encore Chevrolet Camaro Chrysler Jeep Dodge Journey
##   American                 17               22            54            22
##   Asian                     0                0            24             0
##   European                 10               21            13             0
##   Middle Eastern            4               21            78            19
##                 Model
## Region           Fiat 500x Ford Expedition Ford Explorer Honda CRV Honda Pilot
##   American               9              51            30        25          13
##   Asian                  0              13            11        29          29
##   European               9              23             6        20           9
##   Middle Eastern         0              16            52         0          34
##                 Model
## Region           Kia Sorento Lincoln Navigator  Toyota Corolla
##   American                15                  0             33
##   Asian                    0                  0             51
##   European                19                  0              8
##   Middle Eastern           0                 39              0
##                 Model
## Region           Toyota Highlander Toyota Rav4
##   American                      30          39
##   Asian                         51           0
##   European                      48          24
##   Middle Eastern                 8           0

Number of Cars per Make by Age Group

Convert Age into 3 Catagories

Car_Total$AgeGrp<-cut(Car_Total$Age,
                      breaks = c(0, 30, 50, Inf),
                      labels = c("Young Adults", "Adults", "Mature Adults"),
                      right = FALSE)
names(Car_Total)
##  [1] "Resp"          "Att_1"         "Att_2"         "Enj_1"        
##  [5] "Enj_2"         "Perform_1"     "Perform_2"     "Perform_3"    
##  [9] "WOM_1"         "WOM_2"         "Futu_Pur_1"    "Futu_Pur_2"   
## [13] "Valu_Percp_1"  "Valu_Percp_2"  "Pur_Proces_1"  "Pur_Proces_2" 
## [17] "Residence"     "Pay_Meth"      "Insur_Type"    "Gender"       
## [21] "Age"           "Education"     "X"             "Region"       
## [25] "Model"         "MPG"           "Cyl"           "acc1"         
## [29] "C_cost."       "H_Cost"        "Post.Satis"    "Att_Mean"     
## [33] "Futu_Pur_Mean" "Make"          "Model_v1"      "AgeGrp"
head(Car_Total$ageGrp)
## NULL

Graph

ggplot(Car_Total,aes(x=AgeGrp,fill = Make))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars per Make by Age")

Number of Cars per Make by Gender

Graph

ggplot(Car_Total,aes(x=Gender,fill = Make))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars per Make by Gender")

Number of Cars per Make by Education

Convert Eduction into 3 Catagories

Car_Total <- Car_Total %>%
  mutate(education_level = recode(Education, 
                                  `1` = "Highschool", 
                                  `2` = "Undergraduate", 
                                  `3` = "Masters Degree"))

Graph

ggplot(Car_Total,aes(x=education_level,fill = Make))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars per Make by Education")

Group by Parent Company

Install Packages

library(dplyr)

Group

Car_Total <- Car_Total %>% 
mutate(Parent = case_when(Make == "Buick" ~ "General Motors",
                            Make == "Chevrolet" ~ "General Motors",
                            Make == "Chrysler" ~ "Chrysler",
                            Make == "Dodge" ~ "Chrysler",
                            Make == "Fiat" ~ "Chrysler",
                            Make == "Ford" ~ "Ford",
                            Make == "Honda" ~ "Honda",
                            Make == "Kia" ~ "Kia",
                            Make == "Lincoln" ~ "Ford",
                            Make == "Toyota" ~ "Toyota",
                            TRUE ~ "Check"))

Check if Grouping is Correct

count(Car_Total, Car_Total$Make, Car_Total$Parent, name = "Freq")
##    Car_Total$Make Car_Total$Parent Freq
## 1           Buick   General Motors   31
## 2       Chevrolet   General Motors   64
## 3        Chrysler         Chrysler  169
## 4           Dodge         Chrysler   41
## 5            Fiat         Chrysler   18
## 6            Ford             Ford  202
## 7           Honda            Honda  159
## 8             Kia              Kia   34
## 9         Lincoln             Ford   39
## 10         Toyota           Toyota  292
table(Car_Total$Make)
## 
##     Buick Chevrolet  Chrysler     Dodge      Fiat      Ford     Honda       Kia 
##        31        64       169        41        18       202       159        34 
##   Lincoln    Toyota 
##        39       292

Graph

ggplot(Car_Total,aes(x=Region,fill = Parent))+
  theme_bw()+
  facet_wrap(~Model)+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars by Model and Region")

What is the Attitude Mean by Make and Region?

Create Contingency Table

brand_region_table_Att1 <- aggregate(Att_1~Parent+Region, Car_Total, mean)
print(brand_region_table_Att1)
##            Parent         Region    Att_1
## 1        Chrysler       American 4.925258
## 2            Ford       American 4.567901
## 3  General Motors       American 5.358974
## 4           Honda       American 5.657895
## 5             Kia       American 3.266667
## 6          Toyota       American 5.254902
## 7        Chrysler          Asian 5.291667
## 8            Ford          Asian 4.166667
## 9           Honda          Asian 5.568966
## 10         Toyota          Asian 4.881199
## 11       Chrysler       European 4.636364
## 12           Ford       European 5.241379
## 13 General Motors       European 5.000000
## 14          Honda       European 4.758621
## 15            Kia       European 3.789474
## 16         Toyota       European 4.800000
## 17       Chrysler Middle Eastern 4.123711
## 18           Ford Middle Eastern 4.626168
## 19 General Motors Middle Eastern 5.920000
## 20          Honda Middle Eastern 5.500000
## 21         Toyota Middle Eastern 5.375000

Graph

ggplot(brand_region_table_Att1, aes(x=Region, y=Att_1, group = Parent)) +
  geom_line(aes(colour = Parent))+
  geom_point(aes(colour = Parent))+
  labs(y="Att_1 Mean",
       title = "Attitude Mean by Parent and Region")

What is the Attitude Mean for Toyota by Region?

Create Contingency Table

Toyota_Att1_Mean <- brand_region_table_Att1%>%
  filter(Parent == "Toyota")
head(Toyota_Att1_Mean,n=10)
##   Parent         Region    Att_1
## 1 Toyota       American 5.254902
## 2 Toyota          Asian 4.881199
## 3 Toyota       European 4.800000
## 4 Toyota Middle Eastern 5.375000
table(Toyota_Att1_Mean$Parent)
## 
## Toyota 
##      4

Graph

ggplot(Toyota_Att1_Mean, aes(x=Region, y=Att_1, group = Parent)) +
  geom_line(aes(colour = Parent))+
  geom_point(aes(colour = Parent))+
  scale_y_continuous(limits = c(3, 6)) 

labs(y="Att_1 Mean",
     title = "Attitude Mean for Toyota by Region")
## $y
## [1] "Att_1 Mean"
## 
## $title
## [1] "Attitude Mean for Toyota by Region"
## 
## attr(,"class")
## [1] "labels"

Filter for 1+ brands (Toyota, Ford)

Multiple_Att1_Mean <- brand_region_table_Att1 %>%
  filter(Parent == "Toyota" | Parent == "Ford")
head(Multiple_Att1_Mean,n=10)
##   Parent         Region    Att_1
## 1   Ford       American 4.567901
## 2 Toyota       American 5.254902
## 3   Ford          Asian 4.166667
## 4 Toyota          Asian 4.881199
## 5   Ford       European 5.241379
## 6 Toyota       European 4.800000
## 7   Ford Middle Eastern 4.626168
## 8 Toyota Middle Eastern 5.375000
table(Multiple_Att1_Mean$Parent)
## 
##   Ford Toyota 
##      4      4

Graph

ggplot(Multiple_Att1_Mean, aes(x=Region, y=Att_1, group = Parent)) +
  geom_line(aes(colour = Parent)) +
  geom_point(aes(colour = Parent)) +
  scale_y_continuous(limits = c(3, 6)) 

labs(y="Att_1 Mean",
     title = "Attitude Mean for Toyota and Ford by Region")
## $y
## [1] "Att_1 Mean"
## 
## $title
## [1] "Attitude Mean for Toyota and Ford by Region"
## 
## attr(,"class")
## [1] "labels"

What is the Future Purchase Mean by Make and Region?

Create Contingency Table

brand_region_table_Pur <- aggregate(Futu_Pur_1~Parent+Region, Car_Total, mean)
print(brand_region_table_Pur)
##            Parent         Region Futu_Pur_1
## 1        Chrysler       American   4.929412
## 2            Ford       American   5.370370
## 3  General Motors       American   5.282051
## 4           Honda       American   5.227415
## 5             Kia       American   6.000000
## 6          Toyota       American   5.270998
## 7        Chrysler          Asian   5.125000
## 8            Ford          Asian   5.166667
## 9           Honda          Asian   5.120690
## 10         Toyota          Asian   5.382353
## 11       Chrysler       European   5.545455
## 12           Ford       European   5.620690
## 13 General Motors       European   4.548387
## 14          Honda       European   5.586207
## 15            Kia       European   5.421053
## 16         Toyota       European   5.262500
## 17       Chrysler Middle Eastern   5.384751
## 18           Ford Middle Eastern   5.644860
## 19 General Motors Middle Eastern   5.480000
## 20          Honda Middle Eastern   5.441176
## 21         Toyota Middle Eastern   5.250000

Graph

ggplot(brand_region_table_Pur, aes(x=Region, y=Futu_Pur_1, group = Parent)) +
  geom_line(aes(colour = Parent))+
  geom_point(aes(colour = Parent))+
  labs(y="Futu_Pur_1 Mean",
       title = "Future Purchase Mean by Parent and Region")

What is the Future Purchase Mean for Toyota by Region?

Toyota_Pur1_Mean <- brand_region_table_Pur %>%
  filter(Parent == "Toyota")
head(Toyota_Pur1_Mean,n=10)
##   Parent         Region Futu_Pur_1
## 1 Toyota       American   5.270998
## 2 Toyota          Asian   5.382353
## 3 Toyota       European   5.262500
## 4 Toyota Middle Eastern   5.250000
table(Toyota_Pur1_Mean$Parent)
## 
## Toyota 
##      4

Filter for 1+ brands (Toyota, Ford)

Multiple_Pur1_Mean <- brand_region_table_Pur %>%
  filter(Parent == "Toyota" | Parent == "Ford")
head(Multiple_Pur1_Mean,n=10)
##   Parent         Region Futu_Pur_1
## 1   Ford       American   5.370370
## 2 Toyota       American   5.270998
## 3   Ford          Asian   5.166667
## 4 Toyota          Asian   5.382353
## 5   Ford       European   5.620690
## 6 Toyota       European   5.262500
## 7   Ford Middle Eastern   5.644860
## 8 Toyota Middle Eastern   5.250000
table(Multiple_Pur1_Mean$Parent)
## 
##   Ford Toyota 
##      4      4

Graph

ggplot(Multiple_Pur1_Mean, aes(x=Region, y=Futu_Pur_1, group = Parent)) +
  geom_line(aes(colour = Parent)) +
  geom_point(aes(colour = Parent)) +
  scale_y_continuous(limits = c(3, 6)) 

labs(y="Futu_Pur_1 Mean",
     title = "Future Purchase Mean for Toyota and Ford by Region")
## $y
## [1] "Futu_Pur_1 Mean"
## 
## $title
## [1] "Future Purchase Mean for Toyota and Ford by Region"
## 
## attr(,"class")
## [1] "labels"

Filter Data into Toyota only

filtered_data_Toyota <- Car_Total %>%
  filter(Parent == "Toyota")

Graph Age Group by Region- Toyota Only

ggplot(filtered_data_Toyota,aes(x=Region,fill = AgeGrp))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Toyota Cars by Age Group and Region")

Number of Toyota Cars Purchased by Each Age Group

car_count_by_age <- filtered_data_Toyota %>%
  group_by(AgeGrp) %>%
  summarize(car_count = n())
print(car_count_by_age)
## # A tibble: 3 × 2
##   AgeGrp        car_count
##   <fct>             <int>
## 1 Young Adults        104
## 2 Adults              149
## 3 Mature Adults        39

Number of Toyota Cars Purchased by Each Region

car_count_by_region <- filtered_data_Toyota %>%
  group_by(Region) %>%
  summarize(car_count = n())
print(car_count_by_region)
## # A tibble: 4 × 2
##   Region         car_count
##   <chr>              <int>
## 1 American             102
## 2 Asian                102
## 3 European              80
## 4 Middle Eastern         8

Number of Toyota Cars Purchased by Gender

car_count_by_gender <- filtered_data_Toyota %>%
  group_by(Gender) %>%
  summarize(car_count = n())
print(car_count_by_gender)
## # A tibble: 3 × 2
##   Gender   car_count
##   <chr>        <int>
## 1 ""               3
## 2 "Female"       213
## 3 "Male"          76

Graph Gender by Region- Toyota Only

ggplot(filtered_data_Toyota,aes(x=Region,fill = Gender))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Toyota Cars by Gender and Region")

Number of Toyota Cars Purchased by Education

car_count_by_education <- filtered_data_Toyota %>%
  group_by(education_level) %>%
  summarize(car_count = n())
print(car_count_by_education)
## # A tibble: 3 × 2
##   education_level car_count
##   <chr>               <int>
## 1 Highschool             65
## 2 Masters Degree         70
## 3 Undergraduate         157

Graph Education Level by Region- Toyota Only

ggplot(filtered_data_Toyota,aes(x=Region,fill = education_level))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Toyota Cars Purchased by Education Level and Region")