R Markdown

This is an R Markdown document, detailing how to do Project 1 in MKTG 3P98. We will be looking at data files from a survey about peoples perception on certain popular car brands and models as well as some demographic and ethnographic information associated with the survey takers. By the end we will be able to generate some insights as to the amount of these car brands and how popular they are in each geographic region, and subsequently how popular their car models are in said regions.

get working directory

getwd()
## [1] "/Users/macbookair/Desktop/MKTG R Project 1"

Open and Read the CSV files for the Project

#open and view first file
C1<-read.csv("Car_Survey_1.csv")
str(C1)
## 'data.frame':    1180 obs. of  23 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
head(C1,n=5)
##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X
## 1         2        2  Collision   Male  18         2 NA
## 2         2        2  Collision   Male  18         2 NA
## 3         1        2  Collision   Male  19         2 NA
## 4         2        2  Collision   Male  19         2 NA
## 5         1        2  Collision Female  19         2 NA
View(C1)
#open second file
C2<-read.csv("Car_Survey_2.csv")
str(C2)
## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...
head(C2,n=5)
##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5
View(C2)

clean and merge the data files, creating a new one.

#edit column names
names(C2)[1]<-c("Resp")
head(C2,n=1)
##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4
#merge columns
Car_T<-merge(C1,C2, by="Resp")
str(Car_T)
## 'data.frame':    1049 obs. of  31 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...
#create new merged table
write.csv(Car_T,"Car_Total",row.names = FALSE)
View(Car_T)

loading the data files in excel format to check for N/A or non numeric values

library(readxl)

Car_Data_1<-read_excel("Car_Survey_1.xlsx")
Car_Data_2<-read_excel("Car_Survey_2.xlsx")

#checking missing (or NA values)

summary(Car_T)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :6.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                     NA's   :4                       NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education        X              Region             Model          
##  Min.   :1.000   Mode:logical   Length:1049        Length:1049       
##  1st Qu.:2.000   NA's:1049      Class :character   Class :character  
##  Median :2.000                  Mode  :character   Mode  :character  
##  Mean   :1.989                                                       
##  3rd Qu.:2.000                                                       
##  Max.   :3.000                                                       
##                                                                      
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
## 
#the output was NA
#compute mean value of Att_1
mean(Car_T$Att_1)
## [1] NA
meanATT1<-mean(Car_T$Att_1,na.rm = TRUE)

create a new “average” value to replace the NA values in the Attribute_1 column

#replace NA values with the column mean of Att_1
meanATT1<-mean(Car_T$Att_1,na.rm = TRUE)
Car_T[is.na(Car_T$Att_1),"Att_1"]<-meanATT1

#Replace NA Values in Car_Total with the mean of their respective columns
Car_T[is.na(Car_T)]<-mean(Car_T, na.rm = TRUE)
## Warning in mean.default(Car_T, na.rm = TRUE): argument is not numeric or
## logical: returning NA
summary(Car_T)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :5.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                     NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education           X           Region             Model          
##  Min.   :1.000   Min.   : NA    Length:1049        Length:1049       
##  1st Qu.:2.000   1st Qu.: NA    Class :character   Class :character  
##  Median :2.000   Median : NA    Mode  :character   Mode  :character  
##  Mean   :1.989   Mean   :NaN                                         
##  3rd Qu.:2.000   3rd Qu.: NA                                         
##  Max.   :3.000   Max.   : NA                                         
##                  NA's   :1049                                        
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
## 
#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Car_T<- Car_T %>%
 mutate(across(where(is.numeric), ~ ifelse(is.na(.),mean(., na.rm = TRUE),.)))
summary(Car_T)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:5.000  
##  Mode  :character   Median :5.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                    
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##                                                                                
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##                                                                 
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education           X           Region             Model          
##  Min.   :1.000   Min.   : NA    Length:1049        Length:1049       
##  1st Qu.:2.000   1st Qu.: NA    Class :character   Class :character  
##  Median :2.000   Median : NA    Mode  :character   Mode  :character  
##  Mean   :1.989   Mean   :NaN                                         
##  3rd Qu.:2.000   3rd Qu.: NA                                         
##  Max.   :3.000   Max.   : NA                                         
##                  NA's   :1049                                        
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
## 
#create a new column to calculate mean of Att_1 and Att_2
Car_T$Att_Mean = (Car_T$Att_1 + Car_T$Att_2) /2
View(Car_T)

create a bar charts, one showing the distrution of car brands across the region; and another bar chart breaking that chart further down by car model.

#what is the distribution of cars across the regions (frequency count)?
#install.packages("ggplot2")
library(ggplot2)

#Create Graph 1
ggplot(Car_T,aes(x=Region,fill = Region)) +
  theme_bw()+
  geom_bar()+
  geom_text(stat = "count", aes(label=..count..),vjust=0) +
  labs(y="Number of Cars",
       x = "Region",
       title = "Number of cars by Region")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

car_region_percentage <- prop.table(table(Car_T$Region))
print(car_region_percentage)
## 
##       American          Asian       European Middle Eastern 
##      0.3431840      0.1982841      0.2001907      0.2583413
#Define Model and Region as categorical variables
Car_T$Model<-as.factor(Car_T$Model)
Car_T$Region<-as.factor(Car_T$Region)

#Create Graph 2
ggplot(Car_T,aes(x=Region,fill=Model))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of Cars",
       title = "Number of Cars by Model and Region")

library(tidyr)
library(dplyr)
library(ggplot2)

Car_T <- Car_T %>% 
  separate(Model, into = c("Brand", "Model"),
           sep = " ", extra = "merge")

#count the total number of cars by brand
count(Car_T,Car_T$Make, Car_T$Brand, name = "Freq")
##    Car_T$Brand Freq
## 1        Buick   31
## 2    Chevrolet   64
## 3     Chrysler  169
## 4        Dodge   41
## 5         Fiat   18
## 6         Ford  202
## 7        Honda  159
## 8          Kia   34
## 9      Lincoln   39
## 10      Toyota  292
#create a crosstabulation table for brand by region
xtabs(~Region + Brand, Car_T)
##                 Brand
## Region           Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
##   American          17        22       54    22    9   81    38  15       0
##   Asian              0         0       24     0    0   24    58   0       0
##   European          10        21       13     0    9   29    29  19       0
##   Middle Eastern     4        21       78    19    0   68    34   0      39
##                 Brand
## Region           Toyota
##   American          102
##   Asian             102
##   European           80
##   Middle Eastern      8
#count the total number of cars by Brand across Region
brand_region_counts <- table(Car_T$Brand, Car_T$Region)
print(brand_region_counts)
##            
##             American Asian European Middle Eastern
##   Buick           17     0       10              4
##   Chevrolet       22     0       21             21
##   Chrysler        54    24       13             78
##   Dodge           22     0        0             19
##   Fiat             9     0        9              0
##   Ford            81    24       29             68
##   Honda           38    58       29             34
##   Kia             15     0       19              0
##   Lincoln          0     0        0             39
##   Toyota         102   102       80              8
#attitude toward car by region 
brand_region_table <- aggregate(Att_1~Brand+Region, Car_T, mean)
print(brand_region_table)
##        Brand         Region    Att_1
## 1      Buick       American 5.529412
## 2  Chevrolet       American 5.227273
## 3   Chrysler       American 5.011979
## 4      Dodge       American 4.818182
## 5       Fiat       American 4.666667
## 6       Ford       American 4.567901
## 7      Honda       American 5.657895
## 8        Kia       American 3.266667
## 9     Toyota       American 5.254902
## 10  Chrysler          Asian 5.291667
## 11      Ford          Asian 4.166667
## 12     Honda          Asian 5.568966
## 13    Toyota          Asian 4.881199
## 14     Buick       European 5.000000
## 15 Chevrolet       European 5.000000
## 16  Chrysler       European 5.230769
## 17      Fiat       European 3.777778
## 18      Ford       European 5.241379
## 19     Honda       European 4.758621
## 20       Kia       European 3.789474
## 21    Toyota       European 4.800000
## 22     Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24  Chrysler Middle Eastern 4.038462
## 25     Dodge Middle Eastern 4.473684
## 26      Ford Middle Eastern 4.014706
## 27     Honda Middle Eastern 5.500000
## 28   Lincoln Middle Eastern 5.692308
## 29    Toyota Middle Eastern 5.375000
#create a graph (data visualization - attitude toward car brands across regions)
ggplot(brand_region_table, aes(x=Region, y=Att_1, group=Brand))+
  geom_line(aes(color=Brand))+
  geom_point(aes(color=Brand))+
  labs(y="Att_1 Mean",
       title = "Attitude Mean by Brand and Region")

We will now be looking at the following data from Honda’s Perspective.

#select a specific brand and explore attitude across regions
#Honda
#Filter by a specific brand (Honda) from the brand_region_table created above.
Honda_Att1_Mean <- brand_region_table %>%
  filter (Brand == "Honda")

#view the results
print(Honda_Att1_Mean)
##   Brand         Region    Att_1
## 1 Honda       American 5.657895
## 2 Honda          Asian 5.568966
## 3 Honda       European 4.758621
## 4 Honda Middle Eastern 5.500000
#Create a graph (data visualization of the results above)
ggplot(Honda_Att1_Mean, aes(x=Region, y=Att_1, group = Brand))+
  geom_line(aes(color=Brand))+
  geom_point(aes(color=Brand))+
  scale_y_continuous(limits = c(3,6))+
  labs (y="Att_1 Mean",
        title = "Attitude Mean for Honda by region")

#filter for  competing brands (Honda vs Toyota) and comparing attitudes
Multiple_Att1_Mean <- brand_region_table %>%
  filter(Brand == "Honda" | Brand == "Toyota")

#view the results
print(Multiple_Att1_Mean)
##    Brand         Region    Att_1
## 1  Honda       American 5.657895
## 2 Toyota       American 5.254902
## 3  Honda          Asian 5.568966
## 4 Toyota          Asian 4.881199
## 5  Honda       European 4.758621
## 6 Toyota       European 4.800000
## 7  Honda Middle Eastern 5.500000
## 8 Toyota Middle Eastern 5.375000
#create a graph (data visualization - attitude toward Honda vs Toyota across regions)
ggplot(Multiple_Att1_Mean, aes(x=Region, y=Att_1, group=Brand))+
  geom_line(aes(color=Brand))+
  geom_point(aes(color=Brand))+
  scale_y_continuous(limits = c(3,6))+
  labs (y="Att_1 Mean",
        title = "Attitude Mean for Honda and Toyota by region")

# Demographic Groups by Age Variable (convert age into 3 groups)


Car_T$Age_Grp <- cut(Car_T$Age, breaks=c(0, 30, 50, Inf),
                       labels=c("Young Adults", "Adults", "Mature Adults"), right=FALSE)

names(Car_T)
##  [1] "Resp"         "Att_1"        "Att_2"        "Enj_1"        "Enj_2"       
##  [6] "Perform_1"    "Perform_2"    "Perform_3"    "WOM_1"        "WOM_2"       
## [11] "Futu_Pur_1"   "Futu_Pur_2"   "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence"    "Pay_Meth"     "Insur_Type"   "Gender"      
## [21] "Age"          "Education"    "X"            "Region"       "Brand"       
## [26] "Model"        "MPG"          "Cyl"          "acc1"         "C_cost."     
## [31] "H_Cost"       "Post.Satis"   "Att_Mean"     "Age_Grp"
head(Car_T, n=5)
##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education   X   Region  Brand
## 1         2        2  Collision   Male  18         2 NaN European   Ford
## 2         1        2  Collision   Male  21         2 NaN European   Ford
## 3         2        1  Collision Female  32         1 NaN American Toyota
## 4         2        3  Liability Female  24         2 NaN    Asian Toyota
## 5         1        3  Liability Female  24         2 NaN    Asian Toyota
##        Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean      Age_Grp
## 1 Expedition  15   8  5.5      16     14          4      6.0 Young Adults
## 2 Expedition  15   8  5.5      16     14          5      6.0 Young Adults
## 3       Rav4  24   4  8.2      10      8          4      6.5       Adults
## 4    Corolla  26   4  8.0       7      6          6      6.0 Young Adults
## 5    Corolla  26   4  8.0       7      6          5      6.0 Young Adults
#age group analysis for Honda across Regions

filtered_data_Honda <- Car_T %>%
  filter(Brand == "Honda")

#create a graph showing the number of Honda cars by Age group for each Region
ggplot(filtered_data_Honda, aes(x=Region, fill = Age_Grp))+
  theme_bw()+
  geom_bar()+
  labs (y="Number of Cars",
        title = "Number of Honda by age group and region")

ls()
##  [1] "brand_region_counts"   "brand_region_table"    "C1"                   
##  [4] "C2"                    "Car_Data_1"            "Car_Data_2"           
##  [7] "car_region_percentage" "Car_T"                 "filtered_data_Honda"  
## [10] "Honda_Att1_Mean"       "meanATT1"              "Multiple_Att1_Mean"