This is an R Markdown document, detailing how to do Project 1 in MKTG 3P98. We will be looking at data files from a survey about peoples perception on certain popular car brands and models as well as some demographic and ethnographic information associated with the survey takers. By the end we will be able to generate some insights as to the amount of these car brands and how popular they are in each geographic region, and subsequently how popular their car models are in said regions.
get working directory
getwd()
## [1] "/Users/macbookair/Desktop/MKTG R Project 1"
Open and Read the CSV files for the Project
#open and view first file
C1<-read.csv("Car_Survey_1.csv")
str(C1)
## 'data.frame': 1180 obs. of 23 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
head(C1,n=5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res2 7 5 5 2 2 6 7 5 5
## 3 Res3 7 7 7 5 5 5 3 6 6
## 4 Res4 4 1 1 1 1 1 1 7 7
## 5 Res5 6 6 6 5 5 2 1 7 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 7 7 6
## 3 7 7 5 7 7 7
## 4 3 3 6 5 5 4
## 5 7 6 6 5 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X
## 1 2 2 Collision Male 18 2 NA
## 2 2 2 Collision Male 18 2 NA
## 3 1 2 Collision Male 19 2 NA
## 4 2 2 Collision Male 19 2 NA
## 5 1 2 Collision Female 19 2 NA
View(C1)
#open second file
C2<-read.csv("Car_Survey_2.csv")
str(C2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost. : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
head(C2,n=5)
## Respondents Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
View(C2)
clean and merge the data files, creating a new one.
#edit column names
names(C2)[1]<-c("Resp")
head(C2,n=1)
## Resp Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
#merge columns
Car_T<-merge(C1,C2, by="Resp")
str(Car_T)
## 'data.frame': 1049 obs. of 31 variables:
## $ Resp : chr "Res1" "Res10" "Res100" "Res1000" ...
## $ Att_1 : int 6 6 6 6 6 3 2 7 2 6 ...
## $ Att_2 : int 6 6 7 6 6 1 2 7 1 6 ...
## $ Enj_1 : int 6 4 7 7 7 4 1 7 2 6 ...
## $ Enj_2 : int 6 4 3 6 6 3 2 6 1 5 ...
## $ Perform_1 : int 5 4 5 6 6 5 2 5 2 5 ...
## $ Perform_2 : int 6 4 6 6 6 6 2 6 2 5 ...
## $ Perform_3 : int 3 1 6 6 6 6 1 5 2 5 ...
## $ WOM_1 : int 3 5 3 6 4 2 6 6 7 3 ...
## $ WOM_2 : int 3 6 5 6 4 6 7 6 7 3 ...
## $ Futu_Pur_1 : int 3 6 6 6 4 6 6 6 7 6 ...
## $ Futu_Pur_2 : int 3 6 6 6 6 6 5 7 7 6 ...
## $ Valu_Percp_1: int 5 6 7 4 5 5 4 6 4 5 ...
## $ Valu_Percp_2: int 2 6 6 6 6 4 4 5 6 6 ...
## $ Pur_Proces_1: int 6 6 5 6 6 5 4 5 6 6 ...
## $ Pur_Proces_2: int 4 6 5 3 7 5 5 5 7 5 ...
## $ Residence : int 2 1 2 2 1 1 1 2 1 2 ...
## $ Pay_Meth : int 2 2 1 3 3 3 3 3 3 3 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Liability" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 18 21 32 24 24 25 26 26 27 27 ...
## $ Education : int 2 2 1 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
## $ Region : chr "European" "European" "American" "Asian" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
## $ MPG : int 15 15 24 26 26 26 26 26 26 26 ...
## $ Cyl : int 8 8 4 4 4 4 4 4 4 4 ...
## $ acc1 : num 5.5 5.5 8.2 8 8 8 8 8 8 8 ...
## $ C_cost. : num 16 16 10 7 7 7 7 7 7 7 ...
## $ H_Cost : num 14 14 8 6 6 6 6 6 6 6 ...
## $ Post.Satis : int 4 5 4 6 5 6 5 6 7 6 ...
#create new merged table
write.csv(Car_T,"Car_Total",row.names = FALSE)
View(Car_T)
loading the data files in excel format to check for N/A or non numeric values
library(readxl)
Car_Data_1<-read_excel("Car_Survey_1.xlsx")
Car_Data_2<-read_excel("Car_Survey_2.xlsx")
#checking missing (or NA values)
summary(Car_T)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
##
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
#the output was NA
#compute mean value of Att_1
mean(Car_T$Att_1)
## [1] NA
meanATT1<-mean(Car_T$Att_1,na.rm = TRUE)
create a new “average” value to replace the NA values in the Attribute_1 column
#replace NA values with the column mean of Att_1
meanATT1<-mean(Car_T$Att_1,na.rm = TRUE)
Car_T[is.na(Car_T$Att_1),"Att_1"]<-meanATT1
#Replace NA Values in Car_Total with the mean of their respective columns
Car_T[is.na(Car_T)]<-mean(Car_T, na.rm = TRUE)
## Warning in mean.default(Car_T, na.rm = TRUE): argument is not numeric or
## logical: returning NA
summary(Car_T)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Min. : NA Length:1049 Length:1049
## 1st Qu.:2.000 1st Qu.: NA Class :character Class :character
## Median :2.000 Median : NA Mode :character Mode :character
## Mean :1.989 Mean :NaN
## 3rd Qu.:2.000 3rd Qu.: NA
## Max. :3.000 Max. : NA
## NA's :1049
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Car_T<- Car_T %>%
mutate(across(where(is.numeric), ~ ifelse(is.na(.),mean(., na.rm = TRUE),.)))
summary(Car_T)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
##
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
##
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
##
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Min. : NA Length:1049 Length:1049
## 1st Qu.:2.000 1st Qu.: NA Class :character Class :character
## Median :2.000 Median : NA Mode :character Mode :character
## Mean :1.989 Mean :NaN
## 3rd Qu.:2.000 3rd Qu.: NA
## Max. :3.000 Max. : NA
## NA's :1049
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
#create a new column to calculate mean of Att_1 and Att_2
Car_T$Att_Mean = (Car_T$Att_1 + Car_T$Att_2) /2
View(Car_T)
create a bar charts, one showing the distrution of car brands across the region; and another bar chart breaking that chart further down by car model.
#what is the distribution of cars across the regions (frequency count)?
#install.packages("ggplot2")
library(ggplot2)
#Create Graph 1
ggplot(Car_T,aes(x=Region,fill = Region)) +
theme_bw()+
geom_bar()+
geom_text(stat = "count", aes(label=..count..),vjust=0) +
labs(y="Number of Cars",
x = "Region",
title = "Number of cars by Region")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
car_region_percentage <- prop.table(table(Car_T$Region))
print(car_region_percentage)
##
## American Asian European Middle Eastern
## 0.3431840 0.1982841 0.2001907 0.2583413
#Define Model and Region as categorical variables
Car_T$Model<-as.factor(Car_T$Model)
Car_T$Region<-as.factor(Car_T$Region)
#Create Graph 2
ggplot(Car_T,aes(x=Region,fill=Model))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars by Model and Region")
library(tidyr)
library(dplyr)
library(ggplot2)
Car_T <- Car_T %>%
separate(Model, into = c("Brand", "Model"),
sep = " ", extra = "merge")
#count the total number of cars by brand
count(Car_T,Car_T$Make, Car_T$Brand, name = "Freq")
## Car_T$Brand Freq
## 1 Buick 31
## 2 Chevrolet 64
## 3 Chrysler 169
## 4 Dodge 41
## 5 Fiat 18
## 6 Ford 202
## 7 Honda 159
## 8 Kia 34
## 9 Lincoln 39
## 10 Toyota 292
#create a crosstabulation table for brand by region
xtabs(~Region + Brand, Car_T)
## Brand
## Region Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
## American 17 22 54 22 9 81 38 15 0
## Asian 0 0 24 0 0 24 58 0 0
## European 10 21 13 0 9 29 29 19 0
## Middle Eastern 4 21 78 19 0 68 34 0 39
## Brand
## Region Toyota
## American 102
## Asian 102
## European 80
## Middle Eastern 8
#count the total number of cars by Brand across Region
brand_region_counts <- table(Car_T$Brand, Car_T$Region)
print(brand_region_counts)
##
## American Asian European Middle Eastern
## Buick 17 0 10 4
## Chevrolet 22 0 21 21
## Chrysler 54 24 13 78
## Dodge 22 0 0 19
## Fiat 9 0 9 0
## Ford 81 24 29 68
## Honda 38 58 29 34
## Kia 15 0 19 0
## Lincoln 0 0 0 39
## Toyota 102 102 80 8
#attitude toward car by region
brand_region_table <- aggregate(Att_1~Brand+Region, Car_T, mean)
print(brand_region_table)
## Brand Region Att_1
## 1 Buick American 5.529412
## 2 Chevrolet American 5.227273
## 3 Chrysler American 5.011979
## 4 Dodge American 4.818182
## 5 Fiat American 4.666667
## 6 Ford American 4.567901
## 7 Honda American 5.657895
## 8 Kia American 3.266667
## 9 Toyota American 5.254902
## 10 Chrysler Asian 5.291667
## 11 Ford Asian 4.166667
## 12 Honda Asian 5.568966
## 13 Toyota Asian 4.881199
## 14 Buick European 5.000000
## 15 Chevrolet European 5.000000
## 16 Chrysler European 5.230769
## 17 Fiat European 3.777778
## 18 Ford European 5.241379
## 19 Honda European 4.758621
## 20 Kia European 3.789474
## 21 Toyota European 4.800000
## 22 Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24 Chrysler Middle Eastern 4.038462
## 25 Dodge Middle Eastern 4.473684
## 26 Ford Middle Eastern 4.014706
## 27 Honda Middle Eastern 5.500000
## 28 Lincoln Middle Eastern 5.692308
## 29 Toyota Middle Eastern 5.375000
#create a graph (data visualization - attitude toward car brands across regions)
ggplot(brand_region_table, aes(x=Region, y=Att_1, group=Brand))+
geom_line(aes(color=Brand))+
geom_point(aes(color=Brand))+
labs(y="Att_1 Mean",
title = "Attitude Mean by Brand and Region")
We will now be looking at the following data from Honda’s Perspective.
#select a specific brand and explore attitude across regions
#Honda
#Filter by a specific brand (Honda) from the brand_region_table created above.
Honda_Att1_Mean <- brand_region_table %>%
filter (Brand == "Honda")
#view the results
print(Honda_Att1_Mean)
## Brand Region Att_1
## 1 Honda American 5.657895
## 2 Honda Asian 5.568966
## 3 Honda European 4.758621
## 4 Honda Middle Eastern 5.500000
#Create a graph (data visualization of the results above)
ggplot(Honda_Att1_Mean, aes(x=Region, y=Att_1, group = Brand))+
geom_line(aes(color=Brand))+
geom_point(aes(color=Brand))+
scale_y_continuous(limits = c(3,6))+
labs (y="Att_1 Mean",
title = "Attitude Mean for Honda by region")
#filter for competing brands (Honda vs Toyota) and comparing attitudes
Multiple_Att1_Mean <- brand_region_table %>%
filter(Brand == "Honda" | Brand == "Toyota")
#view the results
print(Multiple_Att1_Mean)
## Brand Region Att_1
## 1 Honda American 5.657895
## 2 Toyota American 5.254902
## 3 Honda Asian 5.568966
## 4 Toyota Asian 4.881199
## 5 Honda European 4.758621
## 6 Toyota European 4.800000
## 7 Honda Middle Eastern 5.500000
## 8 Toyota Middle Eastern 5.375000
#create a graph (data visualization - attitude toward Honda vs Toyota across regions)
ggplot(Multiple_Att1_Mean, aes(x=Region, y=Att_1, group=Brand))+
geom_line(aes(color=Brand))+
geom_point(aes(color=Brand))+
scale_y_continuous(limits = c(3,6))+
labs (y="Att_1 Mean",
title = "Attitude Mean for Honda and Toyota by region")
# Demographic Groups by Age Variable (convert age into 3 groups)
Car_T$Age_Grp <- cut(Car_T$Age, breaks=c(0, 30, 50, Inf),
labels=c("Young Adults", "Adults", "Mature Adults"), right=FALSE)
names(Car_T)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region" "Brand"
## [26] "Model" "MPG" "Cyl" "acc1" "C_cost."
## [31] "H_Cost" "Post.Satis" "Att_Mean" "Age_Grp"
head(Car_T, n=5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res10 6 6 4 4 4 4 1 5 6
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 6 6 6
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X Region Brand
## 1 2 2 Collision Male 18 2 NaN European Ford
## 2 1 2 Collision Male 21 2 NaN European Ford
## 3 2 1 Collision Female 32 1 NaN American Toyota
## 4 2 3 Liability Female 24 2 NaN Asian Toyota
## 5 1 3 Liability Female 24 2 NaN Asian Toyota
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean Age_Grp
## 1 Expedition 15 8 5.5 16 14 4 6.0 Young Adults
## 2 Expedition 15 8 5.5 16 14 5 6.0 Young Adults
## 3 Rav4 24 4 8.2 10 8 4 6.5 Adults
## 4 Corolla 26 4 8.0 7 6 6 6.0 Young Adults
## 5 Corolla 26 4 8.0 7 6 5 6.0 Young Adults
#age group analysis for Honda across Regions
filtered_data_Honda <- Car_T %>%
filter(Brand == "Honda")
#create a graph showing the number of Honda cars by Age group for each Region
ggplot(filtered_data_Honda, aes(x=Region, fill = Age_Grp))+
theme_bw()+
geom_bar()+
labs (y="Number of Cars",
title = "Number of Honda by age group and region")
ls()
## [1] "brand_region_counts" "brand_region_table" "C1"
## [4] "C2" "Car_Data_1" "Car_Data_2"
## [7] "car_region_percentage" "Car_T" "filtered_data_Honda"
## [10] "Honda_Att1_Mean" "meanATT1" "Multiple_Att1_Mean"