Get working directory
# Show the directory R is currently working in.
getwd()
## [1] "C:/Users/kylev/OneDrive/Documents"
# NOTE(review): hard-coded absolute path that mirrors the getwd() output
# above; the script fails on any machine without this directory. Consider an
# RStudio project or relative paths instead.
setwd("C:/Users/kylev/OneDrive/Documents")
Read the two survey files, rename the key column, then merge and view the data.
# Load the two survey exports from the working directory.
Car1 <- utils::read.csv(file = "Car_Survey_1a.csv")
Car2 <- utils::read.csv(file = "Car_Survey_2a.csv")

# Column names, types and first values of the first survey.
utils::str(Car1)
## 'data.frame': 1180 obs. of 23 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
# Preview the first five respondents of the first survey.
utils::head(Car1, n = 5L)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res2 7 5 5 2 2 6 7 5 5
## 3 Res3 7 7 7 5 5 5 3 6 6
## 4 Res4 4 1 1 1 1 1 1 7 7
## 5 Res5 6 6 6 5 5 2 1 7 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 7 7 6
## 3 7 7 5 7 7 7
## 4 3 3 6 5 5 4
## 5 7 6 6 5 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X
## 1 2 2 Collision Male 18 2 NA
## 2 2 2 Collision Male 18 2 NA
## 3 1 2 Collision Male 19 2 NA
## 4 2 2 Collision Male 19 2 NA
## 5 1 2 Collision Female 19 2 NA
# Open the first survey in the data viewer.
View(Car1)

# Column names, types and first values of the second survey.
utils::str(Car2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost. : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
# Preview the first five rows of the second survey.
utils::head(Car2, n = 5L)
## Respondents Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
View(Car2)

# Rename the first column of Car2 to "Resp" so that both data frames share
# the same key column name for the merge.
colnames(Car2)[1L] <- "Resp"
utils::head(Car2, n = 5L)
## Resp Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
# Join the two surveys on the respondent id. merge() keeps only respondents
# present in both data frames by default.
Car_Total <- merge(x = Car1, y = Car2, by = "Resp")
utils::str(Car_Total)
## 'data.frame': 1049 obs. of 31 variables:
## $ Resp : chr "Res1" "Res10" "Res100" "Res1000" ...
## $ Att_1 : int 6 6 6 6 6 3 2 7 2 6 ...
## $ Att_2 : int 6 6 7 6 6 1 2 7 1 6 ...
## $ Enj_1 : int 6 4 7 7 7 4 1 7 2 6 ...
## $ Enj_2 : int 6 4 3 6 6 3 2 6 1 5 ...
## $ Perform_1 : int 5 4 5 6 6 5 2 5 2 5 ...
## $ Perform_2 : int 6 4 6 6 6 6 2 6 2 5 ...
## $ Perform_3 : int 3 1 6 6 6 6 1 5 2 5 ...
## $ WOM_1 : int 3 5 3 6 4 2 6 6 7 3 ...
## $ WOM_2 : int 3 6 5 6 4 6 7 6 7 3 ...
## $ Futu_Pur_1 : int 3 6 6 6 4 6 6 6 7 6 ...
## $ Futu_Pur_2 : int 3 6 6 6 6 6 5 7 7 6 ...
## $ Valu_Percp_1: int 5 6 7 4 5 5 4 6 4 5 ...
## $ Valu_Percp_2: int 2 6 6 6 6 4 4 5 6 6 ...
## $ Pur_Proces_1: int 6 6 5 6 6 5 4 5 6 6 ...
## $ Pur_Proces_2: int 4 6 5 3 7 5 5 5 7 5 ...
## $ Residence : int 2 1 2 2 1 1 1 2 1 2 ...
## $ Pay_Meth : int 2 2 1 3 3 3 3 3 3 3 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Liability" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 18 21 32 24 24 25 26 26 27 27 ...
## $ Education : int 2 2 1 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
## $ Region : chr "European" "European" "American" "Asian" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
## $ MPG : int 15 15 24 26 26 26 26 26 26 26 ...
## $ Cyl : int 8 8 4 4 4 4 4 4 4 4 ...
## $ acc1 : num 5.5 5.5 8.2 8 8 8 8 8 8 8 ...
## $ C_cost. : num 16 16 10 7 7 7 7 7 7 7 ...
## $ H_Cost : num 14 14 8 6 6 6 6 6 6 6 ...
## $ Post.Satis : int 4 5 4 6 5 6 5 6 7 6 ...
# Save the merged data without the row-number column.
# NOTE(review): the output file name has no ".csv" extension — confirm
# whether "Car_Total.csv" was intended.
utils::write.csv(x = Car_Total, file = "Car_Total", row.names = FALSE)
View(Car_Total)
Load the installed packages.
library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Check for missing values and compute the mean of Att_1.
# Per-column summary; the "NA's" rows show how many values are missing.
summary(object = Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
##
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
# Mean of Att_1 without removing missing values, so the result is NA while
# the column still contains NAs.
mean(Car_Total[["Att_1"]])
## [1] NA
# Replace the missing values of Att_1 with the column mean, computed while
# ignoring the NAs.
meanAAT1 <- mean(Car_Total[["Att_1"]], na.rm = TRUE)
Car_Total$Att_1 <- replace(
  Car_Total$Att_1,
  is.na(Car_Total$Att_1),
  meanAAT1
)
Calculate the mean values for all other variables.
# Column means for every numeric variable in a single vectorised call.
# Non-numeric columns (character ids/labels and the all-NA logical X) are
# skipped, because mean() on them only produces warnings or NA.
numeric_columns <- vapply(Car_Total, is.numeric, logical(1))

# na.rm = TRUE so that columns that still contain missing values report a
# usable mean instead of NA.
colMeans(Car_Total[numeric_columns], na.rm = TRUE)

# Number of missing values left in each numeric column; these are the
# columns that still need imputation.
colSums(is.na(Car_Total[numeric_columns]))
## [1] 5.280267
Replace the missing (NA) values of each variable with the mean of that column.
# Mean-impute the missing values of every numeric column.
# The selection is limited to numeric columns: applying mean() to the
# character columns only raises warnings, and applying it to the all-NA
# logical column X turns that column into numeric NaN.
# NOTE(review): the summary before imputation reports a maximum of 9 for
# Futu_Pur_1 while the other survey items top out at 7 — possibly a
# data-entry error; confirm before using that item.
Car_Total <- Car_Total %>%
  mutate(
    across(
      where(is.numeric),
      ~ ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)
    )
  )
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
##
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
##
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
##
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Min. : NA Length:1049 Length:1049
## 1st Qu.:2.000 1st Qu.: NA Class :character Class :character
## Median :2.000 Median : NA Mode :character Mode :character
## Mean :1.989 Mean :NaN
## 3rd Qu.:2.000 3rd Qu.: NA
## Max. :3.000 Max. : NA
## NA's :1049
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
Compute each respondent's mean of Att_1 and Att_2.
# Composite attitude score: row-wise average of the two attitude items.
Car_Total$Att_Mean <- with(Car_Total, (Att_1 + Att_2) / 2)
View(Car_Total)
Compute the same per-respondent mean for the remaining multi-item variables.
# Composite scores: row-wise average of the items belonging to each
# construct. Columns are appended in the order listed here.
Car_Total <- Car_Total %>%
  mutate(
    Enj_Mean = (Enj_1 + Enj_2) / 2,
    Perform_Mean = (Perform_1 + Perform_2 + Perform_3) / 3,
    Valu_Mean = (Valu_Percp_1 + Valu_Percp_2) / 2,
    WOM_Mean = (WOM_1 + WOM_2) / 2,
    Future_Mean = (Futu_Pur_1 + Futu_Pur_2) / 2,
    Pur_Purchase_Mean = (Pur_Proces_1 + Pur_Proces_2) / 2
  )
view(Car_Total)
Graph frequency count of cars across the different regions.
# Bar chart of the number of cars per region, with the count printed on top
# of each bar. after_stat(count) replaces the `..count..` notation, which was
# deprecated in ggplot2 3.4.0.
ggplot(Car_Total, aes(x = Region, fill = Region)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 0) +
  labs(
    y = "Number of Cars",
    x = "Region",
    title = "Number of Cars by Region"
  )
Share of cars in each region as a proportion of the whole market.
# Share of cars per region, expressed as proportions that sum to 1.
car_region_percentage <- proportions(table(Car_Total[["Region"]]))
print(car_region_percentage)
##
## American Asian European Middle Eastern
## 0.3431840 0.1982841 0.2001907 0.2583413
Convert Model and Region to categorical (factor) variables.
# Store Model and Region as factors.
factor_columns <- c("Model", "Region")
Car_Total[factor_columns] <- lapply(Car_Total[factor_columns], as.factor)
Create a graph showing the number of cars by model and region.
# Stacked bar chart: cars per region, coloured by model.
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Model)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Number of Cars by Model and Region",
    y = "Number of Cars"
  )
Split the Model column into Brand and Model.
# Split "Model" at the first space: the first word becomes Brand and the
# remainder (extra = "merge") stays in Model.
cars_seperated <- separate(
  Car_Total,
  col = Model,
  into = c("Brand", "Model"),
  sep = " ",
  extra = "merge"
)
Car_Total <- cars_seperated
view(Car_Total)
View Graph of cars split by brand and region
# Stacked bar chart: cars per region, coloured by brand.
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Brand)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Number of Cars by Brand and Region",
    y = "Number of Cars"
  )
Count the number of cars per brand, then cross-tabulate the counts by brand and region.
# Number of cars per brand. The column is referenced by its bare name so the
# result column is called "Brand" rather than "Car_Total$Brand"; the former
# reference to the non-existent column Car_Total$Make has been removed.
count(Car_Total, Brand, name = "Freq")
## Brand Freq
## 1 Buick 31
## 2 Chevrolet 64
## 3 Chrysler 169
## 4 Dodge 41
## 5 Fiat 18
## 6 Ford 202
## 7 Honda 159
## 8 Kia 34
## 9 Lincoln 39
## 10 Toyota 292
# Cross-tabulation of car counts: regions in rows, brands in columns.
xtabs(formula = ~ Region + Brand, data = Car_Total)
## Brand
## Region Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
## American 17 22 54 22 9 81 38 15 0
## Asian 0 0 24 0 0 24 58 0 0
## European 10 21 13 0 9 29 29 19 0
## Middle Eastern 4 21 78 19 0 68 34 0 39
## Brand
## Region Toyota
## American 102
## Asian 102
## European 80
## Middle Eastern 8
# The same counts transposed: brands in rows, regions in columns.
brand_region_counts <- table(Car_Total[["Brand"]], Car_Total[["Region"]])
print(brand_region_counts)
##
## American Asian European Middle Eastern
## Buick 17 0 10 4
## Chevrolet 22 0 21 21
## Chrysler 54 24 13 78
## Dodge 22 0 0 19
## Fiat 9 0 9 0
## Ford 81 24 29 68
## Honda 38 58 29 34
## Kia 15 0 19 0
## Lincoln 0 0 0 39
## Toyota 102 102 80 8
Compute and graph the mean attitude toward each brand by region.
# Mean of Att_1 for every brand/region combination that occurs in the data.
# This uses the single item Att_1, not the composite Att_Mean.
brand_region_table <- aggregate(
  Att_1 ~ Brand + Region,
  data = Car_Total,
  FUN = mean
)
print(brand_region_table)
## Brand Region Att_1
## 1 Buick American 5.529412
## 2 Chevrolet American 5.227273
## 3 Chrysler American 5.011979
## 4 Dodge American 4.818182
## 5 Fiat American 4.666667
## 6 Ford American 4.567901
## 7 Honda American 5.657895
## 8 Kia American 3.266667
## 9 Toyota American 5.254902
## 10 Chrysler Asian 5.291667
## 11 Ford Asian 4.166667
## 12 Honda Asian 5.568966
## 13 Toyota Asian 4.881199
## 14 Buick European 5.000000
## 15 Chevrolet European 5.000000
## 16 Chrysler European 5.230769
## 17 Fiat European 3.777778
## 18 Ford European 5.241379
## 19 Honda European 4.758621
## 20 Kia European 3.789474
## 21 Toyota European 4.800000
## 22 Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24 Chrysler Middle Eastern 4.038462
## 25 Dodge Middle Eastern 4.473684
## 26 Ford Middle Eastern 4.014706
## 27 Honda Middle Eastern 5.500000
## 28 Lincoln Middle Eastern 5.692308
## 29 Toyota Middle Eastern 5.375000
# Line-and-point chart of mean Att_1 per region, one coloured line per brand.
ggplot(
  data = brand_region_table,
  mapping = aes(x = Region, y = Att_1, group = Brand, colour = Brand)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Attitude Mean by Brand and region",
    y = "Att_1 Mean"
  )
Filter brand region by Toyota.
# Keep only the Toyota rows of the brand/region attitude table.
Toyota_Att1_Mean <- filter(brand_region_table, Brand == "Toyota")
print(Toyota_Att1_Mean)
## Brand Region Att_1
## 1 Toyota American 5.254902
## 2 Toyota Asian 4.881199
## 3 Toyota European 4.800000
## 4 Toyota Middle Eastern 5.375000
Graph filtered Toyota data.
# Mean Att_1 for Toyota across regions.
ggplot(
  data = Toyota_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, colour = Brand)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Attitude Mean by for Toyota by Region",
    y = "Att_1 Mean"
  )
Filter and Compare attitudes of Toyota and Honda.
# Keep the Toyota and Honda rows of the brand/region attitude table.
Multiple_Att1_Mean <- filter(
  brand_region_table,
  Brand %in% c("Toyota", "Honda")
)
view(Multiple_Att1_Mean)
Graph attitude mean for Toyota and Honda.
# Mean Att_1 per region, one coloured line each for Toyota and Honda.
ggplot(
  data = Multiple_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, colour = Brand)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Attitude Mean for Toyota and Honda by Region",
    y = "Att_1 Mean"
  )
Create demographic variable by age group.
# Bin Age into three groups. With right = FALSE the intervals are closed on
# the left: [0, 30), [30, 50) and [50, Inf).
Car_Total$AgeGr <- cut(
  x = Car_Total$Age,
  breaks = c(0, 30, 50, Inf),
  labels = c("Young Adults", "Adults", "Mature Adults"),
  right = FALSE
)
colnames(Car_Total)
## [1] "Resp" "Att_1" "Att_2"
## [4] "Enj_1" "Enj_2" "Perform_1"
## [7] "Perform_2" "Perform_3" "WOM_1"
## [10] "WOM_2" "Futu_Pur_1" "Futu_Pur_2"
## [13] "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth"
## [19] "Insur_Type" "Gender" "Age"
## [22] "Education" "X" "Region"
## [25] "Brand" "Model" "MPG"
## [28] "Cyl" "acc1" "C_cost."
## [31] "H_Cost" "Post.Satis" "Att_Mean"
## [34] "Enj_Mean" "Perform_Mean" "Valu_Mean"
## [37] "WOM_Mean" "Future_Mean" "Pur_Purchase_Mean"
## [40] "AgeGr"
# Preview the first five rows, including the derived columns.
utils::head(Car_Total, n = 5L)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res10 6 6 4 4 4 4 1 5 6
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 6 6 6
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X Region Brand
## 1 2 2 Collision Male 18 2 NaN European Ford
## 2 1 2 Collision Male 21 2 NaN European Ford
## 3 2 1 Collision Female 32 1 NaN American Toyota
## 4 2 3 Liability Female 24 2 NaN Asian Toyota
## 5 1 3 Liability Female 24 2 NaN Asian Toyota
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean Enj_Mean
## 1 Expedition 15 8 5.5 16 14 4 6.0 6.0
## 2 Expedition 15 8 5.5 16 14 5 6.0 4.0
## 3 Rav4 24 4 8.2 10 8 4 6.5 5.0
## 4 Corolla 26 4 8.0 7 6 6 6.0 6.5
## 5 Corolla 26 4 8.0 7 6 5 6.0 6.5
## Perform_Mean Valu_Mean WOM_Mean Future_Mean Pur_Purchase_Mean AgeGr
## 1 4.666667 3.5 3.0 3 5.0 Young Adults
## 2 3.000000 6.0 5.5 6 6.0 Young Adults
## 3 5.666667 6.5 4.0 6 5.0 Adults
## 4 6.000000 5.0 6.0 6 4.5 Young Adults
## 5 6.000000 5.5 4.0 5 6.5 Young Adults
Age group analysis of Toyota by Region.
# Respondent-level rows for Toyota only.
Filtered_Data_Toyota <- filter(Car_Total, Brand == "Toyota")

# Stacked bar chart: Toyota cars per region, coloured by age group.
ggplot(data = Filtered_Data_Toyota, mapping = aes(x = Region, fill = AgeGr)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Number of Toyota cars by Age group and Region",
    y = "Number of Cars"
  )
Analysis of Toyota by Model and Age Group
# Stacked bar chart: Toyota cars per model, coloured by age group.
ggplot(data = Filtered_Data_Toyota, mapping = aes(x = Model, fill = AgeGr)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Number of Toyota cars by Age group and Model",
    y = "Number of Cars"
  )
Customer average satisfaction by age group
# Stacked bar chart of Toyota respondents at each post-purchase satisfaction
# score, coloured by age group.
# NOTE(review): the bars show respondent counts, not averages, so "Average"
# in the title may mislead — confirm the intended wording.
ggplot(
  data = Filtered_Data_Toyota,
  mapping = aes(x = Post.Satis, fill = AgeGr)
) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Average Customer Satisfaction by Age Group",
    x = "Post Purchase Satisfaction",
    y = "Total Number of Customers"
  )
Customer Satisfaction by Car Model
# Stacked bar chart of Toyota respondents at each post-purchase satisfaction
# score, coloured by car model.
# NOTE(review): the bars show respondent counts, not averages, so "Average"
# in the title may mislead — confirm the intended wording.
ggplot(
  data = Filtered_Data_Toyota,
  mapping = aes(x = Post.Satis, fill = Model)
) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Average Customer Satisfaction by Car Model",
    x = "Post Purchase Satisfaction",
    y = "Total Number of Customers"
  )
Future purchase intent by region
# Mean future-purchase score for every brand/region combination.
future_purchase_table <- aggregate(
  Future_Mean ~ Brand + Region,
  data = Car_Total,
  FUN = mean
)

# One coloured line per brand across regions.
ggplot(
  data = future_purchase_table,
  mapping = aes(x = Region, y = Future_Mean, group = Brand, colour = Brand)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Future Purchase Mean by Brand and region",
    y = "Future Purchase Mean"
  )
Filter Toyota from above
# Keep only the Toyota rows of the future-purchase table.
Toyota_future_Mean <- filter(future_purchase_table, Brand == "Toyota")
print(Toyota_future_Mean)
## Brand Region Future_Mean
## 1 Toyota American 5.313786
## 2 Toyota Asian 5.382353
## 3 Toyota European 5.275000
## 4 Toyota Middle Eastern 5.125000
Future purchase intent for Toyota by region
# Mean future-purchase score for Toyota across regions.
ggplot(
  data = Toyota_future_Mean,
  mapping = aes(x = Region, y = Future_Mean, group = Brand, colour = Brand)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Future Purchase Mean for Toyota by Region",
    y = "Future Purchase Mean"
  )
Filter and graph Toyota's future purchase intent alongside its main competitors
# Respondent-level rows for Toyota and its three main competitors.
future_mean_competitors <- Car_Total %>%
  filter(Brand %in% c("Toyota", "Honda", "Ford", "Chrysler"))

# Mean future-purchase score per brand and region for those four brands.
future_purchase_tables <- aggregate(
  Future_Mean ~ Brand + Region,
  future_mean_competitors,
  mean
)

# Inspect the competitor table built above (the previous call opened the
# all-brands table future_purchase_table instead).
view(future_purchase_tables)

ggplot(future_purchase_tables, aes(x = Region, y = Future_Mean, group = Brand)) +
  geom_line(aes(colour = Brand)) +
  geom_point(aes(colour = Brand)) +
  labs(
    y = "Future Purchase Mean",
    title = "Future Purchase Mean by Brand and region"
  )
Graphing and filtering Toyota market share contrasting it with its main competitors
# Respondent-level rows for Toyota and its three main competitors.
Brands_competitors <- filter(
  Car_Total,
  Brand %in% c("Toyota", "Honda", "Ford", "Chrysler")
)

# Stacked bar chart: cars per region, coloured by brand.
ggplot(data = Brands_competitors, mapping = aes(x = Region, fill = Brand)) +
  geom_bar() +
  theme_bw() +
  labs(
    title = "Market Share of Main Competitors",
    y = "Number of Cars"
  )