#Set Up Working Directory
getwd()
## [1] "/Users/jadensavoy/Desktop/MKTG 3P98/Excel Files"
setwd("/Users/jadensavoy/Desktop/MKTG 3P98/Excel Files")
#Load in Packages
library(tinytex)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Load in the Datasets ##Load Car1 Data File Using read.csv
Car1<-read.csv("Car_Survey_1a.csv")
##Load Car2 Data File Using read.csv
Car2<-read.csv("Car_Survey_2a.csv")
#Display Each Dataset ##Display The Structure of Car1 (data frame)
str(Car1)
## 'data.frame': 1180 obs. of 23 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
head(Car1,n=5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res2 7 5 5 2 2 6 7 5 5
## 3 Res3 7 7 7 5 5 5 3 6 6
## 4 Res4 4 1 1 1 1 1 1 7 7
## 5 Res5 6 6 6 5 5 2 1 7 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 7 7 6
## 3 7 7 5 7 7 7
## 4 3 3 6 5 5 4
## 5 7 6 6 5 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X
## 1 2 2 Collision Male 18 2 NA
## 2 2 2 Collision Male 18 2 NA
## 3 1 2 Collision Male 19 2 NA
## 4 2 2 Collision Male 19 2 NA
## 5 1 2 Collision Female 19 2 NA
##Display The Structure of Car2 (data frame)
str(Car2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost. : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
head(Car2,n=5)
## Respondents Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
#Create a Master Dataset (combined Car1 and Car2) ##Rename Unique ID in Car 2 to match with Car1
names(Car2)[1]<-c("Resp")
##Check to See if it was changed
head(Car2,n=1)
## Resp Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
##Merge Car1 and Car2 into one dataset
Car_Total<-merge(Car1,Car2, by="Resp")
str(Car_Total)
## 'data.frame': 1049 obs. of 31 variables:
## $ Resp : chr "Res1" "Res10" "Res100" "Res1000" ...
## $ Att_1 : int 6 6 6 6 6 3 2 7 2 6 ...
## $ Att_2 : int 6 6 7 6 6 1 2 7 1 6 ...
## $ Enj_1 : int 6 4 7 7 7 4 1 7 2 6 ...
## $ Enj_2 : int 6 4 3 6 6 3 2 6 1 5 ...
## $ Perform_1 : int 5 4 5 6 6 5 2 5 2 5 ...
## $ Perform_2 : int 6 4 6 6 6 6 2 6 2 5 ...
## $ Perform_3 : int 3 1 6 6 6 6 1 5 2 5 ...
## $ WOM_1 : int 3 5 3 6 4 2 6 6 7 3 ...
## $ WOM_2 : int 3 6 5 6 4 6 7 6 7 3 ...
## $ Futu_Pur_1 : int 3 6 6 6 4 6 6 6 7 6 ...
## $ Futu_Pur_2 : int 3 6 6 6 6 6 5 7 7 6 ...
## $ Valu_Percp_1: int 5 6 7 4 5 5 4 6 4 5 ...
## $ Valu_Percp_2: int 2 6 6 6 6 4 4 5 6 6 ...
## $ Pur_Proces_1: int 6 6 5 6 6 5 4 5 6 6 ...
## $ Pur_Proces_2: int 4 6 5 3 7 5 5 5 7 5 ...
## $ Residence : int 2 1 2 2 1 1 1 2 1 2 ...
## $ Pay_Meth : int 2 2 1 3 3 3 3 3 3 3 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Liability" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 18 21 32 24 24 25 26 26 27 27 ...
## $ Education : int 2 2 1 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
## $ Region : chr "European" "European" "American" "Asian" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
## $ MPG : int 15 15 24 26 26 26 26 26 26 26 ...
## $ Cyl : int 8 8 4 4 4 4 4 4 4 4 ...
## $ acc1 : num 5.5 5.5 8.2 8 8 8 8 8 8 8 ...
## $ C_cost. : num 16 16 10 7 7 7 7 7 7 7 ...
## $ H_Cost : num 14 14 8 6 6 6 6 6 6 6 ...
## $ Post.Satis : int 4 5 4 6 5 6 5 6 7 6 ...
head(Car_Total)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res10 6 6 4 4 4 4 1 5 6
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 6 6 6
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 1 2 2 Collision Male 18 2 NA European
## 2 1 2 Collision Male 21 2 NA European
## 3 2 1 Collision Female 32 1 NA American
## 4 2 3 Liability Female 24 2 NA Asian
## 5 1 3 Liability Female 24 2 NA Asian
## 6 1 3 Liability Female 25 2 NA Asian
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Ford Expedition 15 8 5.5 16 14 4
## 2 Ford Expedition 15 8 5.5 16 14 5
## 3 Toyota Rav4 24 4 8.2 10 8 4
## 4 Toyota Corolla 26 4 8.0 7 6 6
## 5 Toyota Corolla 26 4 8.0 7 6 5
## 6 Toyota Corolla 26 4 8.0 7 6 6
##Save the merged data (Car_Total) to a file
write.csv(Car_Total, "Car_Total", row.names = FALSE)
#Using readxl package ##Load the package
library(readxl)
#Replace Missing Values ##Check the Master Dataset
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
##
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
#Replace Missing Values in Att_1 ##Calculate the Mean of Att_1
mean(Car_Total$Att_1)
## [1] NA
##Find Rows with NA in Att_1
na_rows<-Car_Total[is.na(Car_Total$Att_1),]
print(na_rows)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 109 Res151 NA 2 NA 2 3 NA 2 2 NA
## 110 Res152 NA 5 5 4 5 6 4 5 6
## 112 Res154 NA 6 6 6 5 4 5 6 6
## 127 Res168 NA 3 3 3 6 5 3 5 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 109 2 2 NA 2 2 3
## 110 6 6 6 2 4 5
## 112 5 6 5 6 5 5
## 127 6 7 5 3 5 4
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 109 1 2 Comprehensive Female 21 2 NA American
## 110 2 1 Comprehensive Female 21 2 NA American
## 112 2 2 Comprehensive Female 23 2 NA American
## 127 1 2 Collision Male 29 2 NA Asian
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 109 Chrysler Jeep 18 6 3.6 12 10.0 4
## 110 Chrysler Jeep 18 6 3.6 12 10.0 6
## 112 Chrysler Jeep 18 6 3.6 12 10.0 6
## 127 Toyota Highlander 20 6 7.2 10 8.5 6
##Find mean of Att_1 without NA values
meanAtt_1<-mean(Car_Total$Att_1,na.rm = TRUE)
print(meanAtt_1)
## [1] 4.882297
##Replace NA values with calculated mean of Att_1
Car_Total[is.na(Car_Total$Att_1),"Att_1"] <-meanAtt_1
##Check to see if NA rows are replaced with calculated mean
Car_Total[c(rownames(na_rows)),]
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 109 Res151 4.882297 2 NA 2 3 NA 2 2 NA
## 110 Res152 4.882297 5 5 4 5 6 4 5 6
## 112 Res154 4.882297 6 6 6 5 4 5 6 6
## 127 Res168 4.882297 3 3 3 6 5 3 5 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 109 2 2 NA 2 2 3
## 110 6 6 6 2 4 5
## 112 5 6 5 6 5 5
## 127 6 7 5 3 5 4
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 109 1 2 Comprehensive Female 21 2 NA American
## 110 2 1 Comprehensive Female 21 2 NA American
## 112 2 2 Comprehensive Female 23 2 NA American
## 127 1 2 Collision Male 29 2 NA Asian
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 109 Chrysler Jeep 18 6 3.6 12 10.0 4
## 110 Chrysler Jeep 18 6 3.6 12 10.0 6
## 112 Chrysler Jeep 18 6 3.6 12 10.0 6
## 127 Toyota Highlander 20 6 7.2 10 8.5 6
#Replace Missing Values in Futu_Pur_1 ##Calculate the Mean of Futu_Pur_1
mean(Car_Total$Futu_Pur_1)
## [1] NA
##Find Rows with NA in Futu_Pur_1
na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_1),]
print(na_rows)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62 Res109 6 6 6 5 5 6 NA NA 4
## 120 Res161 6 6 6 3 5 1 4 5 5
## 181 Res216 5 6 6 7 7 4 4 5 5
## 873 Res84 6 6 6 6 NA NA 5 5 NA
## 917 Res88 7 7 NA NA 7 7 5 7 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62 NA NA NA NA NA NA
## 120 NA 5 5 6 5 5
## 181 NA 6 5 5 5 5
## 873 NA 6 6 5 5 6
## 917 NA 5 5 4 5 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 62 1 3 Liability Male 36 2 NA American
## 120 1 3 Comprehensive Male 24 3 NA Middle Eastern
## 181 2 1 Liability Male 19 3 NA American
## 873 2 1 Collision Male 23 2 NA American
## 917 1 2 Collision Female 24 1 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62 Toyota Rav4 24 4 8.2 10 8 3
## 120 Chrysler Jeep 18 6 3.6 12 10 4
## 181 Toyota Rav4 24 4 8.2 10 8 6
## 873 Honda CRV 26 4 8.5 8 7 4
## 917 Honda CRV 26 4 8.5 8 7 4
##Find mean of Futu_Pur_1 without NA values
meanFut_Pur_1<-mean(Car_Total$Futu_Pur_1,na.rm = TRUE)
print(meanFut_Pur_1)
## [1] 5.320881
##Replace NA values with calculated mean of Futu_Pur_1
Car_Total[is.na(Car_Total$Futu_Pur_1),"Futu_Pur_1"] <-meanFut_Pur_1
##Check to see if NA rows are replaced with calculated mean
Car_Total[c(rownames(na_rows)),]
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62 Res109 6 6 6 5 5 6 NA NA 4
## 120 Res161 6 6 6 3 5 1 4 5 5
## 181 Res216 5 6 6 7 7 4 4 5 5
## 873 Res84 6 6 6 6 NA NA 5 5 NA
## 917 Res88 7 7 NA NA 7 7 5 7 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62 5.320881 NA NA NA NA NA
## 120 5.320881 5 5 6 5 5
## 181 5.320881 6 5 5 5 5
## 873 5.320881 6 6 5 5 6
## 917 5.320881 5 5 4 5 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 62 1 3 Liability Male 36 2 NA American
## 120 1 3 Comprehensive Male 24 3 NA Middle Eastern
## 181 2 1 Liability Male 19 3 NA American
## 873 2 1 Collision Male 23 2 NA American
## 917 1 2 Collision Female 24 1 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62 Toyota Rav4 24 4 8.2 10 8 3
## 120 Chrysler Jeep 18 6 3.6 12 10 4
## 181 Toyota Rav4 24 4 8.2 10 8 6
## 873 Honda CRV 26 4 8.5 8 7 4
## 917 Honda CRV 26 4 8.5 8 7 4
#Replace Missing Values in Futu_Pur_2 ##Calculate the Mean of Futu_Pur_2
mean(Car_Total$Futu_Pur_2)
## [1] NA
##Find Rows with NA in Futu_Pur_2
na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_2),]
print(na_rows)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62 Res109 6 6 6 5 5 6 NA NA 4
## 507 Res51 4 4 4 1 2 3 3 3 3
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62 5.320881 NA NA NA NA NA
## 507 2.000000 NA 2 2 2 6
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 62 1 3 Liability Male 36 2 NA American
## 507 2 1 Comprehensive Male 48 2 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62 Toyota Rav4 24 4 8.2 10 8 3
## 507 Ford Expedition 15 8 5.5 16 14 4
##Find mean of Futu_Pur_2 without NA values
meanFut_Pur_2<-mean(Car_Total$Futu_Pur_2,na.rm = TRUE)
print(meanFut_Pur_2)
## [1] 5.370583
##Replace NA values with calculated mean of Futu_Pur_2
Car_Total[is.na(Car_Total$Futu_Pur_2),"Futu_Pur_2"] <-meanFut_Pur_2
##Check to see if NA rows are replaced with calculated mean
Car_Total[c(rownames(na_rows)),]
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 62 Res109 6 6 6 5 5 6 NA NA 4
## 507 Res51 4 4 4 1 2 3 3 3 3
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 62 5.320881 5.370583 NA NA NA NA
## 507 2.000000 5.370583 2 2 2 6
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 62 1 3 Liability Male 36 2 NA American
## 507 2 1 Comprehensive Male 48 2 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 62 Toyota Rav4 24 4 8.2 10 8 3
## 507 Ford Expedition 15 8 5.5 16 14 4
#Create a new column (variable) to calculate mean of Att_1 and Att_2
Car_Total$Att_Mean=(Car_Total$Att_1+
Car_Total$Att_2)/2
View(Car_Total[c("Att_1","Att_2", "Att_Mean")])
##Check for NA values in Att_Mean
na_rows<-Car_Total[is.na(Car_Total$Att_Mean),]
print(na_rows)
## [1] Resp Att_1 Att_2 Enj_1 Enj_2
## [6] Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## [11] Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1
## [16] Pur_Proces_2 Residence Pay_Meth Insur_Type Gender
## [21] Age Education X Region Model
## [26] MPG Cyl acc1 C_cost. H_Cost
## [31] Post.Satis Att_Mean
## <0 rows> (or 0-length row.names)
#Create a new column (variable) to calculate mean of Futu_Pur_1 and Fut_Pur_2
Car_Total$Futu_Pur_Mean=(Car_Total$Futu_Pur_1+
Car_Total$Futu_Pur_2)/2
View(Car_Total[c("Futu_Pur_1","Futu_Pur_2", "Futu_Pur_Mean")])
##Check for NA values in Futu_Pur_Mean
na_rows<-Car_Total[is.na(Car_Total$Futu_Pur_Mean),]
print(na_rows)
## [1] Resp Att_1 Att_2 Enj_1 Enj_2
## [6] Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## [11] Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1
## [16] Pur_Proces_2 Residence Pay_Meth Insur_Type Gender
## [21] Age Education X Region Model
## [26] MPG Cyl acc1 C_cost. H_Cost
## [31] Post.Satis Att_Mean Futu_Pur_Mean
## <0 rows> (or 0-length row.names)
#Load in Librarys ##Load in ggplot
library(ggplot2)
##Load in Stringr
library(stringr)
#Seperate Make and Model into two Columns
Car_Total[c("Make","Model_v1")] <-str_split_fixed(Car_Total$Model," ", 2)
View(Car_Total)
#Number of Cars per Make by Region ##Graph
ggplot(Car_Total,aes(x=Region,fill = Make))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars per Make by Region")
#Number of Cars per Make by Age Group ##Convert Age into 3 Catagories
Car_Total$AgeGrp<-cut(Car_Total$Age,
breaks = c(0, 30, 50, Inf),
labels = c("Young Adults", "Adults", "Mature Adults"),
right = FALSE)
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1"
## [5] "Enj_2" "Perform_1" "Perform_2" "Perform_3"
## [9] "WOM_1" "WOM_2" "Futu_Pur_1" "Futu_Pur_2"
## [13] "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1" "Pur_Proces_2"
## [17] "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region"
## [25] "Model" "MPG" "Cyl" "acc1"
## [29] "C_cost." "H_Cost" "Post.Satis" "Att_Mean"
## [33] "Futu_Pur_Mean" "Make" "Model_v1" "AgeGrp"
head(Car_Total$ageGrp)
## NULL
##Graph
ggplot(Car_Total,aes(x=AgeGrp,fill = Make))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars per Make by Age")
#Number of Cars per Make by Gender ##Graph
ggplot(Car_Total,aes(x=Gender,fill = Make))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars per Make by Gender")
#Number of Cars per Make by Education ##Convert Eduction into 3 Catagories
Car_Total <- Car_Total %>%
mutate(education_level = recode(Education,
`1` = "Highschool",
`2` = "Undergraduate",
`3` = "Masters Degree"))
##Graph
ggplot(Car_Total,aes(x=education_level,fill = Make))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars per Make by Education")
#Group by Parent Company ##Install Packages
library(dplyr)
##Group
Car_Total <- Car_Total %>%
mutate(Parent = case_when(Make == "Buick" ~ "General Motors",
Make == "Chevrolet" ~ "General Motors",
Make == "Chrysler" ~ "Chrysler",
Make == "Dodge" ~ "Chrysler",
Make == "Fiat" ~ "Chrysler",
Make == "Ford" ~ "Ford",
Make == "Honda" ~ "Honda",
Make == "Kia" ~ "Kia",
Make == "Lincoln" ~ "Ford",
Make == "Toyota" ~ "Toyota",
TRUE ~ "Check"))
##Check if Grouping is Correct
count(Car_Total, Car_Total$Make, Car_Total$Parent, name = "Freq")
## Car_Total$Make Car_Total$Parent Freq
## 1 Buick General Motors 31
## 2 Chevrolet General Motors 64
## 3 Chrysler Chrysler 169
## 4 Dodge Chrysler 41
## 5 Fiat Chrysler 18
## 6 Ford Ford 202
## 7 Honda Honda 159
## 8 Kia Kia 34
## 9 Lincoln Ford 39
## 10 Toyota Toyota 292
table(Car_Total$Make)
##
## Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia
## 31 64 169 41 18 202 159 34
## Lincoln Toyota
## 39 292
##Graph
ggplot(Car_Total,aes(x=Region,fill = Parent))+
theme_bw()+
facet_wrap(~Model)+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Cars by Model and Region")
#What is the Attitude Mean by Make and Region? ##Create Contingency Table
brand_region_table_Att1 <- aggregate(Att_1~Parent+Region, Car_Total, mean)
print(brand_region_table_Att1)
## Parent Region Att_1
## 1 Chrysler American 4.925258
## 2 Ford American 4.567901
## 3 General Motors American 5.358974
## 4 Honda American 5.657895
## 5 Kia American 3.266667
## 6 Toyota American 5.254902
## 7 Chrysler Asian 5.291667
## 8 Ford Asian 4.166667
## 9 Honda Asian 5.568966
## 10 Toyota Asian 4.881199
## 11 Chrysler European 4.636364
## 12 Ford European 5.241379
## 13 General Motors European 5.000000
## 14 Honda European 4.758621
## 15 Kia European 3.789474
## 16 Toyota European 4.800000
## 17 Chrysler Middle Eastern 4.123711
## 18 Ford Middle Eastern 4.626168
## 19 General Motors Middle Eastern 5.920000
## 20 Honda Middle Eastern 5.500000
## 21 Toyota Middle Eastern 5.375000
##Graph
ggplot(brand_region_table_Att1, aes(x=Region, y=Att_1, group = Parent)) +
geom_line(aes(colour = Parent))+
geom_point(aes(colour = Parent))+
labs(y="Att_1 Mean",
title = "Attitude Mean by Parent and Region")
#What is the Attitude Mean for Toyota by Region? ##Create Contingency Table
Toyota_Att1_Mean <- brand_region_table_Att1%>%
filter(Parent == "Toyota")
head(Toyota_Att1_Mean,n=10)
## Parent Region Att_1
## 1 Toyota American 5.254902
## 2 Toyota Asian 4.881199
## 3 Toyota European 4.800000
## 4 Toyota Middle Eastern 5.375000
table(Toyota_Att1_Mean$Parent)
##
## Toyota
## 4
##Graph
ggplot(Toyota_Att1_Mean, aes(x=Region, y=Att_1, group = Parent)) +
geom_line(aes(colour = Parent))+
geom_point(aes(colour = Parent))+
scale_y_continuous(limits = c(3, 6))
labs(y="Att_1 Mean",
title = "Attitude Mean for Toyota by Region")
## $y
## [1] "Att_1 Mean"
##
## $title
## [1] "Attitude Mean for Toyota by Region"
##
## attr(,"class")
## [1] "labels"
#Filter for 1+ brands (Toyota, Ford)
Multiple_Att1_Mean <- brand_region_table_Att1 %>%
filter(Parent == "Toyota" | Parent == "Ford")
head(Multiple_Att1_Mean,n=10)
## Parent Region Att_1
## 1 Ford American 4.567901
## 2 Toyota American 5.254902
## 3 Ford Asian 4.166667
## 4 Toyota Asian 4.881199
## 5 Ford European 5.241379
## 6 Toyota European 4.800000
## 7 Ford Middle Eastern 4.626168
## 8 Toyota Middle Eastern 5.375000
table(Multiple_Att1_Mean$Parent)
##
## Ford Toyota
## 4 4
##Graph
ggplot(Multiple_Att1_Mean, aes(x=Region, y=Att_1, group = Parent)) +
geom_line(aes(colour = Parent)) +
geom_point(aes(colour = Parent)) +
scale_y_continuous(limits = c(3, 6))
labs(y="Att_1 Mean",
title = "Attitude Mean for Toyota and Ford by Region")
## $y
## [1] "Att_1 Mean"
##
## $title
## [1] "Attitude Mean for Toyota and Ford by Region"
##
## attr(,"class")
## [1] "labels"
#What is the Future Purchase Mean by Make and Region? ##Create Contingency Table
brand_region_table_Pur <- aggregate(Futu_Pur_1~Parent+Region, Car_Total, mean)
print(brand_region_table_Pur)
## Parent Region Futu_Pur_1
## 1 Chrysler American 4.929412
## 2 Ford American 5.370370
## 3 General Motors American 5.282051
## 4 Honda American 5.227415
## 5 Kia American 6.000000
## 6 Toyota American 5.270998
## 7 Chrysler Asian 5.125000
## 8 Ford Asian 5.166667
## 9 Honda Asian 5.120690
## 10 Toyota Asian 5.382353
## 11 Chrysler European 5.545455
## 12 Ford European 5.620690
## 13 General Motors European 4.548387
## 14 Honda European 5.586207
## 15 Kia European 5.421053
## 16 Toyota European 5.262500
## 17 Chrysler Middle Eastern 5.384751
## 18 Ford Middle Eastern 5.644860
## 19 General Motors Middle Eastern 5.480000
## 20 Honda Middle Eastern 5.441176
## 21 Toyota Middle Eastern 5.250000
##Graph
ggplot(brand_region_table_Pur, aes(x=Region, y=Futu_Pur_1, group = Parent)) +
geom_line(aes(colour = Parent))+
geom_point(aes(colour = Parent))+
labs(y="Futu_Pur_1 Mean",
title = "Future Purchase Mean by Parent and Region")
#What is the Future Purchase Mean for Toyota by Region?
Toyota_Pur1_Mean <- brand_region_table_Pur %>%
filter(Parent == "Toyota")
head(Toyota_Pur1_Mean,n=10)
## Parent Region Futu_Pur_1
## 1 Toyota American 5.270998
## 2 Toyota Asian 5.382353
## 3 Toyota European 5.262500
## 4 Toyota Middle Eastern 5.250000
table(Toyota_Pur1_Mean$Parent)
##
## Toyota
## 4
#Filter for 1+ brands (Toyota, Ford)
Multiple_Pur1_Mean <- brand_region_table_Pur %>%
filter(Parent == "Toyota" | Parent == "Ford")
head(Multiple_Pur1_Mean,n=10)
## Parent Region Futu_Pur_1
## 1 Ford American 5.370370
## 2 Toyota American 5.270998
## 3 Ford Asian 5.166667
## 4 Toyota Asian 5.382353
## 5 Ford European 5.620690
## 6 Toyota European 5.262500
## 7 Ford Middle Eastern 5.644860
## 8 Toyota Middle Eastern 5.250000
table(Multiple_Pur1_Mean$Parent)
##
## Ford Toyota
## 4 4
##Graph
ggplot(Multiple_Pur1_Mean, aes(x=Region, y=Futu_Pur_1, group = Parent)) +
geom_line(aes(colour = Parent)) +
geom_point(aes(colour = Parent)) +
scale_y_continuous(limits = c(3, 6))
labs(y="Futu_Pur_1 Mean",
title = "Future Purchase Mean for Toyota and Ford by Region")
## $y
## [1] "Futu_Pur_1 Mean"
##
## $title
## [1] "Future Purchase Mean for Toyota and Ford by Region"
##
## attr(,"class")
## [1] "labels"
#Filter Data into Toyota only
filtered_data_Toyota <- Car_Total %>%
filter(Parent == "Toyota")
#Graph Age Group by Region- Toyota Only
ggplot(filtered_data_Toyota,aes(x=Region,fill = AgeGrp))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Toyota Cars by Age Group and Region")
#Number of Toyota Cars Purchased by Each Age Group
car_count_by_age <- filtered_data_Toyota %>%
group_by(AgeGrp) %>%
summarize(car_count = n())
print(car_count_by_age)
## # A tibble: 3 × 2
## AgeGrp car_count
## <fct> <int>
## 1 Young Adults 104
## 2 Adults 149
## 3 Mature Adults 39
#Number of Toyota Cars Purchased by Each Region
car_count_by_region <- filtered_data_Toyota %>%
group_by(Region) %>%
summarize(car_count = n())
print(car_count_by_region)
## # A tibble: 4 × 2
## Region car_count
## <chr> <int>
## 1 American 102
## 2 Asian 102
## 3 European 80
## 4 Middle Eastern 8
#Number of Toyota Cars Purchased by Gender
car_count_by_gender <- filtered_data_Toyota %>%
group_by(Gender) %>%
summarize(car_count = n())
print(car_count_by_gender)
## # A tibble: 3 × 2
## Gender car_count
## <chr> <int>
## 1 "" 3
## 2 "Female" 213
## 3 "Male" 76
#Graph Gender by Region- Toyota Only
ggplot(filtered_data_Toyota,aes(x=Region,fill = Gender))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Toyota Cars by Gender and Region")
#Number of Toyota Cars Purchased by Education
car_count_by_education <- filtered_data_Toyota %>%
group_by(education_level) %>%
summarize(car_count = n())
print(car_count_by_education)
## # A tibble: 3 × 2
## education_level car_count
## <chr> <int>
## 1 Highschool 65
## 2 Masters Degree 70
## 3 Undergraduate 157
#Graph Gender by Region- Toyota Only
ggplot(filtered_data_Toyota,aes(x=Region,fill = education_level))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Toyota Cars Purchased by Education Level and Region")
#Graph Gender by Age Group- Toyota Only
ggplot(filtered_data_Toyota,aes(x=Region,fill = AgeGrp))+
theme_bw()+
geom_bar()+
labs(y="Number of Cars",
title = "Number of Toyota Cars Purchased by Age Group and Region")