Analyzing consumer insights for Ford purchases across different regions to inform strategic planning
getwd()
## [1] "/Users/pmashru/Desktop/3P98 Marketing Analytics and Intelligence/Project 1 Files"
setwd("~/Desktop/3P98 Marketing Analytics and Intelligence/Project 1 Files")
Installed the required packages
Data Manipulation
Imported the Car Survey 1 and Car Survey 2 datasets, cleaned up the data and merged the datasets into one - Car_Total
#Open data file "Car Survey 1a"
Car1 <- read.csv("Car Survey 1a.csv")
str(Car1)
## 'data.frame': 1049 obs. of 22 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
head(Car1, n=5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res2 7 5 5 2 2 6 7 5 5
## 3 Res3 7 7 7 5 5 5 3 6 6
## 4 Res4 4 1 1 1 1 1 1 7 7
## 5 Res5 6 6 6 5 5 2 1 7 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 7 7 6
## 3 7 7 5 7 7 7
## 4 3 3 6 5 5 4
## 5 7 6 6 5 6 7
## Residence Pay_Meth Insur_Type Gender Age Education
## 1 2 2 Collision Male 18 2
## 2 2 2 Collision Male 18 2
## 3 1 2 Collision Male 19 2
## 4 2 2 Collision Male 19 2
## 5 1 2 Collision Female 19 2
View(Car1)
#Open data file "Car Survey 2a"
Car2 <- read.csv("Car Survey 2a.csv")
str(Car2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
head(Car2, n=5)
## Respondents Region Model MPG Cyl acc1 C_cost H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
View(Car2)
names(Car2)[1]<-c("Resp")
head(Car2, n=1)
## Resp Region Model MPG Cyl acc1 C_cost H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
#Merge Car1 and Car2 by Respondents
Car_Total<-merge(Car1, Car2, by="Resp")
str(Car_Total)
## 'data.frame': 1049 obs. of 30 variables:
## $ Resp : chr "Res1" "Res10" "Res100" "Res1000" ...
## $ Att_1 : int 6 6 6 6 6 3 2 7 2 6 ...
## $ Att_2 : int 6 6 7 6 6 1 2 7 1 6 ...
## $ Enj_1 : int 6 4 7 7 7 4 1 7 2 6 ...
## $ Enj_2 : int 6 4 3 6 6 3 2 6 1 5 ...
## $ Perform_1 : int 5 4 5 6 6 5 2 5 2 5 ...
## $ Perform_2 : int 6 4 6 6 6 6 2 6 2 5 ...
## $ Perform_3 : int 3 1 6 6 6 6 1 5 2 5 ...
## $ WOM_1 : int 3 5 3 6 4 2 6 6 7 3 ...
## $ WOM_2 : int 3 6 5 6 4 6 7 6 7 3 ...
## $ Futu_Pur_1 : int 3 6 6 6 4 6 6 6 7 6 ...
## $ Futu_Pur_2 : int 3 6 6 6 6 6 5 7 7 6 ...
## $ Valu_Percp_1: int 5 6 7 4 5 5 4 6 4 5 ...
## $ Valu_Percp_2: int 2 6 6 6 6 4 4 5 6 6 ...
## $ Pur_Proces_1: int 6 6 5 6 6 5 4 5 6 6 ...
## $ Pur_Proces_2: int 4 6 5 3 7 5 5 5 7 5 ...
## $ Residence : int 2 1 2 2 1 1 1 2 1 2 ...
## $ Pay_Meth : int 2 2 1 3 3 3 3 3 3 3 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Liability" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 18 21 32 24 24 25 26 26 27 27 ...
## $ Education : int 2 2 1 2 2 2 2 2 2 2 ...
## $ Region : chr "European" "European" "American" "Asian" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
## $ MPG : int 15 15 24 26 26 26 26 26 26 26 ...
## $ Cyl : int 8 8 4 4 4 4 4 4 4 4 ...
## $ acc1 : num 5.5 5.5 8.2 8 8 8 8 8 8 8 ...
## $ C_cost : num 16 16 10 7 7 7 7 7 7 7 ...
## $ H_Cost : num 14 14 8 6 6 6 6 6 6 6 ...
## $ Post.Satis : int 4 5 4 6 5 6 5 6 7 6 ...
write.csv(Car_Total, "Car_Total", row.names = FALSE)
View(Car_Total)
Checked for missing values and replaced them with the mean of their respective columns.
#Checking missing (NA) values
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education Region Model MPG
## Min. :1.000 Length:1049 Length:1049 Min. :14.00
## 1st Qu.:2.000 Class :character Class :character 1st Qu.:17.00
## Median :2.000 Mode :character Mode :character Median :19.00
## Mean :1.989 Mean :19.58
## 3rd Qu.:2.000 3rd Qu.:22.00
## Max. :3.000 Max. :26.00
##
## Cyl acc1 C_cost H_Cost Post.Satis
## Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000 Min. :2.00
## 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000 1st Qu.:5.00
## Median :6.0 Median :6.500 Median :12.00 Median :10.000 Median :6.00
## Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634 Mean :5.28
## 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000 3rd Qu.:6.00
## Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000 Max. :7.00
##
#the output has NA values
#Replace NA values in Car_Total with the mean of their respective columns
Car_Total[is.na(Car_Total)] <-mean(Car_Total, na.rm = TRUE)
## Warning in mean.default(Car_Total, na.rm = TRUE): argument is not numeric or
## logical: returning NA
Car_Total <- Car_Total %>%
mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
## Education Region Model MPG
## Min. :1.000 Length:1049 Length:1049 Min. :14.00
## 1st Qu.:2.000 Class :character Class :character 1st Qu.:17.00
## Median :2.000 Mode :character Mode :character Median :19.00
## Mean :1.989 Mean :19.58
## 3rd Qu.:2.000 3rd Qu.:22.00
## Max. :3.000 Max. :26.00
## Cyl acc1 C_cost H_Cost Post.Satis
## Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000 Min. :2.00
## 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000 1st Qu.:5.00
## Median :6.0 Median :6.500 Median :12.00 Median :10.000 Median :6.00
## Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634 Mean :5.28
## 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000 3rd Qu.:6.00
## Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000 Max. :7.00
Mutated the data to include mean values of the columns with sub-questions, like the Attitude or Performance questions.
#Creating new columns to calculate the mean of the following variables
#Attitude (Att_1 and Att_2)
Car_Total$Att_Mean = (Car_Total$Att_1 +
Car_Total$Att_2) / 2
#Enjoyment/Pleasure (Enj_1 and Enj_2)
Car_Total$Enj_Mean = (Car_Total$Enj_1 +
Car_Total$Enj_2) / 2
#Performance (Perform_1, Perform_2, and Perform_3)
Car_Total$Perform_Mean = (Car_Total$Perform_1 +
Car_Total$Perform_2 +
Car_Total$Perform_3) / 3
#Word-of-Mouth (WOM_1 and WOM_2)
Car_Total$WOM_Mean = (Car_Total$WOM_1 +
Car_Total$WOM_2) / 2
#Future Purchase Intentions (Futu_Pur_1 and Futu_Pur_2)
Car_Total$FutPur_Mean = (Car_Total$Futu_Pur_1 +
Car_Total$Futu_Pur_2) / 2
#Value Perception (Valu_Percp_1 and Valu_Percp_2)
Car_Total$ValPer_Mean = (Car_Total$Valu_Percp_1 +
Car_Total$Valu_Percp_2) / 2
#Car Purchase Process (Pur_Process_1 and Pur_Process_2)
Car_Total$PurProMean = (Car_Total$Pur_Proces_1 +
Car_Total$Pur_Proces_2) / 2
View(Car_Total)
Organized the data into categories and cleaned up the data with the following:
#Creating additional variables
#Define Model and Region as categorical variables
Car_Total$Model <- as.factor(Car_Total$Model)
Car_Total$Region <- as.factor(Car_Total$Region)
#Split the Model column into Brand and Model
cars_separated <- Car_Total %>% separate(Model, into = c("Brand", "Model"),
sep = " ", extra = "merge")
#Added the split columns to the dataset "Car_Total"
Car_Total <- cars_separated
View(Car_Total)
#Demographic Groups by Age variable (convert age into 3 groups)
Car_Total$AgeGrp <- cut(Car_Total$Age, breaks = c(0, 30, 50, Inf),
labels = c("YoungAdults", "Adults", "Mature Adults"),
right = FALSE)
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "Region" "Brand" "Model"
## [26] "MPG" "Cyl" "acc1" "C_cost" "H_Cost"
## [31] "Post.Satis" "Att_Mean" "Enj_Mean" "Perform_Mean" "WOM_Mean"
## [36] "FutPur_Mean" "ValPer_Mean" "PurProMean" "AgeGrp"
head(Car_Total, n=5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res10 6 6 4 4 4 4 1 5 6
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 6 6 6
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## Residence Pay_Meth Insur_Type Gender Age Education Region Brand Model
## 1 2 2 Collision Male 18 2 European Ford Expedition
## 2 1 2 Collision Male 21 2 European Ford Expedition
## 3 2 1 Collision Female 32 1 American Toyota Rav4
## 4 2 3 Liability Female 24 2 Asian Toyota Corolla
## 5 1 3 Liability Female 24 2 Asian Toyota Corolla
## MPG Cyl acc1 C_cost H_Cost Post.Satis Att_Mean Enj_Mean Perform_Mean WOM_Mean
## 1 15 8 5.5 16 14 4 6.0 6.0 4.666667 3.0
## 2 15 8 5.5 16 14 5 6.0 4.0 3.000000 5.5
## 3 24 4 8.2 10 8 4 6.5 5.0 5.666667 4.0
## 4 26 4 8.0 7 6 6 6.0 6.5 6.000000 6.0
## 5 26 4 8.0 7 6 5 6.0 6.5 6.000000 4.0
## FutPur_Mean ValPer_Mean PurProMean AgeGrp
## 1 3 3.5 5.0 YoungAdults
## 2 6 6.0 6.0 YoungAdults
## 3 6 6.5 5.0 Adults
## 4 6 5.0 4.5 YoungAdults
## 5 5 5.5 6.5 YoungAdults
View(Car_Total)
#Extracting Ford Data from Car_Total
Ford_data <- Car_Total %>%
filter(Brand == "Ford")
View(Ford_data)
Descriptive Analysis
We will be conducting the analysis and creating data visualization to inform the three key variables (Market Demand and Customer Preferences, Competitive Landscape, and Customer Satisfaction and Future Purchase Intentions) to gain consumer insights.
# 1 Market Demand and Customer Preferences
# To understand who is buying Ford cars, we will divide the market into segments by analyzing age, gender, education, and payment methods.
# Age group analysis for Ford across Regions
agegrp_region_table <- table(Ford_data$AgeGrp, Ford_data$Region)
print(agegrp_region_table)
##
## American Asian European Middle Eastern
## YoungAdults 35 0 23 26
## Adults 20 21 2 17
## Mature Adults 26 3 4 25
#Created a bar graph
ggplot(Ford_data, aes(x = Region, fill = AgeGrp)) +
theme_bw() +
geom_bar() +
labs(y = "Number of Cars",
title = "Number of Ford Cars by Age Group and Regions") +
scale_fill_manual(values = c("YoungAdults" = "steelblue", "Adults" = "purple", "Mature Adults" = "lightblue")) #Manually assigning colors to the chart
#Gender and Payment Preference Distribution
#Converting Pay_Meth to factor
Ford_data <- Ford_data %>%
mutate(Pay_Meth = factor(Pay_Meth,
labels = c("Lease", "Finance", "Cash")))
#Understanding whether men or women prefer leasing, financing, or cash when purchasing Ford
ggplot(Ford_data, aes(x = Pay_Meth, fill = Gender)) +
theme_bw()+
geom_bar()+
labs(y= "Number of Cars", x= "Payment Method",
title = "Payment Method Preference by Gender for Ford") +
scale_fill_manual(values = c("Female" = "steelblue", "Male" = "purple"))
#Analysis of customers who perceive Ford as good value (Value Perception) also had a smooth purchase experience (Purchase Process Satisfaction)
ggplot(Ford_data, aes(x=ValPer_Mean, y=PurProMean))+
geom_point(alpha = 0.5)+
geom_smooth(method = "lm", color = "purple")+
labs(y= "Purchase Process Satisfaction (1-7)", x= "Value Perception (1-7)",
title = "Value Perception vs. Purchase Process Satisfaction")
## `geom_smooth()` using formula = 'y ~ x'
# 2 Competitive Landscape
#Key Competitors in the auto maker industry and comparisons with Ford
#Count the total number of cars by Brand across Region
brand_region_counts <-table(Car_Total$Brand, Car_Total$Region)
print(brand_region_counts)
##
## American Asian European Middle Eastern
## Buick 17 0 10 4
## Chevrolet 22 0 21 21
## Chrysler 54 24 13 78
## Dodge 22 0 0 19
## Fiat 9 0 9 0
## Ford 81 24 29 68
## Honda 38 58 29 34
## Kia 15 0 19 0
## Lincoln 0 0 0 39
## Toyota 102 102 80 8
ggplot(Car_Total, aes(x=Region, fill=Brand))+
theme_bw()+
geom_bar()+
labs(y="Number of cars",
title = "Number of cars by Brands across Region")
#Comparing Ford's Customer satisfaction with other brands
ggplot(Car_Total, aes(x=Brand, y=Post.Satis, fill=Brand)) +
geom_boxplot()+
labs(y="Post-Purchase Satisfaction (1-7)", x="Brand",
title = "Customer Satisfaction by Brand")
#Does fuel efficiency (MPG) impact cost perception (C_Cost) compared to competitors
ggplot(Car_Total, aes(x = MPG, y = C_cost, color = Brand)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(y = "Cost (Cents per Mile)", x = "MPG",
title = "Miles per Gallon vs. Cost of Travel")
## `geom_smooth()` using formula = 'y ~ x'
# 3 Customer Satisfaction and Future Purchase Intentions
#Understanding the relationship between Performance and Future Purchase Intentions for Ford
ggplot(Ford_data, aes(x = Perform_Mean, y = FutPur_Mean)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", color = "purple") +
labs( y = "Future Purchase Likelihood (1-7)", x = "Performance Satisfaction (1-7)",
title = "Performance Satisfaction vs. Future Purchase Intention")
## `geom_smooth()` using formula = 'y ~ x'
# Analyzing Word-of-Mouth scores for Ford
#Define Model and Region as categorical variables
Ford_data$Model <- as.factor(Ford_data$Model)
Ford_data$WOM_1 <- as.numeric(Ford_data$WOM_1) # Likelihood to Recommend
Ford_data$WOM_2 <- as.numeric(Ford_data$WOM_2) # Positive Sentiment
Ford_data$WOM_Mean <- as.numeric(Ford_data$WOM_Mean) # Overall Word-of-Mouth Scores
# Group by Model and summarize WOM sentiment
ford_wom_summary <- Ford_data %>%
group_by(Model) %>%
summarise(
Avg_Recommend = mean(WOM_1, na.rm = TRUE),
Avg_Positive_Talk = mean(WOM_2, na.rm = TRUE),
Count = n(),
) %>%
arrange(desc(Avg_Recommend)) # Sort by recommendation score
print(ford_wom_summary)
## # A tibble: 2 × 4
## Model Avg_Recommend Avg_Positive_Talk Count
## <fct> <dbl> <dbl> <int>
## 1 Explorer 5.36 5.70 99
## 2 Expedition 5.21 5.27 103
# Count number of cars per WOM_1 score for Ford models
ford_wom_counts <- Ford_data %>%
group_by(WOM_1, Model) %>%
summarise(Num_Cars = n(), .groups = "drop")
#Created a bar graph
ggplot(ford_wom_counts, aes(x = factor(WOM_1), y = Num_Cars, fill = Model)) +
geom_bar(stat = "identity", position = "dodge") +
labs( x = "Likelihood to Recommend (WOM_1)", y = "Number of Ford Cars",
title = "Likelihood to Recommend by Ford Model") +
scale_fill_manual(values = c("steelblue", "purple"))
# Count number of cars per WOM_2 score for Ford models
ford_wom2_counts <- Ford_data %>%
group_by(WOM_2, Model) %>%
summarise(Num_Cars = n(), .groups = "drop")
#Created a bar graph
ggplot(ford_wom2_counts, aes(x = factor(WOM_2), y = Num_Cars, fill = Model)) +
geom_bar(stat = "identity", position = "dodge") +
labs( x = "Positive Sentiment (WOM_2)", y = "Number of Ford Cars",
title = "Positive Sentiment by Ford Model") +
scale_fill_manual(values = c("steelblue", "purple"))
# Calculate mean WOM scores for each brand
brand_wom_summary <- Car_Total %>%
group_by(Brand) %>%
summarise(
Mean_WOM_1 = mean(WOM_1, na.rm = TRUE),
Mean_WOM_2 = mean(WOM_2, na.rm = TRUE),
WOM_Mean = mean(c(WOM_1, WOM_2), na.rm = TRUE) # Overall WOM Score
) %>%
arrange(desc(WOM_Mean)) # Sort brands by WOM score
# Print summary
print(brand_wom_summary)
## # A tibble: 10 × 4
## Brand Mean_WOM_1 Mean_WOM_2 WOM_Mean
## <chr> <dbl> <dbl> <dbl>
## 1 Toyota 5.63 5.68 5.65
## 2 Ford 5.29 5.48 5.38
## 3 Honda 5.38 5.30 5.34
## 4 Chrysler 5.27 5.35 5.31
## 5 Chevrolet 5.09 5.20 5.15
## 6 Kia 5 5.15 5.07
## 7 Lincoln 4.72 4.85 4.78
## 8 Fiat 4.61 4.78 4.69
## 9 Buick 4.65 4.32 4.48
## 10 Dodge 4.39 4.49 4.44
#Created a bar graph to compare Ford's WOM scores with its competitors
ggplot(brand_wom_summary, aes(x = reorder(Brand, -WOM_Mean), y = WOM_Mean, fill = Brand == "Ford")) +
geom_bar(stat = "identity") +
labs(y = "Average WOM Score", x = "Brand",
title = "Comparison of WOM Scores: Ford vs Other Brands") +
scale_fill_manual(values = c("steelblue", "purple"), guide = FALSE) # Ford highlighted in blue
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#Regional Variation in Customer Satisfaction for Ford Purchases
ggplot(Ford_data, aes(x = Region, y = Post.Satis, fill = Region)) +
geom_boxplot() +
labs(x = "Region", y = "Satisfaction Score",
title = "Customer Satisfaction by Region (Ford)")