Analyzing consumer insights for Ford purchases across different regions to inform strategic planning

getwd()
## [1] "/Users/pmashru/Desktop/3P98 Marketing Analytics and Intelligence/Project 1 Files"
setwd("~/Desktop/3P98 Marketing Analytics and Intelligence/Project 1 Files")

Installed the required packages

Data Manipulation

Imported the Car Survey 1 and Car Survey 2 datasets, cleaned up the data and merged the datasets into one - Car_Total

#Open data file "Car Survey 1a"
Car1 <- read.csv("Car Survey 1a.csv")
str(Car1)
## 'data.frame':    1049 obs. of  22 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
head(Car1, n=5)
##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education
## 1         2        2  Collision   Male  18         2
## 2         2        2  Collision   Male  18         2
## 3         1        2  Collision   Male  19         2
## 4         2        2  Collision   Male  19         2
## 5         1        2  Collision Female  19         2
View(Car1)

#Open data file "Car Survey 2a"
Car2 <- read.csv("Car Survey 2a.csv")
str(Car2)
## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost     : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...
head(Car2, n=5)
##   Respondents   Region           Model MPG Cyl acc1 C_cost H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5     16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5     16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5     16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5     16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5     16     14          5
View(Car2)

names(Car2)[1]<-c("Resp")
head(Car2, n=1)
##   Resp   Region           Model MPG Cyl acc1 C_cost H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5     16     14          4
#Merge Car1 and Car2 by Respondents
Car_Total<-merge(Car1, Car2, by="Resp")
str(Car_Total)
## 'data.frame':    1049 obs. of  30 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost      : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...
write.csv(Car_Total, "Car_Total", row.names = FALSE)
View(Car_Total)

Checked for missing values and replaced them with the mean of their respective columns.

#Checking missing (NA) values

summary(Car_Total)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :6.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                     NA's   :4                       NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education        Region             Model                MPG       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :14.00  
##  1st Qu.:2.000   Class :character   Class :character   1st Qu.:17.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :19.00  
##  Mean   :1.989                                         Mean   :19.58  
##  3rd Qu.:2.000                                         3rd Qu.:22.00  
##  Max.   :3.000                                         Max.   :26.00  
##                                                                       
##       Cyl           acc1           C_cost          H_Cost         Post.Satis  
##  Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000   Min.   :2.00  
##  1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000   1st Qu.:5.00  
##  Median :6.0   Median :6.500   Median :12.00   Median :10.000   Median :6.00  
##  Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634   Mean   :5.28  
##  3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000   3rd Qu.:6.00  
##  Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000   Max.   :7.00  
## 
#the output has NA values

#Replace NA values in Car_Total with the mean of their respective columns
Car_Total[is.na(Car_Total)] <-mean(Car_Total, na.rm = TRUE)
## Warning in mean.default(Car_Total, na.rm = TRUE): argument is not numeric or
## logical: returning NA
Car_Total <- Car_Total %>%
  mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
summary(Car_Total)
##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:5.000  
##  Mode  :character   Median :5.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##    Education        Region             Model                MPG       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :14.00  
##  1st Qu.:2.000   Class :character   Class :character   1st Qu.:17.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :19.00  
##  Mean   :1.989                                         Mean   :19.58  
##  3rd Qu.:2.000                                         3rd Qu.:22.00  
##  Max.   :3.000                                         Max.   :26.00  
##       Cyl           acc1           C_cost          H_Cost         Post.Satis  
##  Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000   Min.   :2.00  
##  1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000   1st Qu.:5.00  
##  Median :6.0   Median :6.500   Median :12.00   Median :10.000   Median :6.00  
##  Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634   Mean   :5.28  
##  3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000   3rd Qu.:6.00  
##  Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000   Max.   :7.00

Mutated the data to include mean values of the columns with sub-questions, like the Attitude or Performance questions.

#Creating new columns to calculate the mean of the following variables

#Attitude (Att_1 and Att_2)
Car_Total$Att_Mean = (Car_Total$Att_1 +
                        Car_Total$Att_2) / 2

#Enjoyment/Pleasure (Enj_1 and Enj_2)
Car_Total$Enj_Mean = (Car_Total$Enj_1 +
                        Car_Total$Enj_2) / 2

#Performance (Perform_1, Perform_2, and Perform_3)
Car_Total$Perform_Mean = (Car_Total$Perform_1 +
                            Car_Total$Perform_2 +
                            Car_Total$Perform_3) / 3

#Word-of-Mouth (WOM_1 and WOM_2)
Car_Total$WOM_Mean = (Car_Total$WOM_1 +
                        Car_Total$WOM_2) / 2

#Future Purchase Intentions (Futu_Pur_1 and Futu_Pur_2)
Car_Total$FutPur_Mean = (Car_Total$Futu_Pur_1 +
                           Car_Total$Futu_Pur_2) / 2

#Value Perception (Valu_Percp_1 and Valu_Percp_2)
Car_Total$ValPer_Mean = (Car_Total$Valu_Percp_1 +
                           Car_Total$Valu_Percp_2) / 2

#Car Purchase Process (Pur_Process_1 and Pur_Process_2)
Car_Total$PurProMean = (Car_Total$Pur_Proces_1 +
                          Car_Total$Pur_Proces_2) / 2

View(Car_Total)

Organized the data into categories and cleaned up the data with the following:

#Creating additional variables

#Define Model and Region as categorical variables
Car_Total$Model <- as.factor(Car_Total$Model)
Car_Total$Region <- as.factor(Car_Total$Region)

#Split the Model column into Brand and Model
cars_separated <- Car_Total %>% separate(Model, into = c("Brand", "Model"),
                                         sep = " ", extra = "merge")

#Added the split columns to the dataset "Car_Total"
Car_Total <- cars_separated
View(Car_Total)

#Demographic Groups by Age variable (convert age into 3 groups)
Car_Total$AgeGrp <- cut(Car_Total$Age, breaks = c(0, 30, 50, Inf),
                        labels = c("YoungAdults", "Adults", "Mature Adults"),
                        right = FALSE)

names(Car_Total)
##  [1] "Resp"         "Att_1"        "Att_2"        "Enj_1"        "Enj_2"       
##  [6] "Perform_1"    "Perform_2"    "Perform_3"    "WOM_1"        "WOM_2"       
## [11] "Futu_Pur_1"   "Futu_Pur_2"   "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence"    "Pay_Meth"     "Insur_Type"   "Gender"      
## [21] "Age"          "Education"    "Region"       "Brand"        "Model"       
## [26] "MPG"          "Cyl"          "acc1"         "C_cost"       "H_Cost"      
## [31] "Post.Satis"   "Att_Mean"     "Enj_Mean"     "Perform_Mean" "WOM_Mean"    
## [36] "FutPur_Mean"  "ValPer_Mean"  "PurProMean"   "AgeGrp"
head(Car_Total, n=5)
##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education   Region  Brand      Model
## 1         2        2  Collision   Male  18         2 European   Ford Expedition
## 2         1        2  Collision   Male  21         2 European   Ford Expedition
## 3         2        1  Collision Female  32         1 American Toyota       Rav4
## 4         2        3  Liability Female  24         2    Asian Toyota    Corolla
## 5         1        3  Liability Female  24         2    Asian Toyota    Corolla
##   MPG Cyl acc1 C_cost H_Cost Post.Satis Att_Mean Enj_Mean Perform_Mean WOM_Mean
## 1  15   8  5.5     16     14          4      6.0      6.0     4.666667      3.0
## 2  15   8  5.5     16     14          5      6.0      4.0     3.000000      5.5
## 3  24   4  8.2     10      8          4      6.5      5.0     5.666667      4.0
## 4  26   4  8.0      7      6          6      6.0      6.5     6.000000      6.0
## 5  26   4  8.0      7      6          5      6.0      6.5     6.000000      4.0
##   FutPur_Mean ValPer_Mean PurProMean      AgeGrp
## 1           3         3.5        5.0 YoungAdults
## 2           6         6.0        6.0 YoungAdults
## 3           6         6.5        5.0      Adults
## 4           6         5.0        4.5 YoungAdults
## 5           5         5.5        6.5 YoungAdults
View(Car_Total)

#Extracting Ford Data from Car_Total
Ford_data <- Car_Total %>%
  filter(Brand == "Ford")
View(Ford_data)

Descriptive Analysis

We will be conducting the analysis and creating data visualization to inform the three key variables (Market Demand and Customer Preferences, Competitive Landscape, and Customer Satisfaction and Future Purchase Intentions) to gain consumer insights.

# 1 Market Demand and Customer Preferences
# To understand who is buying Ford cars, we will divide the market into segments by analyzing age, gender, education, and payment methods.

# Age group analysis for Ford across Regions

agegrp_region_table <- table(Ford_data$AgeGrp, Ford_data$Region)
print(agegrp_region_table)
##                
##                 American Asian European Middle Eastern
##   YoungAdults         35     0       23             26
##   Adults              20    21        2             17
##   Mature Adults       26     3        4             25
#Created a bar graph
ggplot(Ford_data, aes(x = Region, fill = AgeGrp)) +
  theme_bw() +
  geom_bar() +
  labs(y = "Number of Cars",
       title = "Number of Ford Cars by Age Group and Regions") +
  scale_fill_manual(values = c("YoungAdults" = "steelblue", "Adults" = "purple", "Mature Adults" = "lightblue")) #Manually assigning colors to the chart

#Gender and Payment Preference Distribution

#Converting Pay_Meth to factor
Ford_data <- Ford_data %>%
  mutate(Pay_Meth = factor(Pay_Meth,
                           labels = c("Lease", "Finance", "Cash")))

#Understanding whether men or women prefer leasing, financing, or cash when purchasing Ford
ggplot(Ford_data, aes(x = Pay_Meth, fill = Gender)) +
  theme_bw()+
  geom_bar()+
  labs(y= "Number of Cars", x= "Payment Method",
       title = "Payment Method Preference by Gender for Ford") +
  scale_fill_manual(values = c("Female" = "steelblue", "Male" = "purple"))

#Analysis of customers who perceive Ford as good value (Value Perception) also had a smooth purchase experience (Purchase Process Satisfaction)
ggplot(Ford_data, aes(x=ValPer_Mean, y=PurProMean))+
  geom_point(alpha = 0.5)+
  geom_smooth(method = "lm", color = "purple")+
  labs(y= "Purchase Process Satisfaction (1-7)", x= "Value Perception (1-7)",
       title = "Value Perception vs. Purchase Process Satisfaction")
## `geom_smooth()` using formula = 'y ~ x'

# 2 Competitive Landscape

#Key Competitors in the auto maker industry and comparisons with Ford
#Count the total number of cars by Brand across Region
brand_region_counts <-table(Car_Total$Brand, Car_Total$Region)
print(brand_region_counts)
##            
##             American Asian European Middle Eastern
##   Buick           17     0       10              4
##   Chevrolet       22     0       21             21
##   Chrysler        54    24       13             78
##   Dodge           22     0        0             19
##   Fiat             9     0        9              0
##   Ford            81    24       29             68
##   Honda           38    58       29             34
##   Kia             15     0       19              0
##   Lincoln          0     0        0             39
##   Toyota         102   102       80              8
ggplot(Car_Total, aes(x=Region, fill=Brand))+
  theme_bw()+
  geom_bar()+
  labs(y="Number of cars",
       title = "Number of cars by Brands across Region")

#Comparing Ford's Customer satisfaction with other brands
ggplot(Car_Total, aes(x=Brand, y=Post.Satis, fill=Brand)) +
  geom_boxplot()+
  labs(y="Post-Purchase Satisfaction (1-7)", x="Brand",
       title = "Customer Satisfaction by Brand")

#Does fuel efficiency (MPG) impact cost perception (C_Cost) compared to competitors
ggplot(Car_Total, aes(x = MPG, y = C_cost, color = Brand)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cost (Cents per Mile)", x = "MPG",
       title = "Miles per Gallon vs. Cost of Travel")
## `geom_smooth()` using formula = 'y ~ x'

# 3 Customer Satisfaction and Future Purchase Intentions

#Understanding the relationship between Performance and Future Purchase Intentions for Ford
ggplot(Ford_data, aes(x = Perform_Mean, y = FutPur_Mean)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth(method = "lm", color = "purple") +
  labs( y = "Future Purchase Likelihood (1-7)", x = "Performance Satisfaction (1-7)",
        title = "Performance Satisfaction vs. Future Purchase Intention")
## `geom_smooth()` using formula = 'y ~ x'

# Analyzing Word-of-Mouth scores for Ford

#Define Model and Region as categorical variables
Ford_data$Model <- as.factor(Ford_data$Model)

Ford_data$WOM_1 <- as.numeric(Ford_data$WOM_1) # Likelihood to Recommend
Ford_data$WOM_2 <- as.numeric(Ford_data$WOM_2) # Positive Sentiment
Ford_data$WOM_Mean <- as.numeric(Ford_data$WOM_Mean) # Overall Word-of-Mouth Scores

# Group by Model and summarize WOM sentiment
ford_wom_summary <- Ford_data %>%
  group_by(Model) %>%
  summarise(
    Avg_Recommend = mean(WOM_1, na.rm = TRUE),
    Avg_Positive_Talk = mean(WOM_2, na.rm = TRUE),
    Count = n(),
  ) %>%
  arrange(desc(Avg_Recommend))  # Sort by recommendation score

print(ford_wom_summary)
## # A tibble: 2 × 4
##   Model      Avg_Recommend Avg_Positive_Talk Count
##   <fct>              <dbl>             <dbl> <int>
## 1 Explorer            5.36              5.70    99
## 2 Expedition          5.21              5.27   103
# Count number of cars per WOM_1 score for Ford models
ford_wom_counts <- Ford_data %>%
  group_by(WOM_1, Model) %>%
  summarise(Num_Cars = n(), .groups = "drop")

#Created a bar graph
ggplot(ford_wom_counts, aes(x = factor(WOM_1), y = Num_Cars, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs( x = "Likelihood to Recommend (WOM_1)", y = "Number of Ford Cars",
        title = "Likelihood to Recommend by Ford Model") +
  scale_fill_manual(values = c("steelblue", "purple"))

# Count number of cars per WOM_2 score for Ford models
ford_wom2_counts <- Ford_data %>%
  group_by(WOM_2, Model) %>%
  summarise(Num_Cars = n(), .groups = "drop")

#Created a bar graph
ggplot(ford_wom2_counts, aes(x = factor(WOM_2), y = Num_Cars, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs( x = "Positive Sentiment (WOM_2)", y = "Number of Ford Cars",
        title = "Positive Sentiment by Ford Model") +
  scale_fill_manual(values = c("steelblue", "purple"))

# Calculate mean WOM scores for each brand
brand_wom_summary <- Car_Total %>%
  group_by(Brand) %>%
  summarise(
    Mean_WOM_1 = mean(WOM_1, na.rm = TRUE),
    Mean_WOM_2 = mean(WOM_2, na.rm = TRUE),
    WOM_Mean = mean(c(WOM_1, WOM_2), na.rm = TRUE)  # Overall WOM Score
  ) %>%
  arrange(desc(WOM_Mean))  # Sort brands by WOM score

# Print summary
print(brand_wom_summary)
## # A tibble: 10 × 4
##    Brand     Mean_WOM_1 Mean_WOM_2 WOM_Mean
##    <chr>          <dbl>      <dbl>    <dbl>
##  1 Toyota          5.63       5.68     5.65
##  2 Ford            5.29       5.48     5.38
##  3 Honda           5.38       5.30     5.34
##  4 Chrysler        5.27       5.35     5.31
##  5 Chevrolet       5.09       5.20     5.15
##  6 Kia             5          5.15     5.07
##  7 Lincoln         4.72       4.85     4.78
##  8 Fiat            4.61       4.78     4.69
##  9 Buick           4.65       4.32     4.48
## 10 Dodge           4.39       4.49     4.44
#Created a bar graph to compare Ford's WOM scores with its competitors
ggplot(brand_wom_summary, aes(x = reorder(Brand, -WOM_Mean), y = WOM_Mean, fill = Brand == "Ford")) +
  geom_bar(stat = "identity") +
  labs(y = "Average WOM Score",  x = "Brand", 
       title = "Comparison of WOM Scores: Ford vs Other Brands") +
  scale_fill_manual(values = c("steelblue", "purple"), guide = FALSE)  # Ford highlighted in blue
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Regional Variation in Customer Satisfaction for Ford Purchases
ggplot(Ford_data, aes(x = Region, y = Post.Satis, fill = Region)) +
  geom_boxplot() +
  labs(x = "Region", y = "Satisfaction Score",
       title = "Customer Satisfaction by Region (Ford)")