Project 1

Get working directory

getwd()

## [1] "C:/Users/kylev/OneDrive/Documents"

setwd("C:/Users/kylev/OneDrive/Documents")

Read the two survey files, rename the key column, then merge and view the data.

Car1 <- read.csv("Car_Survey_1a.csv")
Car2 <- read.csv("Car_Survey_2a.csv")

str(Car1)

## 'data.frame':    1180 obs. of  23 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...

head(Car1, n = 5)

##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X
## 1         2        2  Collision   Male  18         2 NA
## 2         2        2  Collision   Male  18         2 NA
## 3         1        2  Collision   Male  19         2 NA
## 4         2        2  Collision   Male  19         2 NA
## 5         1        2  Collision Female  19         2 NA

View(Car1)

str(Car2)

## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...

head(Car2, n = 5)

##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5

View(Car2)

# Rename the respondent ID column so it matches the key name used in Car1.
# The column is selected by its current name ("Respondents") rather than by
# position, so the rename stays correct if the CSV's column order changes and
# is a harmless no-op if the chunk is run a second time.
names(Car2)[names(Car2) == "Respondents"] <- "Resp"
head(Car2, n = 5)

##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2 Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3 Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4 Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5 Res5 European Ford Expedition  15   8  5.5      16     14          5

# Join the two survey files on the shared respondent ID. merge() defaults to
# all = FALSE, so only respondents that appear in both files are kept.
Car_Total <- merge(x = Car1, y = Car2, by = "Resp")
str(Car_Total)

## 'data.frame':    1049 obs. of  31 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...

# Save the merged data; row.names = FALSE keeps the row index out of the file.
# NOTE(review): the output file name has no ".csv" extension - confirm intended.
write.csv(x = Car_Total, file = "Car_Total", row.names = FALSE)
View(Car_Total)

Load the installed packages

library(readr)
library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Check Missing values, and values for Att_1

#check missing values
summary(Car_Total)

##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :6.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                     NA's   :4                       NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education        X              Region             Model          
##  Min.   :1.000   Mode:logical   Length:1049        Length:1049       
##  1st Qu.:2.000   NA's:1049      Class :character   Class :character  
##  Median :2.000                  Mode  :character   Mode  :character  
##  Mean   :1.989                                                       
##  3rd Qu.:2.000                                                       
##  Max.   :3.000                                                       
##                                                                      
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
##

#compute the mean value of Att_1
mean(Car_Total$Att_1)

## [1] NA

# Replace missing Att_1 values with the column mean. The mean is computed with
# na.rm = TRUE so the missing entries themselves are excluded from it.
meanAAT1 <- mean(Car_Total$Att_1, na.rm = TRUE)
Car_Total$Att_1 <- replace(Car_Total$Att_1, is.na(Car_Total$Att_1), meanAAT1)

Calculate the mean values for all other variables

mean(Car_Total$Att_1)

## [1] 4.882297

mean(Car_Total$Att_2)

## [1] 5.28694

# NOTE: mean() defaults to na.rm = FALSE, so every column that still contains
# missing values (see the NA's row of summary(Car_Total) above) returns NA
# here. Only Att_1 has been imputed at this point.
mean(Car_Total$Enj_1)

## [1] NA

mean(Car_Total$Enj_2)

## [1] NA

mean(Car_Total$Perform_1)

## [1] NA

mean(Car_Total$Perform_2)

## [1] NA

mean(Car_Total$Perform_3)

## [1] NA

mean(Car_Total$WOM_1)

## [1] NA

mean(Car_Total$WOM_2)

## [1] NA

mean(Car_Total$Futu_Pur_1)

## [1] NA

mean(Car_Total$Futu_Pur_2)

## [1] NA

mean(Car_Total$Valu_Percp_1)

## [1] NA

mean(Car_Total$Valu_Percp_2)

## [1] NA

mean(Car_Total$Pur_Proces_1)

## [1] NA

mean(Car_Total$Pur_Proces_2)

## [1] NA

mean(Car_Total$Residence)

## [1] NA

# Pay_Meth has no missing values, so its mean is defined.
mean(Car_Total$Pay_Meth)

## [1] 2.15348

# NOTE: Insur_Type and Gender are character columns; mean() is not defined
# for them, so each call warns and returns NA.
mean(Car_Total$Insur_Type)

## Warning in mean.default(Car_Total$Insur_Type): argument is not numeric or
## logical: returning NA

## [1] NA

mean(Car_Total$Gender)

## Warning in mean.default(Car_Total$Gender): argument is not numeric or logical:
## returning NA

## [1] NA

mean(Car_Total$Age)

## [1] 35.22021

mean(Car_Total$Education)

## [1] 1.988561

# NOTE: X contains only NA values (see summary above), so its mean is NA.
mean(Car_Total$X)

## [1] NA

# NOTE: Region and Model are character columns, so mean() warns and returns NA.
mean(Car_Total$Region)

## Warning in mean.default(Car_Total$Region): argument is not numeric or logical:
## returning NA

## [1] NA

mean(Car_Total$Model)

## Warning in mean.default(Car_Total$Model): argument is not numeric or logical:
## returning NA

## [1] NA

mean(Car_Total$MPG)

## [1] 19.58055

mean(Car_Total$Cyl)

## [1] 5.799809

mean(Car_Total$acc1)

## [1] 6.201668

mean(Car_Total$H_Cost)

## [1] 9.634223

mean(Car_Total$C_cost.)

## [1] 11.35319

mean(Car_Total$Post.Satis)

## [1] 5.280267

Replace all missing (NA) values in each variable with the mean of that column

# Mean-impute missing values, but only in numeric columns.
# Applying the rule to every column is wrong for non-numeric data: character
# columns have no mean, and the all-NA logical column X would be turned into a
# numeric column of NaN (the mean of zero values). Columns without missing
# values are returned untouched so their type is preserved.
Car_Total <- Car_Total %>%
  mutate(across(
    where(is.numeric),
    ~ if (anyNA(.x)) replace(.x, is.na(.x), mean(.x, na.rm = TRUE)) else .x
  ))
summary(Car_Total)

##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:5.000  
##  Mode  :character   Median :5.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                    
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##                                                                                
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##                                                                 
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education           X           Region             Model          
##  Min.   :1.000   Min.   : NA    Length:1049        Length:1049       
##  1st Qu.:2.000   1st Qu.: NA    Class :character   Class :character  
##  Median :2.000   Median : NA    Mode  :character   Mode  :character  
##  Mean   :1.989   Mean   :NaN                                         
##  3rd Qu.:2.000   3rd Qu.: NA                                         
##  Max.   :3.000   Max.   : NA                                         
##                  NA's   :1049                                        
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
##

Compute the mean of Att_1 and Att_2 for each respondent.

# Per-row attitude score: the average of the two attitude items.
Car_Total$Att_Mean <- with(Car_Total, (Att_1 + Att_2) / 2)
View(Car_Total)

Compute the per-respondent mean for the remaining multi-item variables

# Per-row composite scores: each one is the plain average of the survey items
# that belong to the same construct. Columns are added in the order below.
Car_Total$Enj_Mean <- with(Car_Total, (Enj_1 + Enj_2) / 2)
Car_Total$Perform_Mean <- with(
  Car_Total,
  (Perform_1 + Perform_2 + Perform_3) / 3
)
Car_Total$Valu_Mean <- with(Car_Total, (Valu_Percp_1 + Valu_Percp_2) / 2)
Car_Total$WOM_Mean <- with(Car_Total, (WOM_1 + WOM_2) / 2)
Car_Total$Future_Mean <- with(Car_Total, (Futu_Pur_1 + Futu_Pur_2) / 2)
Car_Total$Pur_Purchase_Mean <- with(
  Car_Total,
  (Pur_Proces_1 + Pur_Proces_2) / 2
)
view(Car_Total)

Graph frequency count of cars across the different regions.

# Bar chart of the number of rows (cars) per region, with each bar labelled by
# its count. after_stat(count) is the supported way to reference the computed
# count; the `..count..` notation was deprecated in ggplot2 3.4.0.
ggplot(Car_Total, aes(x = Region, fill = Region)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 0) +
  labs(y = "Number of Cars",
       x = "Region",
       title = "Number of Cars by Region")

## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Numerical percentage distribution of whole market.

car_region_percentage <- prop.table(table(Car_Total$Region))
print(car_region_percentage)

## 
##       American          Asian       European Middle Eastern 
##      0.3431840      0.1982841      0.2001907      0.2583413

Define model and region as categorical type Variables

# Treat Model and Region as categorical variables by converting both columns
# to factors in one step.
Car_Total[c("Model", "Region")] <-
  lapply(Car_Total[c("Model", "Region")], as.factor)

Create a graph to show the number of cars by model and region

ggplot(Car_Total, aes(x=Region, fill = Model))+
  theme_bw()+
  geom_bar()+
  labs(y = "Number of Cars",
       title = "Number of Cars by Model and Region")

Split the Model column into Brand and Model

# Split values such as "Ford Expedition" at the first space: the first word
# becomes Brand, and extra = "merge" keeps any remaining words together in
# Model.
cars_seperated <- separate(
  Car_Total,
  col = Model,
  into = c("Brand", "Model"),
  sep = " ",
  extra = "merge"
)

Car_Total <- cars_seperated
view(Car_Total)

View Graph of cars split by brand and region

ggplot(Car_Total, aes(x=Region, fill = Brand))+
  theme_bw()+
  geom_bar()+
  labs(y = "Number of Cars",
       title = "Number of Cars by Brand and Region")

Count the number of cars per brand, then cross-tabulate the counts by brand and region.

# Count rows per brand. Car_Total has no Make column, so only Brand is used as
# the grouping variable. It is referenced by bare name (not Car_Total$Brand)
# so the result column is called "Brand".
count(Car_Total, Brand, name = "Freq")

##    Car_Total$Brand Freq
## 1            Buick   31
## 2        Chevrolet   64
## 3         Chrysler  169
## 4            Dodge   41
## 5             Fiat   18
## 6             Ford  202
## 7            Honda  159
## 8              Kia   34
## 9          Lincoln   39
## 10          Toyota  292

xtabs(~Region + Brand, Car_Total)

##                 Brand
## Region           Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
##   American          17        22       54    22    9   81    38  15       0
##   Asian              0         0       24     0    0   24    58   0       0
##   European          10        21       13     0    9   29    29  19       0
##   Middle Eastern     4        21       78    19    0   68    34   0      39
##                 Brand
## Region           Toyota
##   American          102
##   Asian             102
##   European           80
##   Middle Eastern      8

brand_region_counts <- table(Car_Total$Brand, Car_Total$Region)
print(brand_region_counts)

##            
##             American Asian European Middle Eastern
##   Buick           17     0       10              4
##   Chevrolet       22     0       21             21
##   Chrysler        54    24       13             78
##   Dodge           22     0        0             19
##   Fiat             9     0        9              0
##   Ford            81    24       29             68
##   Honda           38    58       29             34
##   Kia             15     0       19              0
##   Lincoln          0     0        0             39
##   Toyota         102   102       80              8

Develop and graph attitude of brand by region.

# attitude toward car brand by region
brand_region_table <- aggregate(Att_1~Brand+Region, Car_Total, mean)
print(brand_region_table)

##        Brand         Region    Att_1
## 1      Buick       American 5.529412
## 2  Chevrolet       American 5.227273
## 3   Chrysler       American 5.011979
## 4      Dodge       American 4.818182
## 5       Fiat       American 4.666667
## 6       Ford       American 4.567901
## 7      Honda       American 5.657895
## 8        Kia       American 3.266667
## 9     Toyota       American 5.254902
## 10  Chrysler          Asian 5.291667
## 11      Ford          Asian 4.166667
## 12     Honda          Asian 5.568966
## 13    Toyota          Asian 4.881199
## 14     Buick       European 5.000000
## 15 Chevrolet       European 5.000000
## 16  Chrysler       European 5.230769
## 17      Fiat       European 3.777778
## 18      Ford       European 5.241379
## 19     Honda       European 4.758621
## 20       Kia       European 3.789474
## 21    Toyota       European 4.800000
## 22     Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24  Chrysler Middle Eastern 4.038462
## 25     Dodge Middle Eastern 4.473684
## 26      Ford Middle Eastern 4.014706
## 27     Honda Middle Eastern 5.500000
## 28   Lincoln Middle Eastern 5.692308
## 29    Toyota Middle Eastern 5.375000

ggplot(brand_region_table, aes(x=Region, y=Att_1,  group = Brand))+
  geom_line(aes(colour = Brand))+
  geom_point(aes(colour = Brand))+
  labs(y = "Att_1 Mean",
       title = "Attitude Mean by Brand and region")

Filter brand region by Toyota.

Toyota_Att1_Mean <- brand_region_table %>%
  filter( Brand == "Toyota")
print(Toyota_Att1_Mean)

##    Brand         Region    Att_1
## 1 Toyota       American 5.254902
## 2 Toyota          Asian 4.881199
## 3 Toyota       European 4.800000
## 4 Toyota Middle Eastern 5.375000

Graph filtered Toyota data.

ggplot(Toyota_Att1_Mean, aes(x=Region, y=Att_1,  group = Brand))+
  geom_line(aes(colour = Brand))+
  geom_point(aes(colour = Brand))+
  labs(y = "Att_1 Mean",
       title = "Attitude Mean by for Toyota by Region")

Filter and Compare attitudes of Toyota and Honda.

# Keep only the Toyota and Honda rows of the brand-by-region attitude table.
Multiple_Att1_Mean <- filter(
  brand_region_table,
  Brand %in% c("Toyota", "Honda")
)
view(Multiple_Att1_Mean)

Graph attitude mean for Toyota and Honda.

ggplot(Multiple_Att1_Mean, aes(x=Region, y=Att_1,  group = Brand))+
  geom_line(aes(colour = Brand))+
  geom_point(aes(colour = Brand))+
  labs(y = "Att_1 Mean",
       title = "Attitude Mean  for Toyota and Honda by Region")

Create demographic variable by age group.

# Bucket Age into three labelled groups. With right = FALSE the intervals are
# closed on the left: [0, 30), [30, 50) and [50, Inf).
Car_Total$AgeGr <- cut(
  x = Car_Total$Age,
  breaks = c(0, 30, 50, Inf),
  right = FALSE,
  labels = c("Young Adults", "Adults", "Mature Adults")
)
names(Car_Total)

##  [1] "Resp"              "Att_1"             "Att_2"            
##  [4] "Enj_1"             "Enj_2"             "Perform_1"        
##  [7] "Perform_2"         "Perform_3"         "WOM_1"            
## [10] "WOM_2"             "Futu_Pur_1"        "Futu_Pur_2"       
## [13] "Valu_Percp_1"      "Valu_Percp_2"      "Pur_Proces_1"     
## [16] "Pur_Proces_2"      "Residence"         "Pay_Meth"         
## [19] "Insur_Type"        "Gender"            "Age"              
## [22] "Education"         "X"                 "Region"           
## [25] "Brand"             "Model"             "MPG"              
## [28] "Cyl"               "acc1"              "C_cost."          
## [31] "H_Cost"            "Post.Satis"        "Att_Mean"         
## [34] "Enj_Mean"          "Perform_Mean"      "Valu_Mean"        
## [37] "WOM_Mean"          "Future_Mean"       "Pur_Purchase_Mean"
## [40] "AgeGr"

head(Car_Total, n=5)

##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education   X   Region  Brand
## 1         2        2  Collision   Male  18         2 NaN European   Ford
## 2         1        2  Collision   Male  21         2 NaN European   Ford
## 3         2        1  Collision Female  32         1 NaN American Toyota
## 4         2        3  Liability Female  24         2 NaN    Asian Toyota
## 5         1        3  Liability Female  24         2 NaN    Asian Toyota
##        Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean Enj_Mean
## 1 Expedition  15   8  5.5      16     14          4      6.0      6.0
## 2 Expedition  15   8  5.5      16     14          5      6.0      4.0
## 3       Rav4  24   4  8.2      10      8          4      6.5      5.0
## 4    Corolla  26   4  8.0       7      6          6      6.0      6.5
## 5    Corolla  26   4  8.0       7      6          5      6.0      6.5
##   Perform_Mean Valu_Mean WOM_Mean Future_Mean Pur_Purchase_Mean        AgeGr
## 1     4.666667       3.5      3.0           3               5.0 Young Adults
## 2     3.000000       6.0      5.5           6               6.0 Young Adults
## 3     5.666667       6.5      4.0           6               5.0       Adults
## 4     6.000000       5.0      6.0           6               4.5 Young Adults
## 5     6.000000       5.5      4.0           5               6.5 Young Adults

Age group analysis of Toyota by Region.

Filtered_Data_Toyota <- Car_Total %>%
  filter(Brand == "Toyota")

ggplot(Filtered_Data_Toyota, aes(x=Region, fill = AgeGr))+
  theme_bw()+
  geom_bar()+
  labs(y = "Number of Cars",
       title = "Number of Toyota cars by Age group and Region")

Analysis of Toyota by Model and Age Group

ggplot(Filtered_Data_Toyota, aes(x=Model, fill = AgeGr))+
  theme_bw()+
  geom_bar()+
  labs(y = "Number of Cars",
       title = "Number of Toyota cars by Age group and Model")

Customer average satisfaction by age group

# Stacked bar chart of post-purchase satisfaction scores for the Toyota rows,
# coloured by age group. geom_bar() counts rows per score and no average is
# computed, so the title describes a distribution rather than an average.
ggplot(Filtered_Data_Toyota, aes(x = Post.Satis, fill = AgeGr)) +
  theme_bw() +
  geom_bar() +
  labs(y = "Total Number of Customers",
       x = "Post Purchase Satisfaction",
       title = "Customer Satisfaction Distribution by Age Group")

Customer Satisfaction by Car Model

# Stacked bar chart of post-purchase satisfaction scores for the Toyota rows,
# coloured by car model. geom_bar() counts rows per score and no average is
# computed, so the title describes a distribution rather than an average.
ggplot(Filtered_Data_Toyota, aes(x = Post.Satis, fill = Model)) +
  theme_bw() +
  geom_bar() +
  labs(y = "Total Number of Customers",
       x = "Post Purchase Satisfaction",
       title = "Customer Satisfaction Distribution by Car Model")

Future purchase intent by region

future_purchase_table <- aggregate(Future_Mean~Brand+Region, Car_Total, mean)

ggplot(future_purchase_table, aes(x=Region, y=Future_Mean,  group = Brand))+
  geom_line(aes(colour = Brand))+
  geom_point(aes(colour = Brand))+
  labs(y = "Future Purchase Mean",
       title = "Future Purchase Mean by Brand and region")

Filter Toyota from above

Toyota_future_Mean <- future_purchase_table %>%
  filter( Brand == "Toyota")
print(Toyota_future_Mean)

##    Brand         Region Future_Mean
## 1 Toyota       American    5.313786
## 2 Toyota          Asian    5.382353
## 3 Toyota       European    5.275000
## 4 Toyota Middle Eastern    5.125000

Future purchase intent for Toyota by region

ggplot(Toyota_future_Mean, aes(x=Region, y=Future_Mean,  group = Brand))+
  geom_line(aes(colour = Brand))+
  geom_point(aes(colour = Brand))+
  labs(y = "Future Purchase Mean",
       title = "Future Purchase Mean for Toyota by Region")

Filtering and graphing Toyota's future purchase intent against its main competitors

# Restrict the data to Toyota and the three competitor brands, then average
# the future-purchase score for each brand and region.
future_mean_competitors <- Car_Total %>%
  filter(Brand %in% c("Toyota", "Honda", "Ford", "Chrysler"))


future_purchase_tables <- aggregate(Future_Mean ~ Brand + Region,
                                    future_mean_competitors, mean)
# Inspect the competitor-only table built on the line above
# (future_purchase_tables), not the all-brand future_purchase_table.
view(future_purchase_tables)

# One line per brand showing how the mean future-purchase score varies by
# region.
ggplot(future_purchase_tables,
       aes(x = Region, y = Future_Mean, group = Brand)) +
  geom_line(aes(colour = Brand)) +
  geom_point(aes(colour = Brand)) +
  labs(y = "Future Purchase Mean",
       title = "Future Purchase Mean by Brand and region")

Graphing and filtering Toyota market share contrasting it with its main competitors

# Keep only the rows for Toyota, Honda, Ford and Chrysler.
Brands_competitors <- filter(
  Car_Total,
  Brand %in% c("Toyota", "Honda", "Ford", "Chrysler")
)

# Stacked bar chart: number of rows (cars) per region, split by brand.
ggplot(data = Brands_competitors, mapping = aes(x = Region, fill = Brand)) +
  geom_bar() +
  theme_bw() +
  labs(title = "Market Share of Main Competitors",
       y = "Number of Cars")

Project 1

Kyle VanEvery

2025-01-29