getwd()
## [1] "/Users/markyuhasz/FallWinter 2024 Classes/MKTG3P98-Business analytics and intell/Assignment1"
setwd("/Users/markyuhasz/FallWinter 2024 Classes/MKTG3P98-Business analytics and intell/Assignment1")
library(readxl)
library(ggplot2)
library(readr) #To read CSV files
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Car1<-read.csv("Car_Survey_1a.csv")
str(Car1)
## 'data.frame': 1180 obs. of 23 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
Car2<-read.csv("Car_Survey_2a.csv")
str(Car2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost. : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
names(Car2)[1]<-c("Resp")
head(Car2, n = 1)
## Resp Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
Car_Total <- merge(Car1, Car2, by = "Resp")
numeric_cols <- sapply(Car_Total, is.numeric)
Car_Total[, numeric_cols] <- lapply(Car_Total[, numeric_cols], function(x) {
mean_val <- mean (x, na.rm = TRUE)
x[is.na(x)] <- mean_val
return(x)
})
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
ggplot(Car_Total, aes(x = Model, fill = Model)) +
geom_bar() +
theme_bw() +
labs(title = " Number of People Owning Different Car Models",
x = "Car Model",
y = "Number") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Car_Total[c('Make', 'Model_v1')] <- str_split_fixed(Car_Total$Model, " ", 2) #slips it by second character
Car_Total <- Car_Total %>% #call dataframe and create new column with new groupings
mutate(Parent = case_when(Make == "Buick" ~ "General Motors",
Make == "Chevrolet" ~ "General Motors",
Make == "Chrysler" ~ "Chrysler",
Make == "Dodge" ~ "Chrysler",
Make == "Fiat" ~ "Chrysler",
Make == "Ford" ~ "Ford",
Make == "Honda" ~ "Honda",
Make == "Kia" ~ "Kia",
Make == "Lincoln" ~ "Ford",
Make == "Toyota" ~ "Toyota",
TRUE ~ "Check"))
Car_Total$Parent<-as.factor(Car_Total$Parent)
p <-ggplot(Car_Total, aes(x=Post.Satis, fill = Parent)) +
theme_bw() +
geom_bar(position = 'dodge2') +
labs(y='Number of Cars',
x = 'Post Satisfaction',
title = "Post Purchase Satisfaction by Make")
p + annotate("text", x=3, y=70, label = "1 = Dissatified, 7 = Very Satified")
Car_Total$Futmean = (Car_Total$Futu_Pur_1 + Car_Total$Futu_Pur_2) / 2
View(Car_Total[c("Futu_Pur_1", "Futu_Pur_2", "Futmean")])
Car_Total_Multi <- Car_Total %>%
filter(Parent %in% c("Ford", "Toyota", "Honda", "Chrysler"))
f <-ggplot(Car_Total_Multi, aes(x = factor(round(Futmean)), fill = Parent)) +
geom_bar(position = 'dodge') +
labs(y = 'Owners Rating',
x = "Future Purchase Intention",
title = "Number of People that will/likely Purchase the same Car") +
theme_bw()
f + annotate("text", x=3, y=80, label = "1 = Strongly Disagree, 7 = Strongly Agree")
### Value Perception Contingency Table
Car_Total$Valu_mean <- (Car_Total$Valu_Percp_1 + Car_Total$Valu_Percp_2) / 2
brand_region_table <- aggregate(Valu_mean~Parent + Region, Car_Total, mean)
print(brand_region_table)
## Parent Region Valu_mean
## 1 Chrysler American 5.143597
## 2 Ford American 5.061728
## 3 General Motors American 5.333333
## 4 Honda American 5.026316
## 5 Kia American 5.666667
## 6 Toyota American 5.119353
## 7 Chrysler Asian 4.791667
## 8 Ford Asian 5.062500
## 9 Honda Asian 5.422414
## 10 Toyota Asian 5.127451
## 11 Chrysler European 5.340909
## 12 Ford European 5.637931
## 13 General Motors European 4.854839
## 14 Honda European 5.413793
## 15 Kia European 5.421053
## 16 Toyota European 5.281250
## 17 Chrysler Middle Eastern 5.489691
## 18 Ford Middle Eastern 5.485981
## 19 General Motors Middle Eastern 5.120000
## 20 Honda Middle Eastern 5.455882
## 21 Toyota Middle Eastern 5.750000
Multiple_Valu_mean <- brand_region_table %>%
filter(Parent == "Honda" | Parent == "Toyota" | Parent == "Ford" | Parent == "Chrysler")
head(Multiple_Valu_mean, n=10)
## Parent Region Valu_mean
## 1 Chrysler American 5.143597
## 2 Ford American 5.061728
## 3 Honda American 5.026316
## 4 Toyota American 5.119353
## 5 Chrysler Asian 4.791667
## 6 Ford Asian 5.062500
## 7 Honda Asian 5.422414
## 8 Toyota Asian 5.127451
## 9 Chrysler European 5.340909
## 10 Ford European 5.637931
table(Multiple_Valu_mean$Parent)
##
## Chrysler Ford General Motors Honda Kia
## 4 4 0 4 0
## Toyota
## 4
ggplot(Multiple_Valu_mean, aes(x=Region, y=Valu_mean, group=Parent)) +
geom_line(aes(color=Parent)) +
geom_point(aes(color=Parent)) +
scale_y_continuous(limits = c(4.5,6))
labs(y="Value Mean",
title = "Value Mean for Toyota, Honda, Ford and Chrysler by Region")
## $y
## [1] "Value Mean"
##
## $title
## [1] "Value Mean for Toyota, Honda, Ford and Chrysler by Region"
##
## attr(,"class")
## [1] "labels"
Car_Total$AgeGrp<-cut(Car_Total$Age,
breaks = c(0, 30, 50, Inf),
labels = c("Young adults", "Adults", "Mature Adults"),
right = FALSE)
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region" "Model"
## [26] "MPG" "Cyl" "acc1" "C_cost." "H_Cost"
## [31] "Post.Satis" "Make" "Model_v1" "Parent" "Futmean"
## [36] "Valu_mean" "AgeGrp"
head(Car_Total$AgeGrp)
## [1] Young adults Young adults Adults Young adults Young adults
## [6] Young adults
## Levels: Young adults Adults Mature Adults
ggplot(Car_Total, aes(x=Parent, fill = AgeGrp)) +
theme_bw() +
geom_bar(position = 'dodge') +
labs(y="Number of People",
x="Age",
title = "Age Groups Purchasing Make")