# Print the current working directory before changing it.
getwd()
## [1] "/Users/markyuhasz/FallWinter 2024 Classes/MKTG3P98-Business analytics and intell/Assignment 2"
# NOTE(review): hard-coded, machine-specific absolute path. The relative
# read of "Car_Total.csv" later in the script depends on this directory.
setwd("/Users/markyuhasz/FallWinter 2024 Classes/MKTG3P98-Business analytics and intell/Assignment 2")
library(readxl)
library(ggplot2)
library(readr)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load Car_Total.csv from the working directory and list its column names.
Car_Total <- read.csv(file = "Car_Total.csv")
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region" "Model"
## [26] "MPG" "Cyl" "acc1" "C_cost." "H_Cost"
## [31] "Post.Satis"
# Per-column summary of the raw data; numeric columns also report NA counts.
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
##
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
# Mean-impute missing values in every numeric column of Car_Total.
# vapply() guarantees a logical mask (sapply() can change its return type),
# and list-style indexing Car_Total[numeric_cols] stays a data frame even if
# only one column is numeric, so lapply() always iterates over columns.
# Non-numeric columns (character, and the all-NA logical column X) are left
# untouched.
numeric_cols <- vapply(Car_Total, is.numeric, logical(1))
Car_Total[numeric_cols] <- lapply(Car_Total[numeric_cols], function(x) {
  # Replace each NA with the mean of the observed values of that column.
  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
})
# Re-run the summary to check the numeric columns after imputation.
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
# Bin Age into three left-closed groups: [0, 30), [30, 50) and [50, Inf).
Car_Total[["AgeGrp"]] <- cut(
  x = Car_Total[["Age"]],
  breaks = c(0, 30, 50, Inf),
  labels = c("Young Adults", "Adults", "Mature Adults"),
  right = FALSE
)
# List the columns again to confirm AgeGrp was added.
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region" "Model"
## [26] "MPG" "Cyl" "acc1" "C_cost." "H_Cost"
## [31] "Post.Satis" "AgeGrp"
# Preview the first values and the factor levels of the new AgeGrp column.
head(Car_Total$AgeGrp)
## [1] Young Adults Young Adults Adults Young Adults Young Adults
## [6] Young Adults
## Levels: Young Adults Adults Mature Adults
# Split Model at the first space into two pieces: the brand goes to Make and
# the remainder of the string goes to Model_v1.
Car_Total[c("Make", "Model_v1")] <- str_split_fixed(
  string = Car_Total$Model,
  pattern = " ",
  n = 2
)
# Open the data viewer to inspect the result.
View(Car_Total)
# Map each Make to its parent company; any Make not listed is labelled
# "Check" so it can be spotted and reviewed.
Car_Total <- mutate(
  Car_Total,
  Parent = case_when(
    Make %in% c("Buick", "Chevrolet") ~ "General Motors",
    Make %in% c("Chrysler", "Dodge", "Fiat") ~ "Chrysler",
    Make %in% c("Ford", "Lincoln") ~ "Ford",
    Make %in% "Honda" ~ "Honda",
    Make %in% "Kia" ~ "Kia",
    Make %in% "Toyota" ~ "Toyota",
    TRUE ~ "Check"
  )
)
# Store Parent as a factor for grouping and modelling.
Car_Total$Parent <- as.factor(Car_Total$Parent)
# Copy of Car_Total used by the analysis steps that follow.
ct <- Car_Total
# Count, mean, variance and standard deviation of Post.Satis per age group.
# Group by the bare column name: writing `ct$AgeGrp` inside group_by()
# bypasses data masking and names the grouping column literally `ct$AgeGrp`.
stat_table <- ct %>%
  group_by(AgeGrp) %>%
  summarise(count = n(),
            mean = mean(Post.Satis, na.rm = TRUE),
            var = var(Post.Satis, na.rm = TRUE),
            sd = sd(Post.Satis, na.rm = TRUE))
print(stat_table)
## # A tibble: 3 × 5
## AgeGrp count mean var sd
## <fct> <int> <dbl> <dbl> <dbl>
## 1 Young Adults 443 5.15 1.70 1.30
## 2 Adults 375 5.40 1.25 1.12
## 3 Mature Adults 231 5.34 1.59 1.26
# Preview the first 10 values of the "Model contains Toyota" logical mask.
head(stringr::str_detect(ct$Model, "Toyota"), 10)
## [1] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Preview the first 10 rows of ct whose Model contains "Toyota".
head(ct[str_detect(ct$Model,"Toyota"),], 10)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## 7 Res1003 2 2 1 2 2 2 1 6 7
## 8 Res1004 7 7 7 6 5 6 5 6 6
## 9 Res1005 2 1 2 1 2 2 2 7 7
## 10 Res1006 6 6 6 5 5 5 5 3 3
## 11 Res1007 4 4 4 2 3 5 3 7 7
## 14 Res101 6 6 7 6 5 6 3 5 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## 7 6 5 4 4 4 5
## 8 6 7 6 5 5 5
## 9 7 7 4 6 6 7
## 10 6 6 5 6 6 5
## 11 5 6 6 2 2 5
## 14 4 3 3 2 2 2
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 3 2 1 Collision Female 32 1 NA American
## 4 2 3 Liability Female 24 2 NA Asian
## 5 1 3 Liability Female 24 2 NA Asian
## 6 1 3 Liability Female 25 2 NA Asian
## 7 1 3 Liability Female 26 2 NA Asian
## 8 2 3 Liability Female 26 2 NA Asian
## 9 1 3 Liability Female 27 2 NA Asian
## 10 2 3 Liability Female 27 2 NA Asian
## 11 2 3 Liability Male 27 1 NA Asian
## 14 2 2 Collision Female 32 2 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make
## 3 Toyota Rav4 24 4 8.2 10 8 4 Adults Toyota
## 4 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 5 Toyota Corolla 26 4 8.0 7 6 5 Young Adults Toyota
## 6 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 7 Toyota Corolla 26 4 8.0 7 6 5 Young Adults Toyota
## 8 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 9 Toyota Corolla 26 4 8.0 7 6 7 Young Adults Toyota
## 10 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 11 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 14 Toyota Rav4 24 4 8.2 10 8 5 Adults Toyota
## Model_v1 Parent
## 3 Rav4 Toyota
## 4 Corolla Toyota
## 5 Corolla Toyota
## 6 Corolla Toyota
## 7 Corolla Toyota
## 8 Corolla Toyota
## 9 Corolla Toyota
## 10 Corolla Toyota
## 11 Corolla Toyota
## 14 Rav4 Toyota
# Logical mask over ALL rows of ct: TRUE where Model contains "Toyota".
# The mask must be full length. Truncating it with head(..., 10) leaves a
# 10-element logical vector that R silently recycles across every row of ct,
# so the "Toyota" subset ends up containing rows of every make.
ct_Toyota <- str_detect(ct$Model, "Toyota")
subtoyota <- ct[ct_Toyota, ]
# Sanity check: only "Toyota" should appear here.
# NOTE: recorded output in this document that was produced from the recycled
# mask (e.g. a table listing makes other than Toyota) is stale and must be
# regenerated by re-running the script.
table(subtoyota$Make)
##
## Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia
## 23 50 134 35 14 161 124 27
## Lincoln Toyota
## 33 238
# Shapiro-Wilk normality test of Post.Satis within each AgeGrp level.
tapply(subtoyota$Post.Satis,subtoyota$AgeGrp,shapiro.test)
## $`Young Adults`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87974, p-value = 5.208e-16
##
##
## $Adults
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.90027, p-value = 3.431e-13
##
##
## $`Mature Adults`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.88798, p-value = 1.627e-10
# One-way ANOVA of Post.Satis on age group, fitted on subtoyota.
# The formula uses bare column names together with data = subtoyota.
# Previously the formula pulled vectors via `subtoyota$...` while passing
# data = ct, so the data argument was silently ignored and misleading.
res_aov <- aov(Post.Satis ~ as.factor(AgeGrp), data = subtoyota)
# Residual diagnostics: histogram, normal Q-Q plot with reference line, and
# a Shapiro-Wilk test of the residuals.
hist(res_aov$residuals)
qqnorm(res_aov$residuals)
qqline(res_aov$residuals, col = "blue")
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.92886, p-value < 2.2e-16
# Bartlett test of homogeneity of variances of Post.Satis across AgeGrp.
bartlett.test(subtoyota$Post.Satis ~ subtoyota$AgeGrp)
##
## Bartlett test of homogeneity of variances
##
## data: subtoyota$Post.Satis by subtoyota$AgeGrp
## Bartlett's K-squared = 9.8668, df = 2, p-value = 0.007202
# Welch one-way test (no equal-variance assumption) of Post.Satis by AgeGrp.
welch_test <- oneway.test(
  formula = Post.Satis ~ as.factor(AgeGrp),
  data = subtoyota,
  var.equal = FALSE
)
print(welch_test)
##
## One-way analysis of means (not assuming equal variances)
##
## data: Post.Satis and as.factor(AgeGrp)
## F = 4.1148, num df = 2.00, denom df = 468.87, p-value = 0.01692
# Average of the two purchase-process items for every respondent.
ct$Avg_Purchase_Sat <- rowMeans(ct[, c("Pur_Proces_1", "Pur_Proces_2")], na.rm = TRUE)
# Rebuild the Toyota subset so it carries the new column. The row mask is
# computed over all rows of ct here; a truncated (10-element) mask would be
# recycled by R and select rows of every make rather than Toyota only.
# NOTE: recorded output in this document produced from a recycled mask is
# stale and must be regenerated by re-running the script.
subtoyota <- ct[str_detect(ct$Model, "Toyota"), ]
library(ggplot2)
# Box plot of Avg_Purchase_Sat by Region. Outliers are drawn in red, each
# group's mean is marked with a cross, and the rounded mean is printed
# above it.
ggplot(
  data = subtoyota,
  mapping = aes(x = Region, y = Avg_Purchase_Sat, fill = Region)
) +
  geom_boxplot(outlier.shape = 16, outlier.color = "red") +
  stat_summary(
    geom = "point", fun = mean,
    color = "black", shape = 4, size = 3
  ) +
  stat_summary(
    mapping = aes(label = round(after_stat(y), 2)),
    geom = "text", fun = mean,
    color = "black", vjust = -0.5
  ) +
  theme_minimal() +
  labs(
    title = "Average Purchase Satisfaction Across Regions for Toyota",
    x = "Region",
    y = "Average Purchase Satisfaction"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Shapiro-Wilk normality test of Avg_Purchase_Sat within each Region.
tapply(subtoyota$Avg_Purchase_Sat, subtoyota$Region, shapiro.test)
## $American
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.9549, p-value = 8.373e-08
##
##
## $Asian
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.94111, p-value = 2.937e-06
##
##
## $European
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.94939, p-value = 9.963e-06
##
##
## $`Middle Eastern`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.92133, p-value = 2.109e-09
# Bartlett test of homogeneity of variances of Avg_Purchase_Sat across Region.
bartlett.test(subtoyota$Avg_Purchase_Sat, subtoyota$Region)
##
## Bartlett test of homogeneity of variances
##
## data: subtoyota$Avg_Purchase_Sat and subtoyota$Region
## Bartlett's K-squared = 14.01, df = 3, p-value = 0.002892
# One-way ANOVA of Avg_Purchase_Sat on Region (coerced to a factor).
# The fitted object is reused by the TukeyHSD() calls that follow.
aov_test_avg <- aov(Avg_Purchase_Sat ~ as.factor(Region), data = subtoyota)
print(aov_test_avg)
## Call:
## aov(formula = Avg_Purchase_Sat ~ as.factor(Region), data = subtoyota)
##
## Terms:
## as.factor(Region) Residuals
## Sum of Squares 35.9247 1166.4424
## Deg. of Freedom 3 835
##
## Residual standard error: 1.181921
## Estimated effects may be unbalanced
# Tukey HSD pairwise comparisons of the Region means from the fitted ANOVA.
TukeyHSD(aov_test_avg)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Avg_Purchase_Sat ~ as.factor(Region), data = subtoyota)
##
## $`as.factor(Region)`
## diff lwr upr p adj
## Asian-American 0.1263815 -0.17205054 0.42481353 0.6956795
## European-American -0.1497654 -0.44475846 0.14522770 0.5587499
## Middle Eastern-American 0.4201430 0.14776592 0.69252009 0.0004514
## European-Asian -0.2761469 -0.61117213 0.05887838 0.1469822
## Middle Eastern-Asian 0.2937615 -0.02153245 0.60905547 0.0781441
## Middle Eastern-European 0.5699084 0.25786749 0.88194928 0.0000179
# Plot the Tukey confidence intervals for each pairwise Region difference.
plot(TukeyHSD(aov_test_avg))
# Welch one-way test of Avg_Purchase_Sat by Region; var.equal = FALSE is the
# default of oneway.test() and is written out here for clarity.
oneway_result <- oneway.test(
  formula = Avg_Purchase_Sat ~ as.factor(Region),
  data = subtoyota,
  var.equal = FALSE
)
print(oneway_result)
##
## One-way analysis of means (not assuming equal variances)
##
## data: Avg_Purchase_Sat and as.factor(Region)
## F = 10.048, num df = 3.00, denom df = 420.52, p-value = 2.087e-06
# Pairwise t tests between regions using non-pooled standard deviations
# (pool.sd = FALSE), with p-values adjusted by the "BH" method.
pairwise.t.test(subtoyota$Avg_Purchase_Sat, subtoyota$Region, p.adjust.method = "BH", pool.sd = FALSE)
##
## Pairwise comparisons using t tests with non-pooled SD
##
## data: subtoyota$Avg_Purchase_Sat and subtoyota$Region
##
## American Asian European
## Asian 0.2971 - -
## European 0.2536 0.0596 -
## Middle Eastern 0.0001 0.0252 7.9e-06
##
## P value adjustment method: BH
# Keep only the Toyota and Honda rows.
Car_Total_filtered <- Car_Total[Car_Total$Make %in% c("Toyota", "Honda"), ]
# Side-by-side box plots of MPG for the two makes.
boxplot(
  MPG ~ Make,
  data = Car_Total_filtered,
  main = "Comparison of MPG Between Toyota and Honda",
  xlab = "Car brand",
  ylab = "Miles Per Gallon (MPG)",
  col = c("lightblue", "lightgreen")
)
# Shapiro-Wilk normality test of MPG over both makes combined.
shapiro.test(Car_Total_filtered$MPG)
##
## Shapiro-Wilk normality test
##
## data: Car_Total_filtered$MPG
## W = 0.70381, p-value < 2.2e-16
# One-way ANOVA of MPG on Make (Toyota vs Honda), followed by its ANOVA table.
res_aov <- aov(
  formula = MPG ~ Make,
  data = Car_Total_filtered
)
summary(res_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Make 1 0 0.286 0.036 0.849
## Residuals 449 3536 7.876
# Residual diagnostics for the MPG ~ Make model: histogram first, then a
# normal Q-Q plot with a blue reference line.
hist(
  res_aov$residuals,
  main = "Histogram of Residuals",
  xlab = "Residuals",
  col = "lightcoral",
  border = "black"
)
qqnorm(
  res_aov$residuals,
  pch = 1,
  frame = FALSE
)
qqline(
  res_aov$residuals,
  col = "blue",
  lwd = 2
)
#### Interpretation: The strong deviation from the diagonal line, coupled
#### with the clustering of points and the presence of outliers, confirms
#### that the residuals are not normally distributed. This supports the
#### findings from the Shapiro-Wilk test, where the p-value was far below
#### 0.05, indicating that the assumption of normality is violated.
# Shapiro-Wilk normality test of the residuals of the current res_aov fit.
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.70945, p-value < 2.2e-16
# Bartlett test of homogeneity of variances of MPG between the two makes.
bartlett.test(Car_Total_filtered$MPG, Car_Total_filtered$Make)
##
## Bartlett test of homogeneity of variances
##
## data: Car_Total_filtered$MPG and Car_Total_filtered$Make
## Bartlett's K-squared = 2.4429, df = 1, p-value = 0.1181
# Pooled-variance two-sample t-test of MPG between Honda and Toyota.
# The argument is spelled out in full as `var.equal`; the abbreviated
# `var.eq` only worked through R's partial argument matching.
t.test(MPG ~ Make, data = Car_Total_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: MPG by Make
## t = 0.19062, df = 449, p-value = 0.8489
## alternative hypothesis: true difference in means between group Honda and group Toyota is not equal to 0
## 95 percent confidence interval:
## -0.4908668 0.5963204
## sample estimates:
## mean in group Honda mean in group Toyota
## 22.79245 22.73973
# Average the two value-perception items into one score per respondent.
Car_Total[["Valu_Percp_Mean"]] <-
  (Car_Total[["Valu_Percp_1"]] + Car_Total[["Valu_Percp_2"]]) / 2
# Inspect the two items next to their average in the data viewer.
View(Car_Total[c("Valu_Percp_1", "Valu_Percp_2", "Valu_Percp_Mean")])
# Preview the first 10 values of the "Model contains Toyota" mask.
head(stringr::str_detect(Car_Total$Model, "Toyota"), 10)
## [1] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Preview the first 10 rows of Car_Total whose Model contains "Toyota".
head(Car_Total[str_detect(Car_Total$Model,"Toyota"),], 10)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## 7 Res1003 2 2 1 2 2 2 1 6 7
## 8 Res1004 7 7 7 6 5 6 5 6 6
## 9 Res1005 2 1 2 1 2 2 2 7 7
## 10 Res1006 6 6 6 5 5 5 5 3 3
## 11 Res1007 4 4 4 2 3 5 3 7 7
## 14 Res101 6 6 7 6 5 6 3 5 6
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## 7 6 5 4 4 4 5
## 8 6 7 6 5 5 5
## 9 7 7 4 6 6 7
## 10 6 6 5 6 6 5
## 11 5 6 6 2 2 5
## 14 4 3 3 2 2 2
## Residence Pay_Meth Insur_Type Gender Age Education X Region
## 3 2 1 Collision Female 32 1 NA American
## 4 2 3 Liability Female 24 2 NA Asian
## 5 1 3 Liability Female 24 2 NA Asian
## 6 1 3 Liability Female 25 2 NA Asian
## 7 1 3 Liability Female 26 2 NA Asian
## 8 2 3 Liability Female 26 2 NA Asian
## 9 1 3 Liability Female 27 2 NA Asian
## 10 2 3 Liability Female 27 2 NA Asian
## 11 2 3 Liability Male 27 1 NA Asian
## 14 2 2 Collision Female 32 2 NA American
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make
## 3 Toyota Rav4 24 4 8.2 10 8 4 Adults Toyota
## 4 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 5 Toyota Corolla 26 4 8.0 7 6 5 Young Adults Toyota
## 6 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 7 Toyota Corolla 26 4 8.0 7 6 5 Young Adults Toyota
## 8 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 9 Toyota Corolla 26 4 8.0 7 6 7 Young Adults Toyota
## 10 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 11 Toyota Corolla 26 4 8.0 7 6 6 Young Adults Toyota
## 14 Toyota Rav4 24 4 8.2 10 8 5 Adults Toyota
## Model_v1 Parent Valu_Percp_Mean
## 3 Rav4 Toyota 6.5
## 4 Corolla Toyota 5.0
## 5 Corolla Toyota 5.5
## 6 Corolla Toyota 4.5
## 7 Corolla Toyota 4.0
## 8 Corolla Toyota 5.5
## 9 Corolla Toyota 5.0
## 10 Corolla Toyota 5.5
## 11 Corolla Toyota 4.0
## 14 Rav4 Toyota 2.5
# Full-length logical mask: TRUE where Model contains "Toyota".
Car_Total_Toyota <- str_detect(Car_Total$Model, "Toyota")
head(Car_Total_Toyota, 5)
## [1] FALSE FALSE TRUE TRUE TRUE
head(Car_Total_Toyota, 5)
## [1] FALSE FALSE TRUE TRUE TRUE
# Subset Car_Total to the Toyota rows and preview the first five.
subtoyota <- Car_Total[Car_Total_Toyota, ]
head(subtoyota, 5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## 7 Res1003 2 2 1 2 2 2 1 6 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## 7 6 5 4 4 4 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region Model
## 3 2 1 Collision Female 32 1 NA American Toyota Rav4
## 4 2 3 Liability Female 24 2 NA Asian Toyota Corolla
## 5 1 3 Liability Female 24 2 NA Asian Toyota Corolla
## 6 1 3 Liability Female 25 2 NA Asian Toyota Corolla
## 7 1 3 Liability Female 26 2 NA Asian Toyota Corolla
## MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make Model_v1 Parent
## 3 24 4 8.2 10 8 4 Adults Toyota Rav4 Toyota
## 4 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 5 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## 6 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 7 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## Valu_Percp_Mean
## 3 6.5
## 4 5.0
## 5 5.5
## 6 4.5
## 7 4.0
# Preview the first five rows of the Toyota subset.
head(subtoyota,5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## 7 Res1003 2 2 1 2 2 2 1 6 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## 7 6 5 4 4 4 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region Model
## 3 2 1 Collision Female 32 1 NA American Toyota Rav4
## 4 2 3 Liability Female 24 2 NA Asian Toyota Corolla
## 5 1 3 Liability Female 24 2 NA Asian Toyota Corolla
## 6 1 3 Liability Female 25 2 NA Asian Toyota Corolla
## 7 1 3 Liability Female 26 2 NA Asian Toyota Corolla
## MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make Model_v1 Parent
## 3 24 4 8.2 10 8 4 Adults Toyota Rav4 Toyota
## 4 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 5 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## 6 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 7 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## Valu_Percp_Mean
## 3 6.5
## 4 5.0
## 5 5.5
## 6 4.5
## 7 4.0
# Count rows per Make in the subset; only "Toyota" is expected to appear.
table(subtoyota$Make)
##
## Toyota
## 292
# Encode Gender as a number: "Female" becomes 1 and "Male" becomes 0.
# Any other Gender value matches neither condition and becomes NA.
subtoyota <- mutate(
  subtoyota,
  Gender_numeric = case_when(
    Gender == "Male" ~ 0,
    Gender == "Female" ~ 1
  )
)
head(subtoyota, 5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## 6 Res1002 3 1 4 3 5 6 6 2 6
## 7 Res1003 2 2 1 2 2 2 1 6 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## 6 6 6 5 4 5 5
## 7 6 5 4 4 4 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region Model
## 3 2 1 Collision Female 32 1 NA American Toyota Rav4
## 4 2 3 Liability Female 24 2 NA Asian Toyota Corolla
## 5 1 3 Liability Female 24 2 NA Asian Toyota Corolla
## 6 1 3 Liability Female 25 2 NA Asian Toyota Corolla
## 7 1 3 Liability Female 26 2 NA Asian Toyota Corolla
## MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make Model_v1 Parent
## 3 24 4 8.2 10 8 4 Adults Toyota Rav4 Toyota
## 4 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 5 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## 6 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 7 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## Valu_Percp_Mean Gender_numeric
## 3 6.5 1
## 4 5.0 1
## 5 5.5 1
## 6 4.5 1
## 7 4.0 1
# Box plots of Valu_Percp_Mean for each Gender_numeric value (0 and 1).
boxplot(subtoyota$Valu_Percp_Mean ~ subtoyota$Gender_numeric, col=c(5,7))
# Shapiro-Wilk normality test of Valu_Percp_Mean over the whole subset.
shapiro.test(subtoyota$Valu_Percp_Mean)
##
## Shapiro-Wilk normality test
##
## data: subtoyota$Valu_Percp_Mean
## W = 0.94228, p-value = 2.831e-09
# ANOVA of Valu_Percp_Mean on the numeric 0/1 gender code; rows where
# Gender_numeric is NA are dropped by the fit.
res_aov<-aov(Valu_Percp_Mean~Gender_numeric, data=subtoyota)
# Print the fitted model (sums of squares and degrees of freedom).
res_aov
## Call:
## aov(formula = Valu_Percp_Mean ~ Gender_numeric, data = subtoyota)
##
## Terms:
## Gender_numeric Residuals
## Sum of Squares 0.3812 318.7387
## Deg. of Freedom 1 287
##
## Residual standard error: 1.053844
## Estimated effects may be unbalanced
## 3 observations deleted due to missingness
# Residual diagnostics for the current res_aov fit: histogram, normal Q-Q
# plot with a red reference line, and a Shapiro-Wilk test of the residuals.
hist(res_aov$residuals)
qqnorm(res_aov$residuals, pch=1, frame=FALSE)
qqline(res_aov$residuals, col="red", lwd=4)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.94881, p-value = 1.687e-08
# Bartlett test of homogeneity of variances of Valu_Percp_Mean between the
# two Gender_numeric groups.
bartlett.test(subtoyota$Valu_Percp_Mean, subtoyota$Gender_numeric)
##
## Bartlett test of homogeneity of variances
##
## data: subtoyota$Valu_Percp_Mean and subtoyota$Gender_numeric
## Bartlett's K-squared = 2.4132, df = 1, p-value = 0.1203
# Pooled-variance two-sample t-test of Valu_Percp_Mean between the two
# Gender_numeric groups. The argument is spelled out in full as `var.equal`;
# the abbreviated `var.eq` only worked through partial argument matching.
t.test(Valu_Percp_Mean ~ Gender_numeric, data = subtoyota, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Valu_Percp_Mean by Gender_numeric
## t = -0.58585, df = 287, p-value = 0.5584
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.3596413 0.1946552
## sample estimates:
## mean in group 0 mean in group 1
## 5.118003 5.200496
# Keep the rows whose Parent contains "Toyota" or "Honda", then preview the
# first five rows of the result.
filtered_car_total <- filter(Car_Total, str_detect(Parent, "Toyota|Honda"))
head(filtered_car_total, 5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res100 6 7 7 3 5 6 6 3 5
## 2 Res1000 6 6 7 6 6 6 6 6 6
## 3 Res1001 6 6 7 6 6 6 6 4 4
## 4 Res1002 3 1 4 3 5 6 6 2 6
## 5 Res1003 2 2 1 2 2 2 1 6 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 6 6 7 6 5 5
## 2 6 6 4 6 6 3
## 3 4 6 5 6 6 7
## 4 6 6 5 4 5 5
## 5 6 5 4 4 4 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region Model
## 1 2 1 Collision Female 32 1 NA American Toyota Rav4
## 2 2 3 Liability Female 24 2 NA Asian Toyota Corolla
## 3 1 3 Liability Female 24 2 NA Asian Toyota Corolla
## 4 1 3 Liability Female 25 2 NA Asian Toyota Corolla
## 5 1 3 Liability Female 26 2 NA Asian Toyota Corolla
## MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make Model_v1 Parent
## 1 24 4 8.2 10 8 4 Adults Toyota Rav4 Toyota
## 2 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 3 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## 4 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 5 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## Valu_Percp_Mean
## 1 6.5
## 2 5.0
## 3 5.5
## 4 4.5
## 5 4.0
# Preview the first five rows of the Toyota/Honda subset.
head(filtered_car_total,5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res100 6 7 7 3 5 6 6 3 5
## 2 Res1000 6 6 7 6 6 6 6 6 6
## 3 Res1001 6 6 7 6 6 6 6 4 4
## 4 Res1002 3 1 4 3 5 6 6 2 6
## 5 Res1003 2 2 1 2 2 2 1 6 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 6 6 7 6 5 5
## 2 6 6 4 6 6 3
## 3 4 6 5 6 6 7
## 4 6 6 5 4 5 5
## 5 6 5 4 4 4 5
## Residence Pay_Meth Insur_Type Gender Age Education X Region Model
## 1 2 1 Collision Female 32 1 NA American Toyota Rav4
## 2 2 3 Liability Female 24 2 NA Asian Toyota Corolla
## 3 1 3 Liability Female 24 2 NA Asian Toyota Corolla
## 4 1 3 Liability Female 25 2 NA Asian Toyota Corolla
## 5 1 3 Liability Female 26 2 NA Asian Toyota Corolla
## MPG Cyl acc1 C_cost. H_Cost Post.Satis AgeGrp Make Model_v1 Parent
## 1 24 4 8.2 10 8 4 Adults Toyota Rav4 Toyota
## 2 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 3 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## 4 26 4 8.0 7 6 6 Young Adults Toyota Corolla Toyota
## 5 26 4 8.0 7 6 5 Young Adults Toyota Corolla Toyota
## Valu_Percp_Mean
## 1 6.5
## 2 5.0
## 3 5.5
## 4 4.5
## 5 4.0
# Average the two attitude items into one score per respondent.
filtered_car_total[["Att_Mean"]] <-
  (filtered_car_total[["Att_1"]] + filtered_car_total[["Att_2"]]) / 2
# Inspect the two items next to their average in the data viewer.
View(filtered_car_total[c("Att_1", "Att_2", "Att_Mean")])
# Encode Parent as a number: "Toyota" becomes 1 and "Honda" becomes 0.
filtered_car_total <- mutate(
  filtered_car_total,
  Parent_numeric = case_when(
    Parent == "Honda" ~ 0,
    Parent == "Toyota" ~ 1
  )
)
# Box plots of Att_Mean per parent code, then a Shapiro-Wilk normality test
# of Att_Mean over the whole subset.
boxplot(filtered_car_total$Att_Mean ~ filtered_car_total$Parent_numeric, col = c(5, 7))
shapiro.test(filtered_car_total$Att_Mean)
##
## Shapiro-Wilk normality test
##
## data: filtered_car_total$Att_Mean
## W = 0.92712, p-value = 5.313e-14
# ANOVA of Att_Mean on the numeric 0/1 parent code (Honda = 0, Toyota = 1).
res_aov<-aov(Att_Mean~Parent_numeric, data=filtered_car_total)
# Print the fitted model (sums of squares and degrees of freedom).
res_aov
## Call:
## aov(formula = Att_Mean ~ Parent_numeric, data = filtered_car_total)
##
## Terms:
## Parent_numeric Residuals
## Sum of Squares 6.3422 726.1419
## Deg. of Freedom 1 449
##
## Residual standard error: 1.271709
## Estimated effects may be unbalanced
# Residual diagnostics for the current res_aov fit: histogram, normal Q-Q
# plot with a red reference line, and a Shapiro-Wilk test of the residuals.
hist(res_aov$residuals)
qqnorm(res_aov$residuals, pch=1, frame=FALSE)
qqline(res_aov$residuals, col="red", lwd=4)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.94609, p-value = 9.63e-12
# Bartlett test of homogeneity of variances of Att_Mean between the two
# Parent_numeric groups.
bartlett.test(filtered_car_total$Att_Mean, filtered_car_total$Parent_numeric)
##
## Bartlett test of homogeneity of variances
##
## data: filtered_car_total$Att_Mean and filtered_car_total$Parent_numeric
## Bartlett's K-squared = 7.0355, df = 1, p-value = 0.007991
# Two-sample t-test of Att_Mean between Honda (0) and Toyota (1).
# The Bartlett test just above rejects equal variances (p = 0.008), so the
# Welch unequal-variance test (var.equal = FALSE) is used instead of the
# pooled-variance test, matching how the age-group analysis earlier in this
# script handles a rejected Bartlett test. The argument is also spelled out
# in full rather than partially matched as `var.eq`.
# NOTE: the recorded output below was produced by the pooled test and must
# be regenerated by re-running the script.
t.test(Att_Mean ~ Parent_numeric, data = filtered_car_total, var.equal = FALSE)
##
## Two Sample t-test
##
## data: Att_Mean by Parent_numeric
## t = 1.9803, df = 449, p-value = 0.04828
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.001885513 0.494532916
## sample estimates:
## mean in group 0 mean in group 1
## 5.503145 5.254935
# Scatter plot of value perception by Region for the Toyota subset; point
# colour encodes acc1 and point size encodes MPG.
ggplot(
  data = subtoyota,
  mapping = aes(x = Valu_Percp_Mean, y = Region, colour = acc1, size = MPG)
) +
  geom_point() +
  scale_color_gradient() +
  labs(
    x = "Value",
    y = "Region",
    color = "Acceleration",
    size = "MPG"
  ) +
  theme_minimal()
# Linear regression of value perception on Region, acc1 and MPG, followed by
# its coefficient table and fit statistics.
model2 <- lm(Valu_Percp_Mean ~ Region + acc1 + MPG, data = subtoyota)
summary(model2)
##
## Call:
## lm(formula = Valu_Percp_Mean ~ Region + acc1 + MPG, data = subtoyota)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1515 -0.6034 0.2487 0.7956 1.9849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.91205 1.61289 1.805 0.0721 .
## RegionAsian 0.08833 0.15871 0.557 0.5783
## RegionEuropean 0.18931 0.16178 1.170 0.2429
## RegionMiddle Eastern 0.73495 0.39572 1.857 0.0643 .
## acc1 0.42850 0.34923 1.227 0.2208
## MPG -0.04911 0.05763 -0.852 0.3948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.048 on 286 degrees of freedom
## Multiple R-squared: 0.01927, Adjusted R-squared: 0.002123
## F-statistic: 1.124 on 5 and 286 DF, p-value: 0.3478