# Use the CRAN cloud mirror for any installation that turns out to be needed
options(repos = c(CRAN = "https://cloud.r-project.org/"))
# Install only the packages that are not already available, so re-running the
# script does not download and reinstall every package each time
required_packages <- c("readxl", "dplyr", "ggplot2", "tidyr")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
# Load libraries
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Make the project folder the working directory so that the relative file
# names used below (and when writing results later) resolve against it
setwd("~/Desktop/MKTG 3P98")
# Load the two survey exports into data frames
car1 <- read.csv(file = "Car_Survey_1.csv")
car2 <- read.csv(file = "Car_Survey_2.csv")
# Inspect the first survey: column types first, then the first five rows
str(car1)
## 'data.frame': 1180 obs. of 23 variables:
## $ Resp : chr "Res1" "Res2" "Res3" "Res4" ...
## $ Att_1 : int 6 7 7 4 6 6 1 6 3 6 ...
## $ Att_2 : int 6 5 7 1 6 6 1 5 2 6 ...
## $ Enj_1 : int 6 5 7 1 6 6 1 5 3 4 ...
## $ Enj_2 : int 6 2 5 1 5 5 1 3 2 4 ...
## $ Perform_1 : int 5 2 5 1 5 5 2 5 2 4 ...
## $ Perform_2 : int 6 6 5 1 2 5 2 5 3 4 ...
## $ Perform_3 : int 3 7 3 1 1 7 2 2 1 1 ...
## $ WOM_1 : int 3 5 6 7 7 5 2 4 6 5 ...
## $ WOM_2 : int 3 5 6 7 7 5 3 6 6 6 ...
## $ Futu_Pur_1 : int 3 6 7 3 7 7 5 4 7 6 ...
## $ Futu_Pur_2 : int 3 6 7 3 6 7 2 4 7 6 ...
## $ Valu_Percp_1: int 5 6 5 6 6 7 2 4 6 6 ...
## $ Valu_Percp_2: int 2 7 7 5 5 7 2 4 6 6 ...
## $ Pur_Proces_1: int 6 7 7 5 6 7 2 4 6 6 ...
## $ Pur_Proces_2: int 4 6 7 4 7 7 6 4 6 6 ...
## $ Residence : int 2 2 1 2 1 2 2 1 2 1 ...
## $ Pay_Meth : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Collision" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Age : int 18 18 19 19 19 19 19 21 21 21 ...
## $ Education : int 2 2 2 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
head(car1, n = 5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res2 7 5 5 2 2 6 7 5 5
## 3 Res3 7 7 7 5 5 5 3 6 6
## 4 Res4 4 1 1 1 1 1 1 7 7
## 5 Res5 6 6 6 5 5 2 1 7 7
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 7 7 6
## 3 7 7 5 7 7 7
## 4 3 3 6 5 5 4
## 5 7 6 6 5 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X
## 1 2 2 Collision Male 18 2 NA
## 2 2 2 Collision Male 18 2 NA
## 3 1 2 Collision Male 19 2 NA
## 4 2 2 Collision Male 19 2 NA
## 5 1 2 Collision Female 19 2 NA
# Open the data viewer only in an interactive session; it serves no purpose
# (and may fail) when the script is run in batch mode or knitted
if (interactive()) {
  View(car1)
}
# Inspect the second survey: column types first, then the first five rows
str(car2)
## 'data.frame': 1049 obs. of 9 variables:
## $ Respondents: chr "Res1" "Res2" "Res3" "Res4" ...
## $ Region : chr "European" "European" "European" "European" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
## $ MPG : int 15 15 15 15 15 15 15 15 15 15 ...
## $ Cyl : int 8 8 8 8 8 8 8 8 8 8 ...
## $ acc1 : num 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
## $ C_cost. : num 16 16 16 16 16 16 16 16 16 16 ...
## $ H_Cost : num 14 14 14 14 14 14 14 14 14 14 ...
## $ Post.Satis : int 4 3 5 5 5 3 3 6 3 5 ...
head(car2, n = 5)
## Respondents Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
## 2 Res2 European Ford Expedition 15 8 5.5 16 14 3
## 3 Res3 European Ford Expedition 15 8 5.5 16 14 5
## 4 Res4 European Ford Expedition 15 8 5.5 16 14 5
## 5 Res5 European Ford Expedition 15 8 5.5 16 14 5
# Open the data viewer only in an interactive session; it serves no purpose
# (and may fail) when the script is run in batch mode or knitted
if (interactive()) {
  View(car2)
}
# Rename the first column of car2 ("Respondents") to "Resp" so that both data
# frames share the same key column name
names(car2)[1] <- "Resp"
head(car2, n = 1)
## Resp Region Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition 15 8 5.5 16 14 4
# Join the two surveys on the shared respondent ID; merge() keeps only the
# respondents that appear in both data frames by default
Car_Total <- merge(x = car1, y = car2, by = "Resp")
str(Car_Total)
## 'data.frame': 1049 obs. of 31 variables:
## $ Resp : chr "Res1" "Res10" "Res100" "Res1000" ...
## $ Att_1 : int 6 6 6 6 6 3 2 7 2 6 ...
## $ Att_2 : int 6 6 7 6 6 1 2 7 1 6 ...
## $ Enj_1 : int 6 4 7 7 7 4 1 7 2 6 ...
## $ Enj_2 : int 6 4 3 6 6 3 2 6 1 5 ...
## $ Perform_1 : int 5 4 5 6 6 5 2 5 2 5 ...
## $ Perform_2 : int 6 4 6 6 6 6 2 6 2 5 ...
## $ Perform_3 : int 3 1 6 6 6 6 1 5 2 5 ...
## $ WOM_1 : int 3 5 3 6 4 2 6 6 7 3 ...
## $ WOM_2 : int 3 6 5 6 4 6 7 6 7 3 ...
## $ Futu_Pur_1 : int 3 6 6 6 4 6 6 6 7 6 ...
## $ Futu_Pur_2 : int 3 6 6 6 6 6 5 7 7 6 ...
## $ Valu_Percp_1: int 5 6 7 4 5 5 4 6 4 5 ...
## $ Valu_Percp_2: int 2 6 6 6 6 4 4 5 6 6 ...
## $ Pur_Proces_1: int 6 6 5 6 6 5 4 5 6 6 ...
## $ Pur_Proces_2: int 4 6 5 3 7 5 5 5 7 5 ...
## $ Residence : int 2 1 2 2 1 1 1 2 1 2 ...
## $ Pay_Meth : int 2 2 1 3 3 3 3 3 3 3 ...
## $ Insur_Type : chr "Collision" "Collision" "Collision" "Liability" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 18 21 32 24 24 25 26 26 27 27 ...
## $ Education : int 2 2 1 2 2 2 2 2 2 2 ...
## $ X : logi NA NA NA NA NA NA ...
## $ Region : chr "European" "European" "American" "Asian" ...
## $ Model : chr "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
## $ MPG : int 15 15 24 26 26 26 26 26 26 26 ...
## $ Cyl : int 8 8 4 4 4 4 4 4 4 4 ...
## $ acc1 : num 5.5 5.5 8.2 8 8 8 8 8 8 8 ...
## $ C_cost. : num 16 16 10 7 7 7 7 7 7 7 ...
## $ H_Cost : num 14 14 8 6 6 6 6 6 6 6 ...
## $ Post.Satis : int 4 5 4 6 5 6 5 6 7 6 ...
# Save the merged data as a CSV file (relative to the working directory),
# without the row-name column
write.csv(Car_Total, "Car_Total.csv", row.names = FALSE)
# Open the data viewer only in an interactive session; it serves no purpose
# (and may fail) when the script is run in batch mode or knitted
if (interactive()) {
  View(Car_Total)
}
# Check for missing (NA) values: summary() reports the NA count per column
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Mode :character Median :6.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :4
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## NA's :4 NA's :2 NA's :4 NA's :1
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## NA's :1 NA's :3 NA's :5 NA's :2 NA's :4
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## NA's :1 NA's :3 NA's :4 NA's :5
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
##
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
##
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
##
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
##
# mean() returns NA as soon as a single value is missing
mean(Car_Total$Att_1)
## [1] NA
# Mean of Att_1 computed with the missing responses left out
meanAtt1 <- mean(Car_Total$Att_1, na.rm = TRUE)
# Mean imputation: in every numeric column, missing entries are replaced with
# that column's mean; non-numeric columns are passed through unchanged.
# NOTE(review): coded columns such as Residence are stored as integers and had
# NA's in the summary above, so they are imputed with a fractional mean too.
# Confirm that this is intended.
impute_with_mean <- function(column) {
  if (!is.numeric(column)) {
    return(column)
  }
  column[is.na(column)] <- mean(column, na.rm = TRUE)
  column
}
Car_Total[] <- lapply(Car_Total, impute_with_mean)
# Re-run the summary to confirm the numeric columns no longer report NA's
summary(Car_Total)
## Resp Att_1 Att_2 Enj_1
## Length:1049 Min. :1.000 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Mode :character Median :5.000 Median :6.000 Median :6.000
## Mean :4.882 Mean :5.287 Mean :5.378
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000
## Enj_2 Perform_1 Perform_2 Perform_3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000
## Median :5.000 Median :5.000 Median :5.000 Median :5.000
## Mean :4.575 Mean :4.947 Mean :4.831 Mean :4.217
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
## WOM_1 WOM_2 Futu_Pur_1 Futu_Pur_2 Valu_Percp_1
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.00 Median :6.000 Median :6.000 Median :6.000
## Mean :5.286 Mean :5.35 Mean :5.321 Mean :5.371 Mean :5.411
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.00 Max. :9.000 Max. :7.000 Max. :7.000
## Valu_Percp_2 Pur_Proces_1 Pur_Proces_2 Residence
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :6.000 Median :5.000 Median :1.000
## Mean :5.114 Mean :5.256 Mean :4.923 Mean :1.474
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :5.000
## Pay_Meth Insur_Type Gender Age
## Min. :1.000 Length:1049 Length:1049 Min. :18.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:23.00
## Median :2.000 Mode :character Mode :character Median :34.00
## Mean :2.153 Mean :35.22
## 3rd Qu.:3.000 3rd Qu.:48.00
## Max. :3.000 Max. :60.00
## Education X Region Model
## Min. :1.000 Mode:logical Length:1049 Length:1049
## 1st Qu.:2.000 NA's:1049 Class :character Class :character
## Median :2.000 Mode :character Mode :character
## Mean :1.989
## 3rd Qu.:2.000
## Max. :3.000
## MPG Cyl acc1 C_cost. H_Cost
## Min. :14.00 Min. :4.0 Min. :3.600 Min. : 7.00 Min. : 6.000
## 1st Qu.:17.00 1st Qu.:4.0 1st Qu.:5.100 1st Qu.:10.00 1st Qu.: 8.000
## Median :19.00 Median :6.0 Median :6.500 Median :12.00 Median :10.000
## Mean :19.58 Mean :5.8 Mean :6.202 Mean :11.35 Mean : 9.634
## 3rd Qu.:22.00 3rd Qu.:6.0 3rd Qu.:7.500 3rd Qu.:13.00 3rd Qu.:11.000
## Max. :26.00 Max. :8.0 Max. :8.500 Max. :16.00 Max. :14.000
## Post.Satis
## Min. :2.00
## 1st Qu.:5.00
## Median :6.00
## Mean :5.28
## 3rd Qu.:6.00
## Max. :7.00
# Att_Mean is the per-respondent average of the two attitude items.
# rowMeans(..., na.rm = TRUE) still yields a value when one of the two items
# is missing, whereas (Att_1 + Att_2) / 2 would return NA for that row. The
# column is therefore computed once, with the NA-tolerant form.
Car_Total <- Car_Total %>%
  mutate(Att_Mean = rowMeans(cbind(Att_1, Att_2), na.rm = TRUE))
# Open the data viewer only in an interactive session; it serves no purpose
# (and may fail) when the script is run in batch mode or knitted
if (interactive()) {
  View(Car_Total)
}
# Bar chart of the number of cars per region; geom_text() with stat = "count"
# prints each bar's count as a label
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Region)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = 0) +
  xlab("Region") +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Region") +
  theme_bw()

# Share of cars per region; prop.table() returns proportions (fractions that
# sum to 1), not values on a 0-100 scale
car_region_percentage <- prop.table(table(Car_Total[["Region"]]))
print(car_region_percentage)
##
## American Asian European Middle Eastern
## 0.3431840 0.1982841 0.2001907 0.2583413
# Treat Model and Region as categorical (factor) variables
Car_Total[c("Model", "Region")] <-
  lapply(Car_Total[c("Model", "Region")], as.factor)
# Stacked bar chart: cars per region, coloured by model
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Model)) +
  geom_bar() +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Model and Region") +
  theme_bw()

# Split the Model column at the first space into Brand and Model;
# extra = "merge" keeps any remaining words together in Model
cars_separated <- Car_Total %>%
  separate(Model, into = c("Brand", "Model"), sep = " ", extra = "merge")
# Carry on with the split columns in the main dataset
Car_Total <- cars_separated
# Open the data viewer only in an interactive session; it serves no purpose
# (and may fail) when the script is run in batch mode or knitted
if (interactive()) {
  View(Car_Total)
}
# Stacked bar chart: cars per region, with one coloured segment per brand
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Brand)) +
  geom_bar(position = "stack") +
  xlab("Region") +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Brand across Region") +
  theme_bw() +
  # Tilt the region labels (applied after theme_bw() so it is not reset)
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Count the total number of cars by Brand.
# Car_Total has no `Make` column, and count() takes bare column names, so the
# grouping is given as `Brand`; the result column is then named "Brand"
# instead of "Car_Total$Brand".
count(Car_Total, Brand, name = "Freq")
## Brand Freq
## 1 Buick 31
## 2 Chevrolet 64
## 3 Chrysler 169
## 4 Dodge 41
## 5 Fiat 18
## 6 Ford 202
## 7 Honda 159
## 8 Kia 34
## 9 Lincoln 39
## 10 Toyota 292
# Cross-tabulate Region (rows) against Brand (columns)
xtabs(formula = ~ Region + Brand, data = Car_Total)
## Brand
## Region Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
## American 17 22 54 22 9 81 38 15 0
## Asian 0 0 24 0 0 24 58 0 0
## European 10 21 13 0 9 29 29 19 0
## Middle Eastern 4 21 78 19 0 68 34 0 39
## Brand
## Region Toyota
## American 102
## Asian 102
## European 80
## Middle Eastern 8
# The same counts with Brand on the rows and Region on the columns
brand_region_counts <- table(Car_Total[["Brand"]], Car_Total[["Region"]])
print(brand_region_counts)
##
## American Asian European Middle Eastern
## Buick 17 0 10 4
## Chevrolet 22 0 21 21
## Chrysler 54 24 13 78
## Dodge 22 0 0 19
## Fiat 9 0 9 0
## Ford 81 24 29 68
## Honda 38 58 29 34
## Kia 15 0 19 0
## Lincoln 0 0 0 39
## Toyota 102 102 80 8
# Mean Att_1 score for every Brand x Region combination present in the data
brand_region_table <- aggregate(
  Att_1 ~ Brand + Region,
  data = Car_Total,
  FUN = mean
)
print(brand_region_table)
## Brand Region Att_1
## 1 Buick American 5.529412
## 2 Chevrolet American 5.227273
## 3 Chrysler American 5.011979
## 4 Dodge American 4.818182
## 5 Fiat American 4.666667
## 6 Ford American 4.567901
## 7 Honda American 5.657895
## 8 Kia American 3.266667
## 9 Toyota American 5.254902
## 10 Chrysler Asian 5.291667
## 11 Ford Asian 4.166667
## 12 Honda Asian 5.568966
## 13 Toyota Asian 4.881199
## 14 Buick European 5.000000
## 15 Chevrolet European 5.000000
## 16 Chrysler European 5.230769
## 17 Fiat European 3.777778
## 18 Ford European 5.241379
## 19 Honda European 4.758621
## 20 Kia European 3.789474
## 21 Toyota European 4.800000
## 22 Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24 Chrysler Middle Eastern 4.038462
## 25 Dodge Middle Eastern 4.473684
## 26 Ford Middle Eastern 4.014706
## 27 Honda Middle Eastern 5.500000
## 28 Lincoln Middle Eastern 5.692308
## 29 Toyota Middle Eastern 5.375000
# Keep the rows whose Brand contains "Toyota" (case-insensitive). Despite its
# name, this object holds the matching respondent rows, not means.
Toyota_Att1_Mean <- filter(
  Car_Total,
  grepl("Toyota", Brand, ignore.case = TRUE)
)
# Bar chart of the number of Toyota cars in each region
ggplot(data = Toyota_Att1_Mean, mapping = aes(x = Region, fill = Brand)) +
  geom_bar(position = "stack") +
  xlab("Region") +
  ylab("Number of Cars") +
  ggtitle("Number of Toyota Cars across Regions") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Line graph of the mean Att_1 score by region, with one line per brand
# (all brands in brand_region_table, not only Toyota). The y axis shows the
# mean of Att_1, so it is labelled "Att_1 Mean" rather than "Att_Mean",
# which is the name of a different column in Car_Total.
ggplot(brand_region_table, aes(x = Region, y = Att_1, group = Brand)) +
  geom_line(aes(color = Brand)) +
  geom_point(aes(color = Brand)) +
  labs(y = "Att_1 Mean", title = "Attitude Mean by Brand and Region")

# Keep only Toyota's rows of the Brand x Region means for further analysis
Toyota_Att1_Mean <- filter(brand_region_table, Brand == "Toyota")
print(Toyota_Att1_Mean)
## Brand Region Att_1
## 1 Toyota American 5.254902
## 2 Toyota Asian 4.881199
## 3 Toyota European 4.800000
## 4 Toyota Middle Eastern 5.375000
# Keep only the Toyota means that lie between 3 and 6 (both ends included)
Toyota_Att1_Mean <- filter(Toyota_Att1_Mean, Att_1 >= 3, Att_1 <= 6)
head(Toyota_Att1_Mean)
## Brand Region Att_1
## 1 Toyota American 5.254902
## 2 Toyota Asian 4.881199
## 3 Toyota European 4.800000
## 4 Toyota Middle Eastern 5.375000
# Line-and-point graph of Toyota's mean Att_1 per region, with the y axis
# limited to the 3-6 range
ggplot(
  data = Toyota_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, color = Brand)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(3, 6)) +
  ylab("Att_1 Mean") +
  ggtitle("Attitude Mean for Toyota by Region")

# Keep the Brand x Region means of the two competing brands, Toyota and Honda
Multiple_Att1_Mean <- filter(
  brand_region_table,
  Brand %in% c("Toyota", "Honda")
)
# Show the filtered data
print(Multiple_Att1_Mean)
## Brand Region Att_1
## 1 Honda American 5.657895
## 2 Toyota American 5.254902
## 3 Honda Asian 5.568966
## 4 Toyota Asian 4.881199
## 5 Honda European 4.758621
## 6 Toyota European 4.800000
## 7 Honda Middle Eastern 5.500000
## 8 Toyota Middle Eastern 5.375000
# Keep only the means that lie between 3 and 6 (both ends included)
Multiple_Att1_Mean <- filter(Multiple_Att1_Mean, Att_1 >= 3, Att_1 <= 6)
# Line-and-point graph comparing Toyota's and Honda's mean Att_1 per region
ggplot(
  data = Multiple_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, color = Brand)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(3, 6)) +
  labs(
    title = "Attitude Mean for Toyota and Honda by Region",
    y = "Att_1 Mean"
  )

# Bin Age into three demographic groups. With right = FALSE the intervals are
# closed on the left: [0, 30) "Young Adults", [30, 50) "Adults" and
# [50, Inf) "Mature Adults".
Car_Total <- mutate(
  Car_Total,
  AgeGrp = cut(
    Age,
    breaks = c(0, 30, 50, Inf),
    labels = c("Young Adults", "Adults", "Mature Adults"),
    right = FALSE
  )
)
# List the column names, then show the first rows of the updated dataset
names(Car_Total)
## [1] "Resp" "Att_1" "Att_2" "Enj_1" "Enj_2"
## [6] "Perform_1" "Perform_2" "Perform_3" "WOM_1" "WOM_2"
## [11] "Futu_Pur_1" "Futu_Pur_2" "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence" "Pay_Meth" "Insur_Type" "Gender"
## [21] "Age" "Education" "X" "Region" "Brand"
## [26] "Model" "MPG" "Cyl" "acc1" "C_cost."
## [31] "H_Cost" "Post.Satis" "Att_Mean" "AgeGrp"
head(Car_Total, n = 5)
## Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1 6 6 6 6 5 6 3 3 3
## 2 Res10 6 6 4 4 4 4 1 5 6
## 3 Res100 6 7 7 3 5 6 6 3 5
## 4 Res1000 6 6 7 6 6 6 6 6 6
## 5 Res1001 6 6 7 6 6 6 6 4 4
## Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1 3 3 5 2 6 4
## 2 6 6 6 6 6 6
## 3 6 6 7 6 5 5
## 4 6 6 4 6 6 3
## 5 4 6 5 6 6 7
## Residence Pay_Meth Insur_Type Gender Age Education X Region Brand
## 1 2 2 Collision Male 18 2 NA European Ford
## 2 1 2 Collision Male 21 2 NA European Ford
## 3 2 1 Collision Female 32 1 NA American Toyota
## 4 2 3 Liability Female 24 2 NA Asian Toyota
## 5 1 3 Liability Female 24 2 NA Asian Toyota
## Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean AgeGrp
## 1 Expedition 15 8 5.5 16 14 4 6.0 Young Adults
## 2 Expedition 15 8 5.5 16 14 5 6.0 Young Adults
## 3 Rav4 24 4 8.2 10 8 4 6.5 Adults
## 4 Corolla 26 4 8.0 7 6 6 6.0 Young Adults
## 5 Corolla 26 4 8.0 7 6 5 6.0 Young Adults
# Age-group analysis for Toyota: keep only the rows whose Brand is "Toyota"
filtered_data_Toyota <- filter(Car_Total, Brand == "Toyota")
# Stacked bar chart of Toyota cars per region, split by age group
ggplot(data = filtered_data_Toyota, mapping = aes(x = Region, fill = AgeGrp)) +
  geom_bar() +
  ylab("Number of Cars") +
  ggtitle("Number of Toyota Cars by Age Group and Region") +
  theme_bw()
