Project 1

options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install necessary packages
# Only packages that are not already available are installed, so re-running
# the script does not re-download all four packages every time.
required_packages <- c("readxl", "dplyr", "ggplot2", "tidyr")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Load libraries
library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(tidyr)

# Set working directory
# NOTE(review): this is a machine-specific path. The relative file names used
# below (and the relative write.csv() path later in the script) are resolved
# against it, so the script only runs where this folder exists.
setwd("~/Desktop/MKTG 3P98")

# Read the data files
# Both survey exports are loaded with read.csv() defaults.
car1 <- read.csv("Car_Survey_1.csv")
car2 <- read.csv("Car_Survey_2.csv")

# Check the structure and first few rows of the datasets
str(car1)

## 'data.frame':    1180 obs. of  23 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...

head(car1, n = 5)

##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X
## 1         2        2  Collision   Male  18         2 NA
## 2         2        2  Collision   Male  18         2 NA
## 3         1        2  Collision   Male  19         2 NA
## 4         2        2  Collision   Male  19         2 NA
## 5         1        2  Collision Female  19         2 NA

View(car1)

str(car2)

## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...

head(car2, n = 5)

##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5

View(car2)

# Rename car2's respondent ID column so it matches car1's key ("Resp").
# The column is renamed by name rather than by position: if the export's
# column order ever changes, rename() errors because "Respondents" is missing
# instead of silently relabelling whatever column happens to be first.
car2 <- rename(car2, Resp = Respondents)
head(car2, n = 1)

##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4

# Join the two surveys on the shared respondent ID. With all = FALSE only
# respondents present in both files are kept; sort = TRUE orders rows by key.
Car_Total <- merge(x = car1, y = car2, by = "Resp", all = FALSE, sort = TRUE)
str(Car_Total)

## 'data.frame':    1049 obs. of  31 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...

# Write merged data to CSV
write.csv(Car_Total, "Car_Total.csv", row.names = FALSE)
View(Car_Total)

# Checking and replacing missing (NA) values
summary(Car_Total)

##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Mode  :character   Median :6.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                     NA's   :4                       NA's   :4      
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :2       NA's   :4       NA's   :1      
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##  NA's   :1       NA's   :3      NA's   :5       NA's   :2       NA's   :4      
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##  NA's   :1       NA's   :3       NA's   :4       NA's   :5      
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##                                                                       
##    Education        X              Region             Model          
##  Min.   :1.000   Mode:logical   Length:1049        Length:1049       
##  1st Qu.:2.000   NA's:1049      Class :character   Class :character  
##  Median :2.000                  Mode  :character   Mode  :character  
##  Mean   :1.989                                                       
##  3rd Qu.:2.000                                                       
##  Max.   :3.000                                                       
##                                                                      
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##                                                                                
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00  
##

# Compute mean value of Att_1
mean(Car_Total$Att_1)

## [1] NA

# Replace NA values with column mean for numeric columns
# Each numeric column has its missing entries filled with that column's own
# mean (computed over the non-missing values); non-numeric columns are
# returned unchanged. Assigning into `Car_Total[]` keeps the data.frame
# structure instead of replacing it with a plain list.
# NOTE(review): this also fills coded columns such as Residence, where a
# fractional mean is not a valid category -- confirm that is intended.
impute_with_mean <- function(x) {
  if (is.numeric(x)) {
    x[is.na(x)] <- mean(x, na.rm = TRUE)
  }
  x
}
Car_Total[] <- lapply(Car_Total, impute_with_mean)

# Checking the structure after replacing NAs
summary(Car_Total)

##      Resp               Att_1           Att_2           Enj_1      
##  Length:1049        Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  Class :character   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:5.000  
##  Mode  :character   Median :5.000   Median :6.000   Median :6.000  
##                     Mean   :4.882   Mean   :5.287   Mean   :5.378  
##                     3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:7.000  
##                     Max.   :7.000   Max.   :7.000   Max.   :7.000  
##      Enj_2         Perform_1       Perform_2       Perform_3    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :4.575   Mean   :4.947   Mean   :4.831   Mean   :4.217  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##      WOM_1           WOM_2        Futu_Pur_1      Futu_Pur_2     Valu_Percp_1  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :6.000   Median :6.00   Median :6.000   Median :6.000   Median :6.000  
##  Mean   :5.286   Mean   :5.35   Mean   :5.321   Mean   :5.371   Mean   :5.411  
##  3rd Qu.:7.000   3rd Qu.:6.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.00   Max.   :9.000   Max.   :7.000   Max.   :7.000  
##   Valu_Percp_2    Pur_Proces_1    Pur_Proces_2     Residence    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :5.000   Median :6.000   Median :5.000   Median :1.000  
##  Mean   :5.114   Mean   :5.256   Mean   :4.923   Mean   :1.474  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :5.000  
##     Pay_Meth      Insur_Type           Gender               Age       
##  Min.   :1.000   Length:1049        Length:1049        Min.   :18.00  
##  1st Qu.:1.000   Class :character   Class :character   1st Qu.:23.00  
##  Median :2.000   Mode  :character   Mode  :character   Median :34.00  
##  Mean   :2.153                                         Mean   :35.22  
##  3rd Qu.:3.000                                         3rd Qu.:48.00  
##  Max.   :3.000                                         Max.   :60.00  
##    Education        X              Region             Model          
##  Min.   :1.000   Mode:logical   Length:1049        Length:1049       
##  1st Qu.:2.000   NA's:1049      Class :character   Class :character  
##  Median :2.000                  Mode  :character   Mode  :character  
##  Mean   :1.989                                                       
##  3rd Qu.:2.000                                                       
##  Max.   :3.000                                                       
##       MPG             Cyl           acc1          C_cost.          H_Cost      
##  Min.   :14.00   Min.   :4.0   Min.   :3.600   Min.   : 7.00   Min.   : 6.000  
##  1st Qu.:17.00   1st Qu.:4.0   1st Qu.:5.100   1st Qu.:10.00   1st Qu.: 8.000  
##  Median :19.00   Median :6.0   Median :6.500   Median :12.00   Median :10.000  
##  Mean   :19.58   Mean   :5.8   Mean   :6.202   Mean   :11.35   Mean   : 9.634  
##  3rd Qu.:22.00   3rd Qu.:6.0   3rd Qu.:7.500   3rd Qu.:13.00   3rd Qu.:11.000  
##  Max.   :26.00   Max.   :8.0   Max.   :8.500   Max.   :16.00   Max.   :14.000  
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00

# Att_Mean: per-respondent average of the two attitude items, computed as the
# row mean of the Att_1 and Att_2 columns.
Car_Total$Att_Mean <- rowMeans(Car_Total[c("Att_1", "Att_2")], na.rm = TRUE)

# The same quantity written out by hand (sum of the two items divided by 2);
# this assignment overwrites the column created just above.
Car_Total[["Att_Mean"]] <- (Car_Total[["Att_1"]] + Car_Total[["Att_2"]]) / 2
View(Car_Total)

# Bar chart of how many cars fall in each region, with the count printed on
# top of every bar.
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Region)) +
  geom_bar() +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    vjust = 0
  ) +
  xlab("Region") +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Region") +
  theme_bw()

# Share of cars per region, as proportions of the total (values sum to 1;
# they are not multiplied by 100).
car_region_percentage <- prop.table(x = table(Car_Total[["Region"]]))
print(car_region_percentage)

## 
##       American          Asian       European Middle Eastern 
##      0.3431840      0.1982841      0.2001907      0.2583413

# Treat Model and Region as categorical variables (factors)
Car_Total[c("Model", "Region")] <- lapply(
  Car_Total[c("Model", "Region")],
  as.factor
)

# Stacked bar chart: cars per region, coloured by model
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Model)) +
  geom_bar() +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Model and Region") +
  theme_bw()

# Split "Model" at the first space: the leading word becomes Brand and,
# because extra = "merge", everything after it stays together as Model.
cars_separated <- separate(
  Car_Total,
  col = Model,
  into = c("Brand", "Model"),
  sep = " ",
  extra = "merge"
)

# Carry the split columns back into the working dataset
Car_Total <- cars_separated
View(Car_Total)

# Stacked bar chart of cars per region, one colour per brand.
# position = "stack" piles the brands on top of each other within a region.
ggplot(data = Car_Total, mapping = aes(x = Region, fill = Brand)) +
  geom_bar(position = "stack") +
  xlab("Region") +
  ylab("Number of Cars") +
  ggtitle("Number of Cars by Brand across Region") +
  theme_bw() +
  # Tilt the region labels; this must follow theme_bw() or it is overridden.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Count the total number of cars by Brand
# Brand is the only grouping variable (Car_Total has no Make column). The
# column is referenced by its bare name, as dplyr verbs expect, so the result
# column is labelled "Brand"; counts are stored in "Freq".
count(Car_Total, Brand, name = "Freq")

##        Brand Freq
## 1      Buick   31
## 2  Chevrolet   64
## 3   Chrysler  169
## 4      Dodge   41
## 5       Fiat   18
## 6       Ford  202
## 7      Honda  159
## 8        Kia   34
## 9    Lincoln   39
## 10    Toyota  292
# Create a crosstabulation table for Brand and Region
xtabs(~ Region + Brand, Car_Total)

##                 Brand
## Region           Buick Chevrolet Chrysler Dodge Fiat Ford Honda Kia Lincoln
##   American          17        22       54    22    9   81    38  15       0
##   Asian              0         0       24     0    0   24    58   0       0
##   European          10        21       13     0    9   29    29  19       0
##   Middle Eastern     4        21       78    19    0   68    34   0      39
##                 Brand
## Region           Toyota
##   American          102
##   Asian             102
##   European           80
##   Middle Eastern      8

# Count the total number of cars by Brand across Region
brand_region_counts <- table(Car_Total$Brand, Car_Total$Region)
print(brand_region_counts)

##            
##             American Asian European Middle Eastern
##   Buick           17     0       10              4
##   Chevrolet       22     0       21             21
##   Chrysler        54    24       13             78
##   Dodge           22     0        0             19
##   Fiat             9     0        9              0
##   Ford            81    24       29             68
##   Honda           38    58       29             34
##   Kia             15     0       19              0
##   Lincoln          0     0        0             39
##   Toyota         102   102       80              8

# Mean Att_1 score for every Brand x Region combination that occurs in the
# data (one row per combination).
brand_region_table <- aggregate(
  Att_1 ~ Brand + Region,
  data = Car_Total,
  FUN = mean
)
print(brand_region_table)

##        Brand         Region    Att_1
## 1      Buick       American 5.529412
## 2  Chevrolet       American 5.227273
## 3   Chrysler       American 5.011979
## 4      Dodge       American 4.818182
## 5       Fiat       American 4.666667
## 6       Ford       American 4.567901
## 7      Honda       American 5.657895
## 8        Kia       American 3.266667
## 9     Toyota       American 5.254902
## 10  Chrysler          Asian 5.291667
## 11      Ford          Asian 4.166667
## 12     Honda          Asian 5.568966
## 13    Toyota          Asian 4.881199
## 14     Buick       European 5.000000
## 15 Chevrolet       European 5.000000
## 16  Chrysler       European 5.230769
## 17      Fiat       European 3.777778
## 18      Ford       European 5.241379
## 19     Honda       European 4.758621
## 20       Kia       European 3.789474
## 21    Toyota       European 4.800000
## 22     Buick Middle Eastern 6.000000
## 23 Chevrolet Middle Eastern 5.904762
## 24  Chrysler Middle Eastern 4.038462
## 25     Dodge Middle Eastern 4.473684
## 26      Ford Middle Eastern 4.014706
## 27     Honda Middle Eastern 5.500000
## 28   Lincoln Middle Eastern 5.692308
## 29    Toyota Middle Eastern 5.375000

# Keep only the Toyota rows and plot how many Toyota cars appear per region.
# The subset holds raw respondent rows (not attitude means), so it gets its
# own name instead of sharing one with the mean table built further down.
# Brand is matched exactly, like the other Toyota filters in this script.
Toyota_Cars <- Car_Total %>%
  filter(Brand == "Toyota")

ggplot(Toyota_Cars, aes(x = Region, fill = Brand)) +
  theme_bw() +
  geom_bar(position = "stack") +
  labs(y = "Number of Cars",
       x = "Region",
       title = "Number of Toyota Cars across Regions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Line graph of the mean Att_1 score by region, one line per brand.
# The y axis shows the mean of Att_1 (from brand_region_table), not the
# Att_Mean column, so it is labelled accordingly.
ggplot(
  brand_region_table,
  aes(x = Region, y = Att_1, group = Brand, color = Brand)
) +
  geom_line() +
  geom_point() +
  labs(y = "Att_1 Mean", title = "Attitude Mean by Brand and Region")

# Pull Toyota's rows out of the Brand x Region mean table
Toyota_Att1_Mean <- filter(brand_region_table, Brand == "Toyota")
print(Toyota_Att1_Mean)

##    Brand         Region    Att_1
## 1 Toyota       American 5.254902
## 2 Toyota          Asian 4.881199
## 3 Toyota       European 4.800000
## 4 Toyota Middle Eastern 5.375000

# Keep only Toyota means that lie in the 3-6 range (both ends inclusive);
# comma-separated conditions in filter() are combined with AND.
Toyota_Att1_Mean <- filter(Toyota_Att1_Mean, Att_1 >= 3, Att_1 <= 6)
head(Toyota_Att1_Mean)

##    Brand         Region    Att_1
## 1 Toyota       American 5.254902
## 2 Toyota          Asian 4.881199
## 3 Toyota       European 4.800000
## 4 Toyota Middle Eastern 5.375000

# Toyota's mean Att_1 by region, drawn as points joined by a line, with the
# y axis fixed to the 3-6 range.
ggplot(
  data = Toyota_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, color = Brand)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(3, 6)) +
  ylab("Att_1 Mean") +
  ggtitle("Attitude Mean for Toyota by Region")

# Restrict the Brand x Region mean table to the two competing brands
# (Toyota and Honda) so their attitudes can be compared.
Multiple_Att1_Mean <- filter(
  brand_region_table,
  Brand %in% c("Toyota", "Honda")
)

# Show the filtered rows
print(Multiple_Att1_Mean)

##    Brand         Region    Att_1
## 1  Honda       American 5.657895
## 2 Toyota       American 5.254902
## 3  Honda          Asian 5.568966
## 4 Toyota          Asian 4.881199
## 5  Honda       European 4.758621
## 6 Toyota       European 4.800000
## 7  Honda Middle Eastern 5.500000
## 8 Toyota Middle Eastern 5.375000

# Keep only brand means in the 3-6 range (both ends inclusive)
Multiple_Att1_Mean <- filter(Multiple_Att1_Mean, Att_1 >= 3, Att_1 <= 6)

# Toyota vs Honda: mean Att_1 by region, one line per brand, y axis fixed
# to the 3-6 range.
ggplot(
  data = Multiple_Att1_Mean,
  mapping = aes(x = Region, y = Att_1, group = Brand, color = Brand)
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(3, 6)) +
  labs(
    title = "Attitude Mean for Toyota and Honda by Region",
    y = "Att_1 Mean"
  )

# Bin Age into three demographic groups. With right = FALSE each interval is
# closed on the left: [0, 30) Young Adults, [30, 50) Adults, [50, Inf) Mature
# Adults.
Car_Total <- Car_Total %>%
  mutate(
    AgeGrp = cut(
      Age,
      breaks = c(0, 30, 50, Inf),
      labels = c("Young Adults", "Adults", "Mature Adults"),
      right = FALSE
    )
  )

# View the updated dataset structure
names(Car_Total)

##  [1] "Resp"         "Att_1"        "Att_2"        "Enj_1"        "Enj_2"       
##  [6] "Perform_1"    "Perform_2"    "Perform_3"    "WOM_1"        "WOM_2"       
## [11] "Futu_Pur_1"   "Futu_Pur_2"   "Valu_Percp_1" "Valu_Percp_2" "Pur_Proces_1"
## [16] "Pur_Proces_2" "Residence"    "Pay_Meth"     "Insur_Type"   "Gender"      
## [21] "Age"          "Education"    "X"            "Region"       "Brand"       
## [26] "Model"        "MPG"          "Cyl"          "acc1"         "C_cost."     
## [31] "H_Cost"       "Post.Satis"   "Att_Mean"     "AgeGrp"

head(Car_Total, n = 5)

##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X   Region  Brand
## 1         2        2  Collision   Male  18         2 NA European   Ford
## 2         1        2  Collision   Male  21         2 NA European   Ford
## 3         2        1  Collision Female  32         1 NA American Toyota
## 4         2        3  Liability Female  24         2 NA    Asian Toyota
## 5         1        3  Liability Female  24         2 NA    Asian Toyota
##        Model MPG Cyl acc1 C_cost. H_Cost Post.Satis Att_Mean       AgeGrp
## 1 Expedition  15   8  5.5      16     14          4      6.0 Young Adults
## 2 Expedition  15   8  5.5      16     14          5      6.0 Young Adults
## 3       Rav4  24   4  8.2      10      8          4      6.5       Adults
## 4    Corolla  26   4  8.0       7      6          6      6.0 Young Adults
## 5    Corolla  26   4  8.0       7      6          5      6.0 Young Adults

# Toyota-only subset for the age-group breakdown
filtered_data_Toyota <- filter(Car_Total, Brand == "Toyota")

# Stacked bar chart: Toyota cars per region, split by age group
ggplot(data = filtered_data_Toyota, mapping = aes(x = Region, fill = AgeGrp)) +
  geom_bar() +
  ylab("Number of Cars") +
  ggtitle("Number of Toyota Cars by Age Group and Region") +
  theme_bw()

Project 1

Kaitlyn

2025-02-02