Setup working directory

getwd()
## [1] "C:/Users/LMNig/OneDrive/Documents/School"
setwd("C:/Users/LMNig/OneDrive/Documents/School")

Survey Overview

# Load data file using import dataset

## Import car_survey_1 using "Import Dataset" function
## Import car_survey_2 using "Import Dataset" function

# Load data file using read.csv
## Using read.csv

# Read excel.csv file (save excel file as CSV UTF-8
Car1<-read.csv("C:/Users/LMNig/OneDrive/Documents/School/Car_Survey_1a.csv")
## Display the structure of Car1  (data frame)
str(Car1)
## 'data.frame':    1049 obs. of  22 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
## Display the first few rows of Car1(data frame)
head(Car1, n = 5)
##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education
## 1         2        2  Collision   Male  18         2
## 2         2        2  Collision   Male  18         2
## 3         1        2  Collision   Male  19         2
## 4         2        2  Collision   Male  19         2
## 5         1        2  Collision Female  19         2
# Read excel.csv file (save excel file as CSV UTF-8
Car2<-read.csv("C:/Users/LMNig/OneDrive/Documents/School/Car_Survey_2b.csv")
## Display the structure of Car1 (data frame)
str(Car2)
## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...
## Display the first few rows of Car1 (data frame)
head(Car2,n=5)
##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5

Merging Data

# Create a Master Dataset (combined car_survey1a and car_survey_2b)

# Rename unique ID in Car2 to match with Car1
## Renaming the first column in Car2 as "Resp"
## (to match with the first column name in Car1)
names(Car2)[1]<-c("Resp")
head(Car2, n=1)
##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4
# Merge Car_Survey_1a and Car_Survey_2b into one dataset
Car_Total<-merge(Car1,Car2, by="Resp")
str(Car_Total)
## 'data.frame':    1049 obs. of  30 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...
head(Car_Total)
##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
## 6 Res1002     3     1     4     3         5         6         6     2     6
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
## 6          6          6            5            4            5            5
##   Residence Pay_Meth Insur_Type Gender Age Education   Region           Model
## 1         2        2  Collision   Male  18         2 European Ford Expedition
## 2         1        2  Collision   Male  21         2 European Ford Expedition
## 3         2        1  Collision Female  32         1 American     Toyota Rav4
## 4         2        3  Liability Female  24         2    Asian  Toyota Corolla
## 5         1        3  Liability Female  24         2    Asian  Toyota Corolla
## 6         1        3  Liability Female  25         2    Asian  Toyota Corolla
##   MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1  15   8  5.5      16     14          4
## 2  15   8  5.5      16     14          5
## 3  24   4  8.2      10      8          4
## 4  26   4  8.0       7      6          6
## 5  26   4  8.0       7      6          5
## 6  26   4  8.0       7      6          6
# Save the merged data (Car_Total) to a file
## save as CSV

write.csv(Car_Total, "Car_Total", row.names=FALSE)
View(Car_Total)
# Using readxl Package
## Load the package
library(readxl)

# Replace "file_path.xlsx" with the path to your file
Car_data_1 <-read_excel("C:/Users/LMNig/OneDrive/Documents/School/Copy of Car_Survey_1.xlsx")
Car_data_2 <-read_excel ("C:/Users/LMNig/OneDrive/Documents/School/Copy of Car_Survey_2.xlsx")
#Summary of Key Analysis Variables

# Create a summary of the selected variables
summary(Car_Total[c("Att_1", "Att_2")])
##      Att_1           Att_2      
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.000  
##  Median :6.000   Median :6.000  
##  Mean   :4.882   Mean   :5.287  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000  
##  NA's   :4
# Create a summary of the selected variables
summary(Car_Total[c("Valu_Percp_1", "Valu_Percp_2")])
##   Valu_Percp_1    Valu_Percp_2  
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:4.000  
##  Median :6.000   Median :5.000  
##  Mean   :5.411   Mean   :5.114  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :1
# Create a summary of the selected variables
summary(Car_Total[c("WOM_1", "WOM_2")])
##      WOM_1           WOM_2     
##  Min.   :1.000   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:4.00  
##  Median :6.000   Median :6.00  
##  Mean   :5.286   Mean   :5.35  
##  3rd Qu.:7.000   3rd Qu.:6.00  
##  Max.   :7.000   Max.   :7.00  
##  NA's   :1       NA's   :3
# Create a summary of the selected variables
summary(Car_Total[c("Futu_Pur_1", "Futu_Pur_2")])
##    Futu_Pur_1      Futu_Pur_2   
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000  
##  Median :6.000   Median :6.000  
##  Mean   :5.321   Mean   :5.371  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :9.000   Max.   :7.000  
##  NA's   :5       NA's   :2
# Create a summary of the selected variables
summary(Car_Total[c("Post.Satis")])
##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00

Replacing Na Values

# Count NA values before replacement
na_counts_before <- colSums(is.na(Car_Total))
cat("NA counts before replacement:\n")
## NA counts before replacement:
print(na_counts_before)
##         Resp        Att_1        Att_2        Enj_1        Enj_2    Perform_1 
##            0            4            0            4            4            2 
##    Perform_2    Perform_3        WOM_1        WOM_2   Futu_Pur_1   Futu_Pur_2 
##            4            1            1            3            5            2 
## Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2    Residence     Pay_Meth 
##            4            1            3            4            5            0 
##   Insur_Type       Gender          Age    Education       Region        Model 
##            0            0            0            0            0            0 
##          MPG          Cyl         acc1      C_cost.       H_Cost   Post.Satis 
##            0            0            0            0            0            0
# Identifying numeric columns
numeric_cols <- sapply(Car_Total, is.numeric)

# Replacing NA values with the calculated means for all numeric columns
Car_Total[, numeric_cols] <- lapply(Car_Total[, numeric_cols], function(x) {
  mean_val <- mean(x, na.rm = TRUE)   
  x[is.na(x)] <- mean_val              
  return(x)                            
})

#Check for any remaining NA's
any(is.na(Car_Total)) #False=No remaining Na values
## [1] FALSE

Manipulating Data

Creation of “Parent” grouping for each car make and model.

#Group by Car Make
library(stringr) #import library

#Seperate model col into two, delimit using space
Car_Total [c('Make', 'Model_v1')] <- str_split_fixed(Car_Total$Model, " ", 2)
#See the two new columns ("Make", and "Make_v1" in Car_Total data file)
View(Car_Total)

#Group by Parent Company
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Car_Total <- Car_Total %>% #Call dataframe and create new column with new groupings
mutate(Parent = case_when(Make == "Buick" ~ "General Motors",
                          Make == "Chevrolet" ~ "General Motors",
                          Make == "Chrysler" ~ "Chrysler",
                          Make == "Dodge" ~ "Chrysler",
                          Make == "Fiat" ~ "Chrysler",
                          Make == "Ford" ~ "Ford",
                          Make == "Honda" ~ "Honda",
                          Make == "Kia" ~ "Kia",
                          Make == "Lincoln" ~ "Ford",
                          Make == "Toyota" ~ "Toyota",
                          TRUE ~ "Check"))
                      
#Check if the grouping is correct
count(Car_Total, Car_Total$Make, Car_Total$Parent, name = "Freq")
##    Car_Total$Make Car_Total$Parent Freq
## 1           Buick   General Motors   31
## 2       Chevrolet   General Motors   64
## 3        Chrysler         Chrysler  169
## 4           Dodge         Chrysler   41
## 5            Fiat         Chrysler   18
## 6            Ford             Ford  202
## 7           Honda            Honda  159
## 8             Kia              Kia   34
## 9         Lincoln             Ford   39
## 10         Toyota           Toyota  292
table(Car_Total$Make)
## 
##     Buick Chevrolet  Chrysler     Dodge      Fiat      Ford     Honda       Kia 
##        31        64       169        41        18       202       159        34 
##   Lincoln    Toyota 
##        39       292

Age Categories

# Convert Age to numeric
Car_Total$Age <- as.numeric(as.character(Car_Total$Age))

# Create age categories... 1=(18-29yr), 2=(30-49yr), 3=(50yr+)
Car_Total$Age_Category <- cut(Car_Total$Age,
                               breaks = c(17, 29, 49, Inf), 
                               labels = c("1", "2", "3"),
                               right = TRUE)  

# Result
table(Car_Total$Age_Category) 
## 
##   1   2   3 
## 443 375 231

Mutate

# Creation of new variables using the average of each

##Average of Att_1 and Att_2 to create new variable "Attitude"
Car_Total$Attitude <- rowMeans(Car_Total[, c("Att_1", "Att_2")], na.rm = TRUE)

# Result
summary(Car_Total$Attitude)  
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   5.000   5.085   6.000   7.000
##Average of Futu_Pur_1 and Futu_Pur_2 to create new variable "FPI"
Car_Total$FPI <- rowMeans(Car_Total[, c("Futu_Pur_1", "Futu_Pur_2")], na.rm = TRUE)

# Result
summary(Car_Total$FPI) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   6.000   5.346   6.000   7.000
##Average of Valu_Percp_1 and Valu_Percp_2 to create new variable "ValuePercp"
Car_Total$ValuePercp <- rowMeans(Car_Total[, c("Valu_Percp_1", "Valu_Percp_2")], na.rm = TRUE)

# Result
summary(Car_Total$ValuePercp) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   5.500   5.263   6.000   7.000
##Average of WOM_1 and WOM_2 to create new variable "WOM"
Car_Total$WOM <- rowMeans(Car_Total[, c("WOM_1", "WOM_2")], na.rm = TRUE)

# Result
summary(Car_Total$WOM)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   5.500   5.318   6.500   7.000
# Calculate the average of Post.Satis to create new variable "satisfactionmean"
Satisfactionmean <- mean(Car_Total$Post.Satis, na.rm = TRUE)

# Display the result
print(Satisfactionmean)
## [1] 5.280267

Market Segmentation and Target Audience

# Group by Car Make
library(stringr)

# Seperate model col into two, delimit using space
Car_Total [c('Make', 'Model_v1')] <- str_split_fixed(Car_Total$Model, " ", 2)
#See the two new columns ("Make", and "Make_v1" in Car_Total data file)
View(Car_Total)

library(ggplot2)
library(dplyr)

# Age Distribution for Toyota Car Models

## Filter for specific Toyota models
toyota_data <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"))

## Create a bar chart with Model on the x-axis and fill by Age_Category
ggplot(toyota_data, aes(x = Model_v1, fill = Age_Category)) +
  geom_bar(position = "dodge") +
  labs(title = "Age Distribution by Toyota Model",
       x = "Toyota Model",
       y = "Count")

# Region Distribution of Toyota car models

## Filter for specific Toyota models
toyota_data <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"))

## Create a bar chart with Model on the x-axis and fill by Region
ggplot(toyota_data, aes(x = Model_v1, fill = Region)) +
  geom_bar(position = "dodge") +
  labs(title = "Model Region Distribution",
       x = "Toyota Model",
       y = "Count")

Key Information Need 1: How do young adults perceive the value of different Toyota car models? What are young adults future purchase intention of different Toyota car models?

- Which Toyota car model ranks highest in value perception amongst young adults?

- What is the average future purchase intention rating among young adults for different Toyota car models?

### - Which Toyota car model ranks highest in value perception amongst young adults?

install.packages("dplyr")
## Warning: package 'dplyr' is in use and will not be installed
library(dplyr)

# The combined average value perception for each Toyota model specifically for young adults (Age Category "1")
average_value_perception_young_adults <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"), Age_Category == "1") %>% 
  group_by(Model_v1) %>%  
  summarize( Average_Valu_Perception = mean((Valu_Percp_1 + Valu_Percp_2) / 2, na.rm = TRUE))

# View the result
print(average_value_perception_young_adults)
## # A tibble: 3 × 2
##   Model_v1   Average_Valu_Perception
##   <chr>                        <dbl>
## 1 Corolla                       5.11
## 2 Highlander                    5.2 
## 3 Rav4                          5.64
# Bar chart to show the average value perception for each Toyota model among young adults
ggplot(average_value_perception_young_adults, aes(x = Model_v1, y = Average_Valu_Perception, fill = Model_v1)) +
  geom_bar(stat = "identity") +  
  labs(title = "Average Value Perception of Toyota Models Among Young Adults (18-29)",
       x = "Toyota Model",
       y = "Average Value Perception")

### - What is the average future purchase intention rating among young adults for different Toyota car models?

# Calculate the combined average future purchase intention for each Toyota model specifically for young adults (Age Category "1")
average_future_purchase_intention_young_adults <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"), Age_Category == "1") %>%  
  group_by(Model_v1) %>%  
  summarize(
    Average_Futu_Pur = mean((Futu_Pur_1 + Futu_Pur_2) / 2, na.rm = TRUE))

# View the result
print(average_future_purchase_intention_young_adults)
## # A tibble: 3 × 2
##   Model_v1   Average_Futu_Pur
##   <chr>                 <dbl>
## 1 Corolla                5.72
## 2 Highlander             5.16
## 3 Rav4                   5.79
# Create a bar chart to show the average future purchase intention for each Toyota model among young adults
ggplot(average_future_purchase_intention_young_adults, aes(x = Model_v1, y = Average_Futu_Pur, fill = Model_v1)) +
  geom_bar(stat = "identity") +  
  labs(title = "Average Future Purchase Intention of Toyota Models Among Young Adults (18-29)",
       x = "Toyota Model",
       y = "Average Future Purchase Intention")

Key Information Need 2: How do young adults across different regions percieve Toyota car brand compared to competitors?

- How does the average attitude rating for Toyota compare to its competitors among young adults across different regions?

- What is the average level of positive word-of-mouth among young adults for Toyota compared to its competitors across different regions?

### - How does the average attitude rating for Toyota compare to its competitors among young adults across different regions?

# Calculate the combined average attitude for each model for young adults (Age_Category == "1")
average_attitude_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  group_by(Model_v1) %>%  
  summarize(
    Average_Attitude = mean((Att_1 + Att_2) / 2, na.rm = TRUE)  # Combine both attitude mean variables
  )

# View the result
print(average_attitude_young_adults)
## # A tibble: 13 × 2
##    Model_v1     Average_Attitude
##    <chr>                   <dbl>
##  1 "500x"                   4.38
##  2 "CRV"                    5.41
##  3 "Camaro"                 5.90
##  4 "Corolla"                5.08
##  5 "Encore"                 5.5 
##  6 "Expedition"             4.98
##  7 "Explorer"               4.22
##  8 "Highlander"             5.07
##  9 "Jeep"                   4.80
## 10 "Journey"                5.15
## 11 "Navigator "             5.25
## 12 "Pilot"                  5.71
## 13 "Rav4"                   4.94
# Calculate the average attitude for young adults across all regions and competitor parent companies
average_attitude_by_region <- Car_Total %>%
  filter(Age_Category == "1", 
         Parent %in% c("General Motors", "Chrysler", "Ford", "Honda", "Kia", "Toyota")) %>%
  group_by(Region, Parent) %>%  
  summarize(Average_Attitude = mean((Att_1 + Att_2) / 2, na.rm = TRUE))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# View the result
print(average_attitude_by_region)
## # A tibble: 17 × 3
## # Groups:   Region [4]
##    Region         Parent         Average_Attitude
##    <chr>          <chr>                     <dbl>
##  1 American       Chrysler                   4.89
##  2 American       Ford                       4.94
##  3 American       General Motors             6.14
##  4 American       Honda                      5.67
##  5 American       Toyota                     4.98
##  6 Asian          Chrysler                   5.5 
##  7 Asian          Honda                      5.61
##  8 Asian          Toyota                     5.14
##  9 European       Chrysler                   4.23
## 10 European       Ford                       4.87
## 11 European       General Motors             5.1 
## 12 European       Honda                      5.20
## 13 European       Toyota                     4.96
## 14 Middle Eastern Chrysler                   4.72
## 15 Middle Eastern Ford                       4.7 
## 16 Middle Eastern General Motors             5.86
## 17 Middle Eastern Honda                      5.53
ggplot(average_attitude_by_region, aes(x = Parent, y = Average_Attitude, color = Region, group = Region)) +
  geom_line() +  
  geom_point(size = 3) + 
  labs(title = "Average Attitude Ratings of Young Adults of Competitors Across Regions",
       x = "Parent Company",
       y = "Average Attitude Rating") +
  theme_minimal() 

### - What is the average level of positive word-of-mouth among young adults for Toyota compared to its competitors across different regions?

# Calculate the combined average word-of-mouth for young adults across regions and competitors
average_wom_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  mutate(Combined_WOM = (WOM_1 + WOM_2) / 2) %>%  # Combine both WOM mean variables
  group_by(Region, Parent) %>%  # Group by Region and Parent Company
  summarize(Average_WOM = mean(Combined_WOM, na.rm = TRUE), .groups = 'drop')

# View the result
print(average_wom_young_adults)
## # A tibble: 17 × 3
##    Region         Parent         Average_WOM
##    <chr>          <chr>                <dbl>
##  1 American       Chrysler              5.34
##  2 American       Ford                  5.24
##  3 American       General Motors        4.71
##  4 American       Honda                 5.18
##  5 American       Toyota                5.20
##  6 Asian          Chrysler              5.5 
##  7 Asian          Honda                 4.70
##  8 Asian          Toyota                5.26
##  9 European       Chrysler              4.46
## 10 European       Ford                  5.78
## 11 European       General Motors        4.35
## 12 European       Honda                 5.16
## 13 European       Toyota                5.67
## 14 Middle Eastern Chrysler              4.91
## 15 Middle Eastern Ford                  5.38
## 16 Middle Eastern General Motors        5.96
## 17 Middle Eastern Honda                 5.25
# Create a grouped bar chart to show average WOM across regions and companies
ggplot(average_wom_young_adults, aes(x = Region, y = Average_WOM, fill = Parent)) +
  geom_bar(stat = "identity", position = "dodge") +  #Dodge to group up bars
  labs(title = "Average Positive Word-of-Mouth Among Young Adults by Region and Company",
       x = "Region",
       y = "Average Word-of-Mouth",
       fill = "Parent Company")

Key Information Need 3: What is the average post-purchase satisfaction level among young adults regarding their current car purchases, and how does it vary by Parent company and Region?

## Key Information Need 3: What is the average post-purchase satisfaction level among young adults regarding their current car purchases, and how does it vary by Parent company and Region?

# Calculate the average post-purchase satisfaction for young adults grouped by Parent company
# Calculate the average post-purchase satisfaction for young adults (Age_Category == "1") by Parent company and Region

average_post_satisfaction_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  group_by(Parent, Region) %>%  # Group by Parent company and Region
  summarize(
    Average_Post_Satisfaction = mean(Post.Satis, na.rm = TRUE))
## `summarise()` has grouped output by 'Parent'. You can override using the
## `.groups` argument.
# View the result
print(average_post_satisfaction_young_adults)
## # A tibble: 17 × 3
## # Groups:   Parent [5]
##    Parent         Region         Average_Post_Satisfaction
##    <chr>          <chr>                              <dbl>
##  1 Chrysler       American                            4.96
##  2 Chrysler       Asian                               4.6 
##  3 Chrysler       European                            5.08
##  4 Chrysler       Middle Eastern                      5.20
##  5 Ford           American                            4.06
##  6 Ford           European                            4.13
##  7 Ford           Middle Eastern                      5.3 
##  8 General Motors American                            4.14
##  9 General Motors European                            4.8 
## 10 General Motors Middle Eastern                      5.12
## 11 Honda          American                            5.04
## 12 Honda          Asian                               5.44
## 13 Honda          European                            5.68
## 14 Honda          Middle Eastern                      5.78
## 15 Toyota         American                            5.5 
## 16 Toyota         Asian                               5.79
## 17 Toyota         European                            5.55
ggplot(average_post_satisfaction_young_adults, aes(x = Parent, y = Average_Post_Satisfaction, color = Region)) +
  geom_point(size = 4) +
  geom_line(aes(group = Region), size = 1) +  
  labs(title = "Average Post-Purchase Satisfaction Among Young Adults by Parent and Region",
       x = "Parent Company",
       y = "Average Post-Purchase Satisfaction")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.