Setup working directory

getwd()

## [1] "C:/Users/LMNig/OneDrive/Documents/School"

setwd("C:/Users/LMNig/OneDrive/Documents/School")

Survey Overview

# Load data file using import dataset

## Import car_survey_1 using "Import Dataset" function
## Import car_survey_2 using "Import Dataset" function

# Load data file using read.csv
## Using read.csv

# Read excel.csv file (save excel file as CSV UTF-8
Car1<-read.csv("C:/Users/LMNig/OneDrive/Documents/School/Car_Survey_1a.csv")
## Display the structure of Car1  (data frame)
str(Car1)

## 'data.frame':    1049 obs. of  22 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...

## Display the first few rows of Car1(data frame)
head(Car1, n = 5)

##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education
## 1         2        2  Collision   Male  18         2
## 2         2        2  Collision   Male  18         2
## 3         1        2  Collision   Male  19         2
## 4         2        2  Collision   Male  19         2
## 5         1        2  Collision Female  19         2

# Read excel.csv file (save excel file as CSV UTF-8
Car2<-read.csv("C:/Users/LMNig/OneDrive/Documents/School/Car_Survey_2b.csv")
## Display the structure of Car1 (data frame)
str(Car2)

## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...

## Display the first few rows of Car1 (data frame)
head(Car2,n=5)

##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5

Merging Data

# Create a Master Dataset (combined car_survey1a and car_survey_2b)

# Rename unique ID in Car2 to match with Car1
## Renaming the first column in Car2 as "Resp"
## (to match with the first column name in Car1)
names(Car2)[1]<-c("Resp")
head(Car2, n=1)

##   Resp   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1 Res1 European Ford Expedition  15   8  5.5      16     14          4

# Merge Car_Survey_1a and Car_Survey_2b into one dataset
Car_Total<-merge(Car1,Car2, by="Resp")
str(Car_Total)

## 'data.frame':    1049 obs. of  30 variables:
##  $ Resp        : chr  "Res1" "Res10" "Res100" "Res1000" ...
##  $ Att_1       : int  6 6 6 6 6 3 2 7 2 6 ...
##  $ Att_2       : int  6 6 7 6 6 1 2 7 1 6 ...
##  $ Enj_1       : int  6 4 7 7 7 4 1 7 2 6 ...
##  $ Enj_2       : int  6 4 3 6 6 3 2 6 1 5 ...
##  $ Perform_1   : int  5 4 5 6 6 5 2 5 2 5 ...
##  $ Perform_2   : int  6 4 6 6 6 6 2 6 2 5 ...
##  $ Perform_3   : int  3 1 6 6 6 6 1 5 2 5 ...
##  $ WOM_1       : int  3 5 3 6 4 2 6 6 7 3 ...
##  $ WOM_2       : int  3 6 5 6 4 6 7 6 7 3 ...
##  $ Futu_Pur_1  : int  3 6 6 6 4 6 6 6 7 6 ...
##  $ Futu_Pur_2  : int  3 6 6 6 6 6 5 7 7 6 ...
##  $ Valu_Percp_1: int  5 6 7 4 5 5 4 6 4 5 ...
##  $ Valu_Percp_2: int  2 6 6 6 6 4 4 5 6 6 ...
##  $ Pur_Proces_1: int  6 6 5 6 6 5 4 5 6 6 ...
##  $ Pur_Proces_2: int  4 6 5 3 7 5 5 5 7 5 ...
##  $ Residence   : int  2 1 2 2 1 1 1 2 1 2 ...
##  $ Pay_Meth    : int  2 2 1 3 3 3 3 3 3 3 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Liability" ...
##  $ Gender      : chr  "Male" "Male" "Female" "Female" ...
##  $ Age         : int  18 21 32 24 24 25 26 26 27 27 ...
##  $ Education   : int  2 2 1 2 2 2 2 2 2 2 ...
##  $ Region      : chr  "European" "European" "American" "Asian" ...
##  $ Model       : chr  "Ford Expedition" "Ford Expedition" "Toyota Rav4" "Toyota Corolla" ...
##  $ MPG         : int  15 15 24 26 26 26 26 26 26 26 ...
##  $ Cyl         : int  8 8 4 4 4 4 4 4 4 4 ...
##  $ acc1        : num  5.5 5.5 8.2 8 8 8 8 8 8 8 ...
##  $ C_cost.     : num  16 16 10 7 7 7 7 7 7 7 ...
##  $ H_Cost      : num  14 14 8 6 6 6 6 6 6 6 ...
##  $ Post.Satis  : int  4 5 4 6 5 6 5 6 7 6 ...

head(Car_Total)

##      Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1    Res1     6     6     6     6         5         6         3     3     3
## 2   Res10     6     6     4     4         4         4         1     5     6
## 3  Res100     6     7     7     3         5         6         6     3     5
## 4 Res1000     6     6     7     6         6         6         6     6     6
## 5 Res1001     6     6     7     6         6         6         6     4     4
## 6 Res1002     3     1     4     3         5         6         6     2     6
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            6            6            6
## 3          6          6            7            6            5            5
## 4          6          6            4            6            6            3
## 5          4          6            5            6            6            7
## 6          6          6            5            4            5            5
##   Residence Pay_Meth Insur_Type Gender Age Education   Region           Model
## 1         2        2  Collision   Male  18         2 European Ford Expedition
## 2         1        2  Collision   Male  21         2 European Ford Expedition
## 3         2        1  Collision Female  32         1 American     Toyota Rav4
## 4         2        3  Liability Female  24         2    Asian  Toyota Corolla
## 5         1        3  Liability Female  24         2    Asian  Toyota Corolla
## 6         1        3  Liability Female  25         2    Asian  Toyota Corolla
##   MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1  15   8  5.5      16     14          4
## 2  15   8  5.5      16     14          5
## 3  24   4  8.2      10      8          4
## 4  26   4  8.0       7      6          6
## 5  26   4  8.0       7      6          5
## 6  26   4  8.0       7      6          6

# Save the merged data (Car_Total) to a file
## save as CSV

write.csv(Car_Total, "Car_Total", row.names=FALSE)
View(Car_Total)

# Using readxl Package
## Load the package
library(readxl)

# Replace "file_path.xlsx" with the path to your file
Car_data_1 <-read_excel("C:/Users/LMNig/OneDrive/Documents/School/Copy of Car_Survey_1.xlsx")
Car_data_2 <-read_excel ("C:/Users/LMNig/OneDrive/Documents/School/Copy of Car_Survey_2.xlsx")

#Summary of Key Analysis Variables

# Create a summary of the selected variables
summary(Car_Total[c("Att_1", "Att_2")])

##      Att_1           Att_2      
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.000  
##  Median :6.000   Median :6.000  
##  Mean   :4.882   Mean   :5.287  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000  
##  NA's   :4

# Create a summary of the selected variables
summary(Car_Total[c("Valu_Percp_1", "Valu_Percp_2")])

##   Valu_Percp_1    Valu_Percp_2  
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:4.000  
##  Median :6.000   Median :5.000  
##  Mean   :5.411   Mean   :5.114  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000  
##  NA's   :4       NA's   :1

# Create a summary of the selected variables
summary(Car_Total[c("WOM_1", "WOM_2")])

##      WOM_1           WOM_2     
##  Min.   :1.000   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:4.00  
##  Median :6.000   Median :6.00  
##  Mean   :5.286   Mean   :5.35  
##  3rd Qu.:7.000   3rd Qu.:6.00  
##  Max.   :7.000   Max.   :7.00  
##  NA's   :1       NA's   :3

# Create a summary of the selected variables
summary(Car_Total[c("Futu_Pur_1", "Futu_Pur_2")])

##    Futu_Pur_1      Futu_Pur_2   
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:5.000  
##  Median :6.000   Median :6.000  
##  Mean   :5.321   Mean   :5.371  
##  3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :9.000   Max.   :7.000  
##  NA's   :5       NA's   :2

# Create a summary of the selected variables
summary(Car_Total[c("Post.Satis")])

##    Post.Satis  
##  Min.   :2.00  
##  1st Qu.:5.00  
##  Median :6.00  
##  Mean   :5.28  
##  3rd Qu.:6.00  
##  Max.   :7.00

Replacing Na Values

# Count NA values before replacement
na_counts_before <- colSums(is.na(Car_Total))
cat("NA counts before replacement:\n")

## NA counts before replacement:

print(na_counts_before)

##         Resp        Att_1        Att_2        Enj_1        Enj_2    Perform_1 
##            0            4            0            4            4            2 
##    Perform_2    Perform_3        WOM_1        WOM_2   Futu_Pur_1   Futu_Pur_2 
##            4            1            1            3            5            2 
## Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2    Residence     Pay_Meth 
##            4            1            3            4            5            0 
##   Insur_Type       Gender          Age    Education       Region        Model 
##            0            0            0            0            0            0 
##          MPG          Cyl         acc1      C_cost.       H_Cost   Post.Satis 
##            0            0            0            0            0            0

# Identifying numeric columns
numeric_cols <- sapply(Car_Total, is.numeric)

# Replacing NA values with the calculated means for all numeric columns
Car_Total[, numeric_cols] <- lapply(Car_Total[, numeric_cols], function(x) {
  mean_val <- mean(x, na.rm = TRUE)   
  x[is.na(x)] <- mean_val              
  return(x)                            
})

#Check for any remaining NA's
any(is.na(Car_Total)) #False=No remaining Na values

## [1] FALSE

Manipulating Data

Creation of “Parent” grouping for each car make and model.

#Group by Car Make
library(stringr) #import library

#Seperate model col into two, delimit using space
Car_Total [c('Make', 'Model_v1')] <- str_split_fixed(Car_Total$Model, " ", 2)
#See the two new columns ("Make", and "Make_v1" in Car_Total data file)
View(Car_Total)

#Group by Parent Company
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Car_Total <- Car_Total %>% #Call dataframe and create new column with new groupings
mutate(Parent = case_when(Make == "Buick" ~ "General Motors",
                          Make == "Chevrolet" ~ "General Motors",
                          Make == "Chrysler" ~ "Chrysler",
                          Make == "Dodge" ~ "Chrysler",
                          Make == "Fiat" ~ "Chrysler",
                          Make == "Ford" ~ "Ford",
                          Make == "Honda" ~ "Honda",
                          Make == "Kia" ~ "Kia",
                          Make == "Lincoln" ~ "Ford",
                          Make == "Toyota" ~ "Toyota",
                          TRUE ~ "Check"))
                      
#Check if the grouping is correct
count(Car_Total, Car_Total$Make, Car_Total$Parent, name = "Freq")

##    Car_Total$Make Car_Total$Parent Freq
## 1           Buick   General Motors   31
## 2       Chevrolet   General Motors   64
## 3        Chrysler         Chrysler  169
## 4           Dodge         Chrysler   41
## 5            Fiat         Chrysler   18
## 6            Ford             Ford  202
## 7           Honda            Honda  159
## 8             Kia              Kia   34
## 9         Lincoln             Ford   39
## 10         Toyota           Toyota  292

table(Car_Total$Make)

## 
##     Buick Chevrolet  Chrysler     Dodge      Fiat      Ford     Honda       Kia 
##        31        64       169        41        18       202       159        34 
##   Lincoln    Toyota 
##        39       292

Age Categories

# Convert Age to numeric
Car_Total$Age <- as.numeric(as.character(Car_Total$Age))

# Create age categories... 1=(18-29yr), 2=(30-49yr), 3=(50yr+)
Car_Total$Age_Category <- cut(Car_Total$Age,
                               breaks = c(17, 29, 49, Inf), 
                               labels = c("1", "2", "3"),
                               right = TRUE)  

# Result
table(Car_Total$Age_Category)

## 
##   1   2   3 
## 443 375 231

Mutate

# Creation of new variables using the average of each

##Average of Att_1 and Att_2 to create new variable "Attitude"
Car_Total$Attitude <- rowMeans(Car_Total[, c("Att_1", "Att_2")], na.rm = TRUE)

# Result
summary(Car_Total$Attitude)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   5.000   5.085   6.000   7.000

##Average of Futu_Pur_1 and Futu_Pur_2 to create new variable "FPI"
Car_Total$FPI <- rowMeans(Car_Total[, c("Futu_Pur_1", "Futu_Pur_2")], na.rm = TRUE)

# Result
summary(Car_Total$FPI)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   6.000   5.346   6.000   7.000

##Average of Valu_Percp_1 and Valu_Percp_2 to create new variable "ValuePercp"
Car_Total$ValuePercp <- rowMeans(Car_Total[, c("Valu_Percp_1", "Valu_Percp_2")], na.rm = TRUE)

# Result
summary(Car_Total$ValuePercp)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   5.500   5.263   6.000   7.000

##Average of WOM_1 and WOM_2 to create new variable "WOM"
Car_Total$WOM <- rowMeans(Car_Total[, c("WOM_1", "WOM_2")], na.rm = TRUE)

# Result
summary(Car_Total$WOM)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.500   5.500   5.318   6.500   7.000

# Calculate the average of Post.Satis to create new variable "satisfactionmean"
Satisfactionmean <- mean(Car_Total$Post.Satis, na.rm = TRUE)

# Display the result
print(Satisfactionmean)

## [1] 5.280267

Market Segmentation and Target Audience

# Group by Car Make
library(stringr)

# Seperate model col into two, delimit using space
Car_Total [c('Make', 'Model_v1')] <- str_split_fixed(Car_Total$Model, " ", 2)
#See the two new columns ("Make", and "Make_v1" in Car_Total data file)
View(Car_Total)

library(ggplot2)
library(dplyr)

# Age Distribution for Toyota Car Models

## Filter for specific Toyota models
toyota_data <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"))

## Create a bar chart with Model on the x-axis and fill by Age_Category
ggplot(toyota_data, aes(x = Model_v1, fill = Age_Category)) +
  geom_bar(position = "dodge") +
  labs(title = "Age Distribution by Toyota Model",
       x = "Toyota Model",
       y = "Count")

# Region Distribution of Toyota car models

## Filter for specific Toyota models
toyota_data <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"))

## Create a bar chart with Model on the x-axis and fill by Region
ggplot(toyota_data, aes(x = Model_v1, fill = Region)) +
  geom_bar(position = "dodge") +
  labs(title = "Model Region Distribution",
       x = "Toyota Model",
       y = "Count")

Key Information Need 1: How do young adults perceive the value of different Toyota car models? What are young adults future purchase intention of different Toyota car models?

- Which Toyota car model ranks highest in value perception amongst young adults?

- What is the average future purchase intention rating among young adults for different Toyota car models?

### - Which Toyota car model ranks highest in value perception amongst young adults?

install.packages("dplyr")

## Warning: package 'dplyr' is in use and will not be installed

library(dplyr)

# The combined average value perception for each Toyota model specifically for young adults (Age Category "1")
average_value_perception_young_adults <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"), Age_Category == "1") %>% 
  group_by(Model_v1) %>%  
  summarize( Average_Valu_Perception = mean((Valu_Percp_1 + Valu_Percp_2) / 2, na.rm = TRUE))

# View the result
print(average_value_perception_young_adults)

## # A tibble: 3 × 2
##   Model_v1   Average_Valu_Perception
##   <chr>                        <dbl>
## 1 Corolla                       5.11
## 2 Highlander                    5.2 
## 3 Rav4                          5.64

# Bar chart to show the average value perception for each Toyota model among young adults
ggplot(average_value_perception_young_adults, aes(x = Model_v1, y = Average_Valu_Perception, fill = Model_v1)) +
  geom_bar(stat = "identity") +  
  labs(title = "Average Value Perception of Toyota Models Among Young Adults (18-29)",
       x = "Toyota Model",
       y = "Average Value Perception")

### - What is the average future purchase intention rating among young adults for different Toyota car models?

# Calculate the combined average future purchase intention for each Toyota model specifically for young adults (Age Category "1")
average_future_purchase_intention_young_adults <- Car_Total %>%
  filter(Model_v1 %in% c("Corolla", "Rav4", "Highlander"), Age_Category == "1") %>%  
  group_by(Model_v1) %>%  
  summarize(
    Average_Futu_Pur = mean((Futu_Pur_1 + Futu_Pur_2) / 2, na.rm = TRUE))

# View the result
print(average_future_purchase_intention_young_adults)

## # A tibble: 3 × 2
##   Model_v1   Average_Futu_Pur
##   <chr>                 <dbl>
## 1 Corolla                5.72
## 2 Highlander             5.16
## 3 Rav4                   5.79

# Create a bar chart to show the average future purchase intention for each Toyota model among young adults
ggplot(average_future_purchase_intention_young_adults, aes(x = Model_v1, y = Average_Futu_Pur, fill = Model_v1)) +
  geom_bar(stat = "identity") +  
  labs(title = "Average Future Purchase Intention of Toyota Models Among Young Adults (18-29)",
       x = "Toyota Model",
       y = "Average Future Purchase Intention")

Key Information Need 2: How do young adults across different regions percieve Toyota car brand compared to competitors?

- How does the average attitude rating for Toyota compare to its competitors among young adults across different regions?

- What is the average level of positive word-of-mouth among young adults for Toyota compared to its competitors across different regions?

### - How does the average attitude rating for Toyota compare to its competitors among young adults across different regions?

# Calculate the combined average attitude for each model for young adults (Age_Category == "1")
average_attitude_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  group_by(Model_v1) %>%  
  summarize(
    Average_Attitude = mean((Att_1 + Att_2) / 2, na.rm = TRUE)  # Combine both attitude mean variables
  )

# View the result
print(average_attitude_young_adults)

## # A tibble: 13 × 2
##    Model_v1     Average_Attitude
##    <chr>                   <dbl>
##  1 "500x"                   4.38
##  2 "CRV"                    5.41
##  3 "Camaro"                 5.90
##  4 "Corolla"                5.08
##  5 "Encore"                 5.5 
##  6 "Expedition"             4.98
##  7 "Explorer"               4.22
##  8 "Highlander"             5.07
##  9 "Jeep"                   4.80
## 10 "Journey"                5.15
## 11 "Navigator "             5.25
## 12 "Pilot"                  5.71
## 13 "Rav4"                   4.94

# Calculate the average attitude for young adults across all regions and competitor parent companies
average_attitude_by_region <- Car_Total %>%
  filter(Age_Category == "1", 
         Parent %in% c("General Motors", "Chrysler", "Ford", "Honda", "Kia", "Toyota")) %>%
  group_by(Region, Parent) %>%  
  summarize(Average_Attitude = mean((Att_1 + Att_2) / 2, na.rm = TRUE))

## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.

# View the result
print(average_attitude_by_region)

## # A tibble: 17 × 3
## # Groups:   Region [4]
##    Region         Parent         Average_Attitude
##    <chr>          <chr>                     <dbl>
##  1 American       Chrysler                   4.89
##  2 American       Ford                       4.94
##  3 American       General Motors             6.14
##  4 American       Honda                      5.67
##  5 American       Toyota                     4.98
##  6 Asian          Chrysler                   5.5 
##  7 Asian          Honda                      5.61
##  8 Asian          Toyota                     5.14
##  9 European       Chrysler                   4.23
## 10 European       Ford                       4.87
## 11 European       General Motors             5.1 
## 12 European       Honda                      5.20
## 13 European       Toyota                     4.96
## 14 Middle Eastern Chrysler                   4.72
## 15 Middle Eastern Ford                       4.7 
## 16 Middle Eastern General Motors             5.86
## 17 Middle Eastern Honda                      5.53

ggplot(average_attitude_by_region, aes(x = Parent, y = Average_Attitude, color = Region, group = Region)) +
  geom_line() +  
  geom_point(size = 3) + 
  labs(title = "Average Attitude Ratings of Young Adults of Competitors Across Regions",
       x = "Parent Company",
       y = "Average Attitude Rating") +
  theme_minimal()

### - What is the average level of positive word-of-mouth among young adults for Toyota compared to its competitors across different regions?

# Calculate the combined average word-of-mouth for young adults across regions and competitors
average_wom_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  mutate(Combined_WOM = (WOM_1 + WOM_2) / 2) %>%  # Combine both WOM mean variables
  group_by(Region, Parent) %>%  # Group by Region and Parent Company
  summarize(Average_WOM = mean(Combined_WOM, na.rm = TRUE), .groups = 'drop')

# View the result
print(average_wom_young_adults)

## # A tibble: 17 × 3
##    Region         Parent         Average_WOM
##    <chr>          <chr>                <dbl>
##  1 American       Chrysler              5.34
##  2 American       Ford                  5.24
##  3 American       General Motors        4.71
##  4 American       Honda                 5.18
##  5 American       Toyota                5.20
##  6 Asian          Chrysler              5.5 
##  7 Asian          Honda                 4.70
##  8 Asian          Toyota                5.26
##  9 European       Chrysler              4.46
## 10 European       Ford                  5.78
## 11 European       General Motors        4.35
## 12 European       Honda                 5.16
## 13 European       Toyota                5.67
## 14 Middle Eastern Chrysler              4.91
## 15 Middle Eastern Ford                  5.38
## 16 Middle Eastern General Motors        5.96
## 17 Middle Eastern Honda                 5.25

# Create a grouped bar chart to show average WOM across regions and companies
ggplot(average_wom_young_adults, aes(x = Region, y = Average_WOM, fill = Parent)) +
  geom_bar(stat = "identity", position = "dodge") +  #Dodge to group up bars
  labs(title = "Average Positive Word-of-Mouth Among Young Adults by Region and Company",
       x = "Region",
       y = "Average Word-of-Mouth",
       fill = "Parent Company")

Key Information Need 3: What is the average post-purchase satisfaction level among young adults regarding their current car purchases, and how does it vary by Parent company and Region?

## Key Information Need 3: What is the average post-purchase satisfaction level among young adults regarding their current car purchases, and how does it vary by Parent company and Region?

# Calculate the average post-purchase satisfaction for young adults grouped by Parent company
# Calculate the average post-purchase satisfaction for young adults (Age_Category == "1") by Parent company and Region

average_post_satisfaction_young_adults <- Car_Total %>%
  filter(Age_Category == "1") %>%  # Filter for young adults
  group_by(Parent, Region) %>%  # Group by Parent company and Region
  summarize(
    Average_Post_Satisfaction = mean(Post.Satis, na.rm = TRUE))

## `summarise()` has grouped output by 'Parent'. You can override using the
## `.groups` argument.

# View the result
print(average_post_satisfaction_young_adults)

## # A tibble: 17 × 3
## # Groups:   Parent [5]
##    Parent         Region         Average_Post_Satisfaction
##    <chr>          <chr>                              <dbl>
##  1 Chrysler       American                            4.96
##  2 Chrysler       Asian                               4.6 
##  3 Chrysler       European                            5.08
##  4 Chrysler       Middle Eastern                      5.20
##  5 Ford           American                            4.06
##  6 Ford           European                            4.13
##  7 Ford           Middle Eastern                      5.3 
##  8 General Motors American                            4.14
##  9 General Motors European                            4.8 
## 10 General Motors Middle Eastern                      5.12
## 11 Honda          American                            5.04
## 12 Honda          Asian                               5.44
## 13 Honda          European                            5.68
## 14 Honda          Middle Eastern                      5.78
## 15 Toyota         American                            5.5 
## 16 Toyota         Asian                               5.79
## 17 Toyota         European                            5.55

ggplot(average_post_satisfaction_young_adults, aes(x = Parent, y = Average_Post_Satisfaction, color = Region)) +
  geom_point(size = 4) +
  geom_line(aes(group = Region), size = 1) +  
  labs(title = "Average Post-Purchase Satisfaction Among Young Adults by Parent and Region",
       x = "Parent Company",
       y = "Average Post-Purchase Satisfaction")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Project 1 - MKTG 3P98

Logan Nigro

2024-10-08

Setup working directory

Survey Overview

Merging Data

Replacing Na Values

Manipulating Data

Creation of “Parent” grouping for each car make and model.

Age Categories

Mutate

Market Segmentation and Target Audience

Key Information Need 1: How do young adults perceive the value of different Toyota car models? What are young adults future purchase intention of different Toyota car models?

- Which Toyota car model ranks highest in value perception amongst young adults?

- What is the average future purchase intention rating among young adults for different Toyota car models?

Key Information Need 2: How do young adults across different regions percieve Toyota car brand compared to competitors?

- How does the average attitude rating for Toyota compare to its competitors among young adults across different regions?

- What is the average level of positive word-of-mouth among young adults for Toyota compared to its competitors across different regions?

Key Information Need 3: What is the average post-purchase satisfaction level among young adults regarding their current car purchases, and how does it vary by Parent company and Region?