This is Project 1 of MKTG 3P98

Load Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)

Load Data

# Set working directory
setwd("/Users/connorweir/Desktop/mktg 3p98 R")

#open data file "Car_Survey_1
car1<-read.csv("Car_Survey_1.csv")
str(car1)
## 'data.frame':    1180 obs. of  23 variables:
##  $ Resp        : chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Att_1       : int  6 7 7 4 6 6 1 6 3 6 ...
##  $ Att_2       : int  6 5 7 1 6 6 1 5 2 6 ...
##  $ Enj_1       : int  6 5 7 1 6 6 1 5 3 4 ...
##  $ Enj_2       : int  6 2 5 1 5 5 1 3 2 4 ...
##  $ Perform_1   : int  5 2 5 1 5 5 2 5 2 4 ...
##  $ Perform_2   : int  6 6 5 1 2 5 2 5 3 4 ...
##  $ Perform_3   : int  3 7 3 1 1 7 2 2 1 1 ...
##  $ WOM_1       : int  3 5 6 7 7 5 2 4 6 5 ...
##  $ WOM_2       : int  3 5 6 7 7 5 3 6 6 6 ...
##  $ Futu_Pur_1  : int  3 6 7 3 7 7 5 4 7 6 ...
##  $ Futu_Pur_2  : int  3 6 7 3 6 7 2 4 7 6 ...
##  $ Valu_Percp_1: int  5 6 5 6 6 7 2 4 6 6 ...
##  $ Valu_Percp_2: int  2 7 7 5 5 7 2 4 6 6 ...
##  $ Pur_Proces_1: int  6 7 7 5 6 7 2 4 6 6 ...
##  $ Pur_Proces_2: int  4 6 7 4 7 7 6 4 6 6 ...
##  $ Residence   : int  2 2 1 2 1 2 2 1 2 1 ...
##  $ Pay_Meth    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Insur_Type  : chr  "Collision" "Collision" "Collision" "Collision" ...
##  $ Gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ Age         : int  18 18 19 19 19 19 19 21 21 21 ...
##  $ Education   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ X           : logi  NA NA NA NA NA NA ...
head(car1, n=5)
##   Resp Att_1 Att_2 Enj_1 Enj_2 Perform_1 Perform_2 Perform_3 WOM_1 WOM_2
## 1 Res1     6     6     6     6         5         6         3     3     3
## 2 Res2     7     5     5     2         2         6         7     5     5
## 3 Res3     7     7     7     5         5         5         3     6     6
## 4 Res4     4     1     1     1         1         1         1     7     7
## 5 Res5     6     6     6     5         5         2         1     7     7
##   Futu_Pur_1 Futu_Pur_2 Valu_Percp_1 Valu_Percp_2 Pur_Proces_1 Pur_Proces_2
## 1          3          3            5            2            6            4
## 2          6          6            6            7            7            6
## 3          7          7            5            7            7            7
## 4          3          3            6            5            5            4
## 5          7          6            6            5            6            7
##   Residence Pay_Meth Insur_Type Gender Age Education  X
## 1         2        2  Collision   Male  18         2 NA
## 2         2        2  Collision   Male  18         2 NA
## 3         1        2  Collision   Male  19         2 NA
## 4         2        2  Collision   Male  19         2 NA
## 5         1        2  Collision Female  19         2 NA
View(car1)

car2<-read.csv("Car_Survey_2.csv")
str(car2)
## 'data.frame':    1049 obs. of  9 variables:
##  $ Respondents: chr  "Res1" "Res2" "Res3" "Res4" ...
##  $ Region     : chr  "European" "European" "European" "European" ...
##  $ Model      : chr  "Ford Expedition" "Ford Expedition" "Ford Expedition" "Ford Expedition" ...
##  $ MPG        : int  15 15 15 15 15 15 15 15 15 15 ...
##  $ Cyl        : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ acc1       : num  5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 ...
##  $ C_cost.    : num  16 16 16 16 16 16 16 16 16 16 ...
##  $ H_Cost     : num  14 14 14 14 14 14 14 14 14 14 ...
##  $ Post.Satis : int  4 3 5 5 5 3 3 6 3 5 ...
head(car2, n=5)
##   Respondents   Region           Model MPG Cyl acc1 C_cost. H_Cost Post.Satis
## 1        Res1 European Ford Expedition  15   8  5.5      16     14          4
## 2        Res2 European Ford Expedition  15   8  5.5      16     14          3
## 3        Res3 European Ford Expedition  15   8  5.5      16     14          5
## 4        Res4 European Ford Expedition  15   8  5.5      16     14          5
## 5        Res5 European Ford Expedition  15   8  5.5      16     14          5
View(car2)

Data Cleaning

# Standardize column names
colnames(car1) <- str_replace_all(colnames(car1), " ", "_")
colnames(car2) <- str_replace_all(colnames(car2), " ", "_")

# Rename key column for merging
colnames(car2)[1] <- "Resp"

# Function to replace NA values with column means (only for numeric columns)
replace_na_with_mean <- function(df) {
  for (col in names(df)) {
    if (is.numeric(df[[col]])) {  # Check if column is numeric
      df[[col]][is.na(df[[col]])] <- mean(df[[col]], na.rm = TRUE)
    }
  }
  return(df)
}

# Apply the function to both datasets
car1 <- replace_na_with_mean(car1)
car2 <- replace_na_with_mean(car2)


# Merge datasets
car_total <- merge(car1, car2, by="Resp")

Data Transformation

# Extract brand from Model column
car_total <- car_total %>% separate(Model, into = c("Brand", "Model_Type"), sep = " ", extra = "merge")

# Convert Age to categories
car_total$Age_Group <- cut(as.numeric(car_total$Age), breaks = c(0, 30, 50, 100), labels = c("Young Adults", "Adults", "Mature Adults"), right = FALSE)

Data Visualization

Car Purchasing by Brand

ggplot(car_total, aes(x=Brand, fill=Brand)) +
  geom_bar() +
  labs(title="Car Purchases by Brand", x="Brand", y="Count") +
  theme_minimal()

Car Purchasing by Region

ggplot(car_total, aes(x=Region, fill=Region)) +
  geom_bar() +
  labs(title="Car Purchases by Region", x="Region", y="Count") +
  theme_minimal()

Competitor Analysis - Toyota vs. Honda vs. Ford

competitor_data <- car_total %>% filter(Brand %in% c("Toyota", "Honda", "Ford"))
ggplot(competitor_data, aes(x=Brand, fill=Region)) +
  geom_bar() +
  labs(title="Competitor Analysis: Toyota vs Honda vs Ford", x="Brand", y="Count") +
  theme_minimal()

Consumer Attitude Towards Car Brands Across the Regions

attitude_data <- car_total %>% group_by(Brand, Region) %>% summarise(Attitude_Mean = mean(Att_1, na.rm=TRUE))
## `summarise()` has grouped output by 'Brand'. You can override using the
## `.groups` argument.
ggplot(attitude_data, aes(x=Region, y=Attitude_Mean, group=Brand, color=Brand)) +
  geom_line() + geom_point() +
  labs(title="Consumer Attitude Toward Car Brands Across Regions", x="Region", y="Average Attitude Score") +
  theme_minimal()

Summary and Insights

Conclusion

This analysis provides us insights into the consumer purchasing trends, the competitor positioning, and the market opportunities. These findings can help us shape the strategic planning to better increase the sales and marketing efforts.