# Research question: Should the company invest its brand-building dollars in
# sponsoring a NASCAR race or an NCAA College Football Bowl game to maximize
# appeal among its target customers (especially heavy users)?
# Load readr for CSV import. The package is installed only when it is missing:
# an unconditional install.packages() call re-downloads the package on every
# run of the script.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Load data from CSV file (the path is resolved against the working directory)
data <- read_csv("segmentation_analysis (1).csv")
## Rows: 10 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Consumer, NASCAR, College
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NASCAR ratings: mean, standard deviation, and five-number summary
with(data, mean(NASCAR))
## [1] 4.4
with(data, sd(NASCAR))
## [1] 1.776388
with(data, summary(NASCAR))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 3.0 4.5 4.4 5.0 8.0
# NCAA College Football ratings: mean, standard deviation, and five-number summary
with(data, mean(College))
## [1] 4.2
with(data, sd(College))
## [1] 2.347576
with(data, summary(College))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 4.00 4.20 5.00 9.00
# Load ggplot2 for plotting. The package is installed only when it is missing,
# so re-running the script does not reinstall it every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Create scatter plot: one point per row of `data`, NASCAR on the x-axis and
# College on the y-axis.
ggplot(data, aes(x = NASCAR, y = College)) +
  geom_point() +
  labs(
    title = "NASCAR vs NCAA College Football Ratings",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating"
  )
# K-means segmentation on the two rating columns.
#
# The model is fitted once and every derived object (cluster labels, centroids)
# comes from that single fit, so the points and the centroid markers in the
# plots below always describe the same solution. Three clusters is an
# illustrative choice, not a tuned one.
set.seed(123) # fixes the random starting centers so the result is reproducible
kmeans_result <- kmeans(
  data[, c("NASCAR", "College")],
  centers = 3,
  nstart = 25 # try several random starts and keep the best one
)

# Second name for the same fit, kept so code that refers to `kmeans_result1`
# keeps working without refitting the model.
kmeans_result1 <- kmeans_result

# Store the cluster assignment as a factor so ggplot2 maps it to a discrete
# colour scale. `Cluster1` duplicates `Cluster` and is kept for compatibility.
data$Cluster <- as.factor(kmeans_result$cluster)
data$Cluster1 <- data$Cluster

# Cluster centers as a data frame with NASCAR and College columns, for plotting
centroids <- as.data.frame(kmeans_result$centers)

# Visualize the clusters
ggplot(data, aes(x = NASCAR, y = College, color = Cluster)) +
  geom_point() +
  labs(
    title = "K-means Clusters for NASCAR vs NCAA College Football Ratings",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating"
  ) +
  scale_color_manual(values = c("red", "blue", "green"))

# Plot with color-coded clusters and centroids (centroids drawn as black crosses)
ggplot(data, aes(x = NASCAR, y = College, color = Cluster)) +
  geom_point(size = 4) +
  geom_point(
    data = centroids,
    aes(x = NASCAR, y = College),
    color = "black", size = 5, shape = 4, stroke = 1.5
  ) +
  labs(
    title = "K-means Clustering with Centroids: NASCAR vs NCAA Ratings",
    subtitle = "Visual segmentation of consumer preferences",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating"
  ) +
  scale_color_manual(values = c("red", "blue", "green")) +
  theme_minimal()