1. Should the company invest its brand-building dollars in sponsoring a NASCAR race or an NCAA College Football Bowl game to maximize appeal among its target customers (especially heavy users)?

# Load the CSV reader. Install it only when it is missing, so that re-running
# the script does not reinstall the package on every execution.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Load data from CSV file (path is resolved against the working directory).
# The recorded output below shows 10 rows and three numeric columns:
# Consumer, NASCAR, and College.
data <- read_csv("segmentation_analysis (1).csv")
## Rows: 10 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Consumer, NASCAR, College
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NASCAR ratings: mean, standard deviation, then the min/quartile/mean/max
# summary table.
mean(data[["NASCAR"]])
## [1] 4.4
sd(data[["NASCAR"]])
## [1] 1.776388
summary(data[["NASCAR"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0     3.0     4.5     4.4     5.0     8.0
# NCAA College Football ratings: the same three statistics for comparison.
mean(data[["College"]])
## [1] 4.2
sd(data[["College"]])
## [1] 2.347576
summary(data[["College"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.25    4.00    4.20    5.00    9.00
# Load ggplot2 for plotting. Install it only when it is missing, so that
# re-running the script does not reinstall the package on every execution.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Scatter plot of each consumer's NASCAR rating (x) against their
# NCAA College Football rating (y), one point per row of `data`.
ggplot(data, aes(x = NASCAR, y = College)) +
  geom_point() +
  labs(
    title = "NASCAR vs NCAA College Football Ratings",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating"
  )

# Perform K-means clustering on the two rating columns
# (choosing 3 clusters as an example).
# The seed is fixed because kmeans() picks its starting centers at random.
# NOTE(review): this uses kmeans()'s default single random start; consider
# passing nstart > 1 if the segmentation looks unstable across seeds.
set.seed(123)
kmeans_result <- kmeans(data[, c("NASCAR", "College")], centers = 3)

# Add cluster results to the data (integer labels 1..3, one per row)
data$Cluster <- kmeans_result$cluster


# Visualize the clusters. The labels are converted to a factor so they map to
# a discrete color scale, and the legend is titled explicitly so it does not
# display the raw expression "as.factor(Cluster)".
ggplot(data, aes(x = NASCAR, y = College, color = as.factor(Cluster))) +
  geom_point() +
  labs(
    title = "K-means Clusters for NASCAR vs NCAA College Football Ratings",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating",
    color = "Cluster"
  ) +
  scale_color_manual(values = c("red", "blue", "green"))

# Run k-means clustering on the two rating columns.
# This repeats the seed and inputs of the earlier fit, so it reproduces the
# same 3-cluster solution while keeping this section runnable on its own.
set.seed(123)
kmeans_result1 <- kmeans(data[, c("NASCAR", "College")], centers = 3)

# Add the cluster assignment to the data as a factor, so ggplot maps it to a
# discrete color scale. Both `Cluster1` and `Cluster` are kept.
data$Cluster1 <- as.factor(kmeans_result1$cluster)
data$Cluster <- data$Cluster1

# Create the centroid data frame from the SAME fit that produced the labels,
# so the plotted centers always correspond to the plotted clusters.
centroids <- as.data.frame(kmeans_result1$centers)

# Plot with color-coded clusters and centroids.
# The centroid layer uses its own data and mappings (inherit.aes = FALSE)
# because `centroids` has no `Cluster` column to satisfy the inherited
# color mapping; centroids are drawn as black crosses instead.
ggplot(data, aes(x = NASCAR, y = College, color = Cluster)) +
  geom_point(size = 4) +
  geom_point(
    data = centroids,
    aes(x = NASCAR, y = College),
    inherit.aes = FALSE,
    color = "black", size = 5, shape = 4, stroke = 1.5
  ) +
  labs(
    title = "K-means Clustering with Centroids: NASCAR vs NCAA Ratings",
    subtitle = "Visual segmentation of consumer preferences",
    x = "NASCAR Rating",
    y = "NCAA College Football Rating"
  ) +
  scale_color_manual(values = c("red", "blue", "green")) +
  theme_minimal()