Reflection

This lab helped me understand how AI can be used as a support tool when writing code and analyzing data. My first prompt gave me a general R Markdown template, but it was not very specific to the dataset I was working with. After improving my prompt, I was able to generate a much more useful and structured analysis that included reflection, visualizations, and even clustering. This showed me that better prompts lead to better AI outputs, and that I need to be clear and detailed when asking for help.

One challenge I faced was that the AI did not initially know the structure of my dataset, so I had to adjust the code to match the actual variables. I also noticed that AI-generated code is not always perfect and needs to be checked carefully. However, there were many opportunities as well. AI helped me organize my work, think of useful analyses, and save time writing code. Overall, this lab showed me that AI is a helpful assistant, but I still need to understand the data and make decisions myself.

library(tidyverse)
library(janitor)
library(corrplot)
# Read the survey export, then let janitor::clean_names() standardise the
# column headers so later code can refer to them as plain R names.
customers <- clean_names(read.csv("customer_segmentation.csv"))

# Preview the first six rows (the default for head()).
customers %>% head()
##   id cs_helpful recommend come_again all_products profesionalism limitation
## 1  1          2         2          2            2              2          2
## 2  2          1         2          1            1              1          1
## 3  3          2         1          1            1              1          2
## 4  4          3         3          2            4              1          2
## 5  5          2         1          3            5              2          1
## 6  6          1         1          3            2              1          1
##   online_grocery delivery pick_up find_items other_shops gender age education
## 1              2        3       4          1           2      1   2         2
## 2              2        3       3          1           2      1   2         2
## 3              3        3       2          1           3      1   2         2
## 4              3        3       2          2           2      1   3         5
## 5              2        3       1          2           3      2   4         2
## 6              1        2       1          1           4      1   2         5
# Number of rows, then number of columns.
customers %>% dim()
## [1] 22 15
# Column names after cleaning.
customers %>% names()
##  [1] "id"             "cs_helpful"     "recommend"      "come_again"    
##  [5] "all_products"   "profesionalism" "limitation"     "online_grocery"
##  [9] "delivery"       "pick_up"        "find_items"     "other_shops"   
## [13] "gender"         "age"            "education"
# Compact view of each column's type and first few values.
customers %>% str()
## 'data.frame':    22 obs. of  15 variables:
##  $ id            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ cs_helpful    : int  2 1 2 3 2 1 2 1 1 1 ...
##  $ recommend     : int  2 2 1 3 1 1 1 1 1 1 ...
##  $ come_again    : int  2 1 1 2 3 3 1 1 1 1 ...
##  $ all_products  : int  2 1 1 4 5 2 2 2 2 1 ...
##  $ profesionalism: int  2 1 1 1 2 1 2 1 2 1 ...
##  $ limitation    : int  2 1 2 2 1 1 1 2 1 1 ...
##  $ online_grocery: int  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : int  3 3 3 3 3 2 2 1 1 2 ...
##  $ pick_up       : int  4 3 2 2 1 1 2 2 3 2 ...
##  $ find_items    : int  1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : int  2 2 3 2 3 4 1 4 1 1 ...
##  $ gender        : int  1 1 1 1 2 1 1 1 2 2 ...
##  $ age           : int  2 2 2 3 4 2 2 2 2 2 ...
##  $ education     : int  2 2 2 5 2 5 3 2 1 2 ...
# Five-number summary plus mean for every column.
customers %>% summary()
##        id          cs_helpful      recommend       come_again   
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##   all_products   profesionalism    limitation  online_grocery     delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     pick_up        find_items     other_shops        gender     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.455   Mean   :1.455   Mean   :2.591   Mean   :1.273  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.750  
##  Max.   :5.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       age          education    
##  Min.   :2.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.500  
##  Mean   :2.455   Mean   :3.182  
##  3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :4.000   Max.   :5.000
# Keep only the three satisfaction items and summarise their distributions.
satisfaction_vars <- select(customers, cs_helpful, recommend, come_again)

satisfaction_vars %>% summary()
##    cs_helpful      recommend       come_again   
##  Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :1.000  
##  Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000
# Reshape to one row per (item, rating) pair so the items can be faceted.
satisfaction_long <- pivot_longer(
  satisfaction_vars,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)

# Count of each rating value, one panel per satisfaction item.
satisfaction_long %>%
  ggplot(aes(x = value)) +
  geom_bar() +
  facet_wrap(vars(variable)) +
  labs(title = "Customer Satisfaction Ratings (1–3 Scale)")

# Pull out the four service-related items.
service_vars <- select(
  customers,
  all_of(c("online_grocery", "delivery", "pick_up", "find_items"))
)

# Long format: one row per (item, rating) pair.
service_long <- pivot_longer(
  service_vars,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)

# Count of each rating value, one panel per service item.
ggplot(data = service_long, mapping = aes(x = value)) +
  geom_bar() +
  facet_wrap(vars(variable)) +
  labs(title = "Service Usage Ratings")

# Respondent counts per gender code; factor() makes the axis discrete.
customers %>%
  ggplot(mapping = aes(x = factor(gender))) +
  geom_bar() +
  labs(title = "Gender Distribution")

# Respondent counts per age code; factor() makes the axis discrete.
customers %>%
  ggplot(mapping = aes(x = factor(age))) +
  geom_bar() +
  labs(title = "Age Distribution")

# Respondent counts per education code; factor() makes the axis discrete.
customers %>%
  ggplot(mapping = aes(x = factor(education))) +
  geom_bar() +
  labs(title = "Education Levels")

# Correlation and k-means segmentation ----
# `id` is only a row identifier (it runs 1..22 in the summary above), so it is
# removed before computing correlations or distances. Left in, it would be
# treated as a feature and pull customers with nearby row numbers together.
# NOTE(review): gender, age and education look like coded categories; confirm
# whether they should take part in a Euclidean-distance clustering.
numeric_data <- customers %>%
  select(where(is.numeric), -id)

# Pairwise Pearson correlations between the remaining numeric columns.
cor_matrix <- cor(numeric_data)

corrplot(cor_matrix, method = "color", tl.cex = 0.7)

# Fix the RNG seed so the random starting centres are reproducible.
set.seed(123)

# Centre and scale each column so no single variable dominates the distance.
scaled_data <- scale(numeric_data)

# nstart = 25 tries 25 random initialisations and keeps the best solution;
# the default single start can land in a poor local optimum on 22 rows.
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)

# Store the assignment as a factor so plots treat it as a discrete group.
customers$cluster <- factor(kmeans_result$cluster)

table(customers$cluster)
## (Cluster sizes are printed here; re-run or re-knit to refresh them after
##  `id` was dropped from the features and `nstart` was added.)
# Scatter of two satisfaction items coloured by cluster.
# Both items only take the integer values 1-3, so the 22 customers fall on at
# most nine grid positions and plain points would sit on top of each other.
# A small jitter with a fixed seed keeps every customer visible and the plot
# reproducible.
ggplot(customers, aes(x = cs_helpful, y = recommend, color = cluster)) +
  geom_point(
    size = 3,
    position = position_jitter(width = 0.15, height = 0.15, seed = 123)
  ) +
  labs(title = "Customer Segments")