Reflection

This lab taught me more about using AI tools to assist with writing an R Markdown file for data analysis. Initially, I asked the AI to give me a basic dataset, but the output was at times hard to understand. After refining my prompt, I was able to get a much more detailed and structured response that I could easily copy and paste into my R Markdown file. This taught me how important it is to ask clear and specific questions when working with AI.

One of the biggest challenges I faced was understanding how to interpret the dataset variables and how to phrase my requests so that the AI could give me the data I wanted. However, the AI helped me explore different approaches, such as summary statistics, and helped me reword my own writing to make it more understandable. This lab showed me that AI is a powerful assistant for coding and structuring an analysis, but it still needs a human who communicates clearly and knows what data they want.

Create Dataset (Embedded)

# Survey responses entered inline: one row per respondent (22 rows) and one
# column per questionnaire item. Every item is stored as a numeric code; the
# meaning of each code is not defined in this file.
data <- data.frame(
  ID = seq_len(22),
  CS_helpful = c(
    2, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3
  ),
  Recommend = c(
    2, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 2
  ),
  Come_age = c(
    2, 1, 1, 2, 3, 3, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 2, 1, 1, 2, 3, 1, 1
  ),
  All_Produ = c(
    2, 1, 1, 4, 5, 2, 2, 2, 2, 1, 2,
    2, 1, 2, 4, 2, 2, 1, 3, 1, 2, 2
  ),
  Profession = c(
    2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2
  ),
  Limitation = c(
    2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 3, 4, 1, 1, 2
  ),
  Online_gr = c(
    2, 2, 3, 3, 2, 1, 2, 1, 2, 3, 2,
    3, 1, 3, 2, 3, 2, 3, 1, 3, 3, 3
  ),
  delivery = c(
    3, 3, 3, 3, 3, 2, 2, 1, 1, 2, 2,
    2, 2, 3, 2, 1, 3, 3, 3, 3, 3, 3
  ),
  Pick_up = c(
    4, 3, 2, 2, 2, 1, 2, 2, 3, 2, 2,
    3, 2, 3, 2, 3, 5, 3, 1, 1, 4, 3
  ),
  Find_item = c(
    1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1,
    1, 1, 3, 2, 1, 2, 1, 3, 1, 1, 2
  ),
  other_shop = c(
    2, 2, 3, 2, 3, 4, 1, 4, 1, 1, 3,
    3, 1, 1, 5, 5, 5, 2, 2, 4, 2, 1
  ),
  Gender = c(
    1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2,
    1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1
  ),
  Age = c(
    2, 2, 2, 3, 4, 2, 2, 2, 2, 2, 4,
    3, 4, 3, 2, 3, 2, 2, 2, 2, 2, 2
  ),
  Education = c(
    2, 2, 2, 5, 5, 5, 3, 2, 1, 2, 5,
    1, 5, 5, 5, 5, 1, 5, 2, 3, 2, 5
  )
)

# Preview the first six respondents to confirm the data frame was built
# as intended.
head(x = data, n = 6L)
##   ID CS_helpful Recommend Come_age All_Produ Profession Limitation Online_gr
## 1  1          2         2        2         2          2          2         2
## 2  2          1         2        1         1          1          1         2
## 3  3          2         1        1         1          1          2         3
## 4  4          3         3        2         4          1          2         3
## 5  5          2         1        3         5          2          1         2
## 6  6          1         1        3         2          1          1         1
##   delivery Pick_up Find_item other_shop Gender Age Education
## 1        3       4         1          2      1   2         2
## 2        3       3         1          2      1   2         2
## 3        3       2         1          3      1   2         2
## 4        3       2         2          2      1   3         5
## 5        3       2         1          3      1   4         5
## 6        2       1         1          4      1   2         5

Data Overview

# Compact structure listing: dimensions plus the storage type of each column.
str(object = data)
## 'data.frame':    22 obs. of  15 variables:
##  $ ID        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful: num  2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend : num  2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_age  : num  2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Produ : num  2 1 1 4 5 2 2 2 2 1 ...
##  $ Profession: num  2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation: num  2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_gr : num  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery  : num  3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up   : num  4 3 2 2 2 1 2 2 3 2 ...
##  $ Find_item : num  1 1 1 2 1 1 1 2 1 1 ...
##  $ other_shop: num  2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender    : num  1 1 1 1 1 1 1 1 2 1 ...
##  $ Age       : num  2 2 2 3 4 2 2 2 2 2 ...
##  $ Education : num  2 2 2 5 5 5 3 2 1 2 ...
# Per-column summary (min, quartiles, median, mean, max). The columns are
# numeric codes, so the means describe the codes, not a measured quantity.
summary(object = data)
##        ID          CS_helpful      Recommend        Come_age    
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##    All_Produ       Profession      Limitation    Online_gr        delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     Pick_up      Find_item       other_shop        Gender           Age       
##  Min.   :1.0   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :2.000  
##  1st Qu.:2.0   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000   1st Qu.:2.000  
##  Median :2.0   Median :1.000   Median :2.000   Median :1.000   Median :2.000  
##  Mean   :2.5   Mean   :1.409   Mean   :2.591   Mean   :1.182   Mean   :2.455  
##  3rd Qu.:3.0   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.000   3rd Qu.:3.000  
##  Max.   :5.0   Max.   :3.000   Max.   :5.000   Max.   :2.000   Max.   :4.000  
##    Education    
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :3.318  
##  3rd Qu.:5.000  
##  Max.   :5.000

Missing Values

# Count missing entries per column: is.na() flags each cell and colSums()
# totals the flags column by column.
colSums(x = is.na(data), na.rm = FALSE)
##         ID CS_helpful  Recommend   Come_age  All_Produ Profession Limitation 
##          0          0          0          0          0          0          0 
##  Online_gr   delivery    Pick_up  Find_item other_shop     Gender        Age 
##          0          0          0          0          0          0          0 
##  Education 
##          0

Exploratory Data Analysis (Base R)

Age Distribution

# Age holds only a few integer codes (2-4 in this sample; presumably age
# brackets - confirm against the questionnaire), so tabulate the codes and
# draw one bar per code, as the Gender and Online_gr plots do. hist() would
# instead bin the codes on a continuous axis and can leave empty bins
# between them.
barplot(table(data$Age), col = "blue", main = "Age Distribution", xlab = "Age")

Gender Distribution

# One bar per Gender code, with bar height equal to the respondent count.
barplot(
  height = table(data[["Gender"]]),
  col = "green",
  main = "Gender Distribution"
)

Online Grocery Usage

# One bar per Online_gr code, with bar height equal to the respondent count.
barplot(
  height = table(data[["Online_gr"]]),
  col = "purple",
  main = "Online Grocery Usage"
)

Correlation Analysis

# Pairwise Pearson correlations between every pair of columns in `data`.
# NOTE(review): the ID column is part of `data`, so the matrix also reports
# correlations with the respondent identifier; those entries carry no
# analytical meaning. Consider dropping ID before calling cor().
cor_matrix <- cor(x = data, method = "pearson")
print(x = cor_matrix)
##                     ID CS_helpful   Recommend   Come_age   All_Produ
## ID          1.00000000  0.1548278 -0.08509414 -0.1290804 -0.11705779
## CS_helpful  0.15482785  1.0000000  0.48809623  0.2714620  0.29345435
## Recommend  -0.08509414  0.4880962  1.00000000  0.3808907  0.02515624
## Come_age   -0.12908035  0.2714620  0.38089069  1.0000000  0.36875582
## All_Produ  -0.11705779  0.2934543  0.02515624  0.3687558  1.00000000
## Profession  0.25465839  0.5144280  0.39143306  0.4269581  0.08951478
## Limitation  0.19664246  0.6067448  0.04594474  0.0000000  0.05576720
## Online_gr   0.23893106  0.2074960  0.29678764 -0.1451439 -0.14833305
## delivery    0.09489449  0.5903614  0.41510987  0.1676677  0.07197937
## Pick_up     0.11958327 -0.1602627 -0.10922064 -0.4460565 -0.13257075
## Find_item   0.31375090  0.2611412  0.01508223 -0.1055927  0.34782619
## other_shop  0.09671790 -0.3089838 -0.05968695  0.3259435  0.21734201
## Gender      0.24148723  0.1045592  0.13572976  0.1930220 -0.04118680
## Age        -0.10922184 -0.1676677 -0.11789474  0.1269841  0.30821382
## Education   0.12265028  0.1129691  0.07943369  0.2673682  0.30902467
##             Profession  Limitation   Online_gr    delivery     Pick_up
## ID          0.25465839  0.19664246  0.23893106  0.09489449  0.11958327
## CS_helpful  0.51442802  0.60674478  0.20749595  0.59036145 -0.16026270
## Recommend   0.39143306  0.04594474  0.29678764  0.41510987 -0.10922064
## Come_age    0.42695809  0.00000000 -0.14514393  0.16766768 -0.44605651
## All_Produ   0.08951478  0.05576720 -0.14833305  0.07197937 -0.13257075
## Profession  1.00000000  0.05030388  0.05734345  0.25471679 -0.11958327
## Limitation  0.05030388  1.00000000 -0.15480679  0.36404687 -0.02934836
## Online_gr   0.05734345 -0.15480679  1.00000000  0.29971638  0.30667450
## delivery    0.25471679  0.36404687  0.29971638  1.00000000  0.16026270
## Pick_up    -0.11958327 -0.02934836  0.30667450  0.16026270  1.00000000
## Find_item  -0.08256603  0.49037714 -0.13551538  0.22573223 -0.03532525
## other_shop -0.19082180 -0.06351171 -0.11262158 -0.19968341 -0.01677568
## Gender      0.48297445  0.15044516 -0.17149859 -0.10455917 -0.35764085
## Age        -0.22837293 -0.32166527 -0.06111323 -0.09581010 -0.12744472
## Education  -0.18955043 -0.12642451  0.04117613  0.04482901 -0.32916127
##              Find_item  other_shop      Gender         Age   Education
## ID          0.31375090  0.09671790  0.24148723 -0.10922184  0.12265028
## CS_helpful  0.26114121 -0.30898381  0.10455917 -0.16766768  0.11296910
## Recommend   0.01508223 -0.05968695  0.13572976 -0.11789474  0.07943369
## Come_age   -0.10559274  0.32594355  0.19302201  0.12698413  0.26736821
## All_Produ   0.34782619  0.21734201 -0.04118680  0.30821382  0.30902467
## Profession -0.08256603 -0.19082180  0.48297445 -0.22837293 -0.18955043
## Limitation  0.49037714 -0.06351171  0.15044516 -0.32166527 -0.12642451
## Online_gr  -0.13551538 -0.11262158 -0.17149859 -0.06111323  0.04117613
## delivery    0.22573223 -0.19968341 -0.10455917 -0.09581010  0.04482901
## Pick_up    -0.03532525 -0.01677568 -0.35764085 -0.12744472 -0.32916127
## Find_item   1.00000000 -0.01621583  0.06584864 -0.10559274  0.09288374
## other_shop -0.01621583  1.00000000 -0.03127100 -0.04178763  0.03847873
## Gender      0.06584864 -0.03127100  1.00000000  0.02969569 -0.16673337
## Age        -0.10559274 -0.04178763  0.02969569  1.00000000  0.50265224
## Education   0.09288374  0.03847873 -0.16673337  0.50265224  1.00000000

Clustering (K-means)

# Fix the RNG seed so the random initial centres chosen by kmeans() are
# reproducible between runs.
set.seed(123)

# Standardise every column (mean 0, sd 1) so that no single item dominates
# the Euclidean distances used by k-means.
# NOTE(review): `data` still contains the ID column here, so the respondent
# identifier is scaled and clustered along with the survey items - confirm
# whether it should be excluded.
scaled_data <- scale(x = data, center = TRUE, scale = TRUE)

# Partition the respondents into three clusters and print the fitted object.
kmeans_result <- kmeans(x = scaled_data, centers = 3)

print(x = kmeans_result)
## K-means clustering with 3 clusters of sizes 7, 4, 11
## 
## Cluster means:
##           ID CS_helpful  Recommend   Come_age  All_Produ  Profession
## 1 -0.1429982 -0.6103089 -0.4922862  0.3516900  0.4511971 -0.45099444
## 2  0.7314910  1.5788425  1.4416954  0.7385489  0.3841273  1.00098765
## 3 -0.1749978 -0.1857462 -0.2109798 -0.4923660 -0.4268081 -0.07699905
##    Limitation  Online_gr   delivery    Pick_up  Find_item other_shop
## 1 -0.44543540 -0.7278037 -0.7518298 -0.4941518 -0.1852153  0.6990291
## 2  0.93541435  0.2961992  0.8049001 -0.7412278  0.8870840 -0.2430399
## 3 -0.05669178  0.3554390  0.1857462  0.5839976 -0.2047117 -0.3564586
##        Gender        Age  Education
## 1 -0.09869275  0.7385489  0.7622130
## 2  0.80599083 -0.2769559  0.2626278
## 3 -0.23028309 -0.3692745 -0.5805457
## 
## Clustering vector:
##  [1] 3 3 3 2 1 1 3 1 3 3 1 3 1 3 1 1 3 3 2 2 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 69.96574 59.02489 93.91363
##  (between_SS / total_SS =  29.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Cluster Insights

# Mean of every original (unscaled) column within each k-means cluster.
# The ID column is averaged too; that value is not interpretable.
aggregate(
  x = data,
  by = list(cluster = kmeans_result$cluster),
  FUN = mean
)
##   cluster       ID CS_helpful Recommend Come_age All_Produ Profession
## 1       1 10.57143   1.142857  1.000000 1.714286  2.571429   1.142857
## 2       2 16.25000   2.750000  2.250000 2.000000  2.500000   2.000000
## 3       3 10.36364   1.454545  1.181818 1.090909  1.636364   1.363636
##   Limitation Online_gr delivery  Pick_up Find_item other_shop   Gender      Age
## 1   1.142857  1.714286 1.857143 2.000000  1.285714   3.571429 1.142857 3.000000
## 2   2.250000  2.500000 3.000000 1.750000  2.000000   2.250000 1.500000 2.250000
## 3   1.454545  2.545455 2.545455 3.090909  1.272727   2.090909 1.090909 2.181818
##   Education
## 1  4.571429
## 2  3.750000
## 3  2.363636