This lab taught me more about using AI tools to help write an R Markdown file for data analysis. Initially, I asked the AI to give me a basic dataset, but the output was at times hard to understand. After refining my prompt, I was able to get a much more detailed and structured response that I could easily copy and paste into my R Markdown file. This taught me how important it is to ask clear and specific questions when working with AI.
One of the biggest challenges I faced was understanding how to interpret the dataset variables and how to phrase my requests so that the AI would give me the data I wanted. However, AI helped me explore different approaches, such as summary statistics, and reword my own writing to be more understandable. This lab showed me that AI is a powerful assistant for coding and structuring analysis, but it still needs a human to be clear and to know what data they want.
# Survey data set: 22 rows and 15 columns, every answer stored as a small
# numeric code. Presumably one row per respondent; the codebook that maps the
# codes to labels is not part of this file.
data <- data.frame(
  ID         = seq_len(22),
  CS_helpful = c(2, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1,
                 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3),
  Recommend  = c(2, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1,
                 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 2),
  Come_age   = c(2, 1, 1, 2, 3, 3, 1, 1, 1, 1, 1,
                 1, 1, 1, 1, 2, 1, 1, 2, 3, 1, 1),
  All_Produ  = c(2, 1, 1, 4, 5, 2, 2, 2, 2, 1, 2,
                 2, 1, 2, 4, 2, 2, 1, 3, 1, 2, 2),
  Profession = c(2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1,
                 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2),
  Limitation = c(2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1,
                 1, 1, 1, 1, 1, 2, 3, 4, 1, 1, 2),
  Online_gr  = c(2, 2, 3, 3, 2, 1, 2, 1, 2, 3, 2,
                 3, 1, 3, 2, 3, 2, 3, 1, 3, 3, 3),
  delivery   = c(3, 3, 3, 3, 3, 2, 2, 1, 1, 2, 2,
                 2, 2, 3, 2, 1, 3, 3, 3, 3, 3, 3),
  Pick_up    = c(4, 3, 2, 2, 2, 1, 2, 2, 3, 2, 2,
                 3, 2, 3, 2, 3, 5, 3, 1, 1, 4, 3),
  Find_item  = c(1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1,
                 1, 1, 3, 2, 1, 2, 1, 3, 1, 1, 2),
  other_shop = c(2, 2, 3, 2, 3, 4, 1, 4, 1, 1, 3,
                 3, 1, 1, 5, 5, 5, 2, 2, 4, 2, 1),
  Gender     = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2,
                 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1),
  Age        = c(2, 2, 2, 3, 4, 2, 2, 2, 2, 2, 4,
                 3, 4, 3, 2, 3, 2, 2, 2, 2, 2, 2),
  Education  = c(2, 2, 2, 5, 5, 5, 3, 2, 1, 2, 5,
                 1, 5, 5, 5, 5, 1, 5, 2, 3, 2, 5)
)
# Preview the first six rows to confirm the data frame was built as expected.
head(data, n = 6L)
## ID CS_helpful Recommend Come_age All_Produ Profession Limitation Online_gr
## 1 1 2 2 2 2 2 2 2
## 2 2 1 2 1 1 1 1 2
## 3 3 2 1 1 1 1 2 3
## 4 4 3 3 2 4 1 2 3
## 5 5 2 1 3 5 2 1 2
## 6 6 1 1 3 2 1 1 1
## delivery Pick_up Find_item other_shop Gender Age Education
## 1 3 4 1 2 1 2 2
## 2 3 3 1 2 1 2 2
## 3 3 2 1 3 1 2 2
## 4 3 2 2 2 1 3 5
## 5 3 2 1 3 1 4 5
## 6 2 1 1 4 1 2 5
# Show the structure of `data`: its dimensions plus each column's storage type
# and first few values.
str(data)
## 'data.frame': 22 obs. of 15 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CS_helpful: num 2 1 2 3 2 1 2 1 1 1 ...
## $ Recommend : num 2 2 1 3 1 1 1 1 1 1 ...
## $ Come_age : num 2 1 1 2 3 3 1 1 1 1 ...
## $ All_Produ : num 2 1 1 4 5 2 2 2 2 1 ...
## $ Profession: num 2 1 1 1 2 1 2 1 2 1 ...
## $ Limitation: num 2 1 2 2 1 1 1 2 1 1 ...
## $ Online_gr : num 2 2 3 3 2 1 2 1 2 3 ...
## $ delivery : num 3 3 3 3 3 2 2 1 1 2 ...
## $ Pick_up : num 4 3 2 2 2 1 2 2 3 2 ...
## $ Find_item : num 1 1 1 2 1 1 1 2 1 1 ...
## $ other_shop: num 2 2 3 2 3 4 1 4 1 1 ...
## $ Gender : num 1 1 1 1 1 1 1 1 2 1 ...
## $ Age : num 2 2 2 3 4 2 2 2 2 2 ...
## $ Education : num 2 2 2 5 5 5 3 2 1 2 ...
# Per-column summary statistics (minimum, quartiles, median, mean, maximum).
# Every column is numeric, so the coded answers are summarised as numbers.
summary(data)
## ID CS_helpful Recommend Come_age
## Min. : 1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 6.25 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :11.50 Median :1.000 Median :1.000 Median :1.000
## Mean :11.50 Mean :1.591 Mean :1.318 Mean :1.455
## 3rd Qu.:16.75 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :22.00 Max. :3.000 Max. :3.000 Max. :3.000
## All_Produ Profession Limitation Online_gr delivery
## Min. :1.000 Min. :1.000 Min. :1.0 Min. :1.000 Min. :1.000
## 1st Qu.:1.250 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :1.000 Median :1.0 Median :2.000 Median :3.000
## Mean :2.091 Mean :1.409 Mean :1.5 Mean :2.273 Mean :2.409
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.0 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.000 Max. :3.000 Max. :4.0 Max. :3.000 Max. :3.000
## Pick_up Find_item other_shop Gender Age
## Min. :1.0 Min. :1.000 Min. :1.000 Min. :1.000 Min. :2.000
## 1st Qu.:2.0 1st Qu.:1.000 1st Qu.:1.250 1st Qu.:1.000 1st Qu.:2.000
## Median :2.0 Median :1.000 Median :2.000 Median :1.000 Median :2.000
## Mean :2.5 Mean :1.409 Mean :2.591 Mean :1.182 Mean :2.455
## 3rd Qu.:3.0 3rd Qu.:2.000 3rd Qu.:3.750 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :5.0 Max. :3.000 Max. :5.000 Max. :2.000 Max. :4.000
## Education
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :3.318
## 3rd Qu.:5.000
## Max. :5.000
# Count the missing values in each column; a column of zeros means the data
# set is complete.
colSums(is.na(data), na.rm = FALSE)
## ID CS_helpful Recommend Come_age All_Produ Profession Limitation
## 0 0 0 0 0 0 0
## Online_gr delivery Pick_up Find_item other_shop Gender Age
## 0 0 0 0 0 0 0
## Education
## 0
# Bar charts of the response counts for three coded survey variables.
# Age holds a few discrete codes (2, 3 and 4 in this sample), so it is
# tabulated and drawn with barplot() like the other two variables. hist() would
# bin the codes as if they were continuous measurements, leaving empty
# half-unit bins between them.
barplot(table(data$Age), col = "blue", main = "Age Distribution",
        xlab = "Age", ylab = "Frequency")
barplot(table(data$Gender), col = "green", main = "Gender Distribution",
        xlab = "Gender", ylab = "Frequency")
barplot(table(data$Online_gr), col = "purple", main = "Online Grocery Usage",
        xlab = "Online_gr", ylab = "Frequency")
# Pairwise Pearson correlations between all 15 columns of `data`.
# NOTE(review): the ID column is included, so the matrix carries a row and a
# column of correlations with the row identifier -- confirm whether it should
# be dropped before interpreting the results.
cor_matrix <- cor(data, method = "pearson")
print(cor_matrix)
## ID CS_helpful Recommend Come_age All_Produ
## ID 1.00000000 0.1548278 -0.08509414 -0.1290804 -0.11705779
## CS_helpful 0.15482785 1.0000000 0.48809623 0.2714620 0.29345435
## Recommend -0.08509414 0.4880962 1.00000000 0.3808907 0.02515624
## Come_age -0.12908035 0.2714620 0.38089069 1.0000000 0.36875582
## All_Produ -0.11705779 0.2934543 0.02515624 0.3687558 1.00000000
## Profession 0.25465839 0.5144280 0.39143306 0.4269581 0.08951478
## Limitation 0.19664246 0.6067448 0.04594474 0.0000000 0.05576720
## Online_gr 0.23893106 0.2074960 0.29678764 -0.1451439 -0.14833305
## delivery 0.09489449 0.5903614 0.41510987 0.1676677 0.07197937
## Pick_up 0.11958327 -0.1602627 -0.10922064 -0.4460565 -0.13257075
## Find_item 0.31375090 0.2611412 0.01508223 -0.1055927 0.34782619
## other_shop 0.09671790 -0.3089838 -0.05968695 0.3259435 0.21734201
## Gender 0.24148723 0.1045592 0.13572976 0.1930220 -0.04118680
## Age -0.10922184 -0.1676677 -0.11789474 0.1269841 0.30821382
## Education 0.12265028 0.1129691 0.07943369 0.2673682 0.30902467
## Profession Limitation Online_gr delivery Pick_up
## ID 0.25465839 0.19664246 0.23893106 0.09489449 0.11958327
## CS_helpful 0.51442802 0.60674478 0.20749595 0.59036145 -0.16026270
## Recommend 0.39143306 0.04594474 0.29678764 0.41510987 -0.10922064
## Come_age 0.42695809 0.00000000 -0.14514393 0.16766768 -0.44605651
## All_Produ 0.08951478 0.05576720 -0.14833305 0.07197937 -0.13257075
## Profession 1.00000000 0.05030388 0.05734345 0.25471679 -0.11958327
## Limitation 0.05030388 1.00000000 -0.15480679 0.36404687 -0.02934836
## Online_gr 0.05734345 -0.15480679 1.00000000 0.29971638 0.30667450
## delivery 0.25471679 0.36404687 0.29971638 1.00000000 0.16026270
## Pick_up -0.11958327 -0.02934836 0.30667450 0.16026270 1.00000000
## Find_item -0.08256603 0.49037714 -0.13551538 0.22573223 -0.03532525
## other_shop -0.19082180 -0.06351171 -0.11262158 -0.19968341 -0.01677568
## Gender 0.48297445 0.15044516 -0.17149859 -0.10455917 -0.35764085
## Age -0.22837293 -0.32166527 -0.06111323 -0.09581010 -0.12744472
## Education -0.18955043 -0.12642451 0.04117613 0.04482901 -0.32916127
## Find_item other_shop Gender Age Education
## ID 0.31375090 0.09671790 0.24148723 -0.10922184 0.12265028
## CS_helpful 0.26114121 -0.30898381 0.10455917 -0.16766768 0.11296910
## Recommend 0.01508223 -0.05968695 0.13572976 -0.11789474 0.07943369
## Come_age -0.10559274 0.32594355 0.19302201 0.12698413 0.26736821
## All_Produ 0.34782619 0.21734201 -0.04118680 0.30821382 0.30902467
## Profession -0.08256603 -0.19082180 0.48297445 -0.22837293 -0.18955043
## Limitation 0.49037714 -0.06351171 0.15044516 -0.32166527 -0.12642451
## Online_gr -0.13551538 -0.11262158 -0.17149859 -0.06111323 0.04117613
## delivery 0.22573223 -0.19968341 -0.10455917 -0.09581010 0.04482901
## Pick_up -0.03532525 -0.01677568 -0.35764085 -0.12744472 -0.32916127
## Find_item 1.00000000 -0.01621583 0.06584864 -0.10559274 0.09288374
## other_shop -0.01621583 1.00000000 -0.03127100 -0.04178763 0.03847873
## Gender 0.06584864 -0.03127100 1.00000000 0.02969569 -0.16673337
## Age -0.10559274 -0.04178763 0.02969569 1.00000000 0.50265224
## Education 0.09288374 0.03847873 -0.16673337 0.50265224 1.00000000
# Fix the RNG seed so the randomly chosen starting centres, and therefore the
# cluster assignment, are reproducible.
set.seed(123)

# Standardise every column to mean 0 and standard deviation 1 so that no single
# variable dominates the distance calculation.
# NOTE(review): `data` still contains the ID column here, so the row identifier
# is used as a clustering feature -- confirm whether that is intended.
scaled_data <- scale(data, center = TRUE, scale = TRUE)

# Partition the 22 rows into three clusters and print the fitted object.
kmeans_result <- kmeans(x = scaled_data, centers = 3)
print(kmeans_result)
## K-means clustering with 3 clusters of sizes 7, 4, 11
##
## Cluster means:
## ID CS_helpful Recommend Come_age All_Produ Profession
## 1 -0.1429982 -0.6103089 -0.4922862 0.3516900 0.4511971 -0.45099444
## 2 0.7314910 1.5788425 1.4416954 0.7385489 0.3841273 1.00098765
## 3 -0.1749978 -0.1857462 -0.2109798 -0.4923660 -0.4268081 -0.07699905
## Limitation Online_gr delivery Pick_up Find_item other_shop
## 1 -0.44543540 -0.7278037 -0.7518298 -0.4941518 -0.1852153 0.6990291
## 2 0.93541435 0.2961992 0.8049001 -0.7412278 0.8870840 -0.2430399
## 3 -0.05669178 0.3554390 0.1857462 0.5839976 -0.2047117 -0.3564586
## Gender Age Education
## 1 -0.09869275 0.7385489 0.7622130
## 2 0.80599083 -0.2769559 0.2626278
## 3 -0.23028309 -0.3692745 -0.5805457
##
## Clustering vector:
## [1] 3 3 3 2 1 1 3 1 3 3 1 3 1 3 1 1 3 3 2 2 3 2
##
## Within cluster sum of squares by cluster:
## [1] 69.96574 59.02489 93.91363
## (between_SS / total_SS = 29.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Profile each cluster by averaging the original (unscaled) columns within it.
# NOTE(review): ID is averaged along with the survey variables; its cluster
# mean has no obvious interpretation.
aggregate(
  data,
  by = list(cluster = kmeans_result$cluster),
  FUN = mean
)
## cluster ID CS_helpful Recommend Come_age All_Produ Profession
## 1 1 10.57143 1.142857 1.000000 1.714286 2.571429 1.142857
## 2 2 16.25000 2.750000 2.250000 2.000000 2.500000 2.000000
## 3 3 10.36364 1.454545 1.181818 1.090909 1.636364 1.363636
## Limitation Online_gr delivery Pick_up Find_item other_shop Gender Age
## 1 1.142857 1.714286 1.857143 2.000000 1.285714 3.571429 1.142857 3.000000
## 2 2.250000 2.500000 3.000000 1.750000 2.000000 2.250000 1.500000 2.250000
## 3 1.454545 2.545455 2.545455 3.090909 1.272727 2.090909 1.090909 2.181818
## Education
## 1 4.571429
## 2 3.750000
## 3 2.363636