RPubs ini berisi kode-kode pada Modul 1: Implementasi Principal Component Analysis (PCA) dan Factor Analysis (FA) pada Pola Perilaku Digital dan Indikator Kesehatan Mental Pengguna Media Sosial. Dataset yang digunakan berasal dari Kaggle: https://www.kaggle.com/datasets/souvikahmed071/social-media-and-mental-health/data
# Install readr only when it is missing, so re-running the script does not
# reinstall the package every time; then attach it.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)
Library readr digunakan untuk membaca (mengimpor) data dari berkas CSV ke dalam R.
# Install dplyr only when it is not already available, instead of
# reinstalling it on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Library dplyr berguna untuk mentransformasi data terstruktur secara konsisten.
# Install psych only when it is not already available, instead of
# reinstalling it on every run.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(psych)
Library psych berguna untuk analisis faktor dengan rotasi.
# Read the survey responses from the CSV file "smmh.csv" (path is relative
# to the current working directory) into the tibble `smmh`.
library(readr)
smmh <- read_csv(file = "smmh.csv")
## Rows: 481 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Timestamp, 2. Gender, 3. Relationship Status, 4. Occupation Status...
## dbl (13): 1. What is your age?, 9. How often do you find yourself using Soci...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the imported data to check that it was read as expected.
print(smmh)
## # A tibble: 481 × 21
## Timestamp `1. What is your age?` `2. Gender` 3. Relationship Statu…¹
## <chr> <dbl> <chr> <chr>
## 1 4/18/2022 19:18:47 21 Male In a relationship
## 2 4/18/2022 19:19:28 21 Female Single
## 3 4/18/2022 19:25:59 21 Female Single
## 4 4/18/2022 19:29:43 21 Female Single
## 5 4/18/2022 19:33:31 21 Female Single
## 6 4/18/2022 19:33:48 22 Female Single
## 7 4/18/2022 19:36:12 21 Female Married
## 8 4/18/2022 19:36:17 21 Female In a relationship
## 9 4/18/2022 19:39:36 21 Female In a relationship
## 10 4/18/2022 19:41:34 20 Male Single
## # ℹ 471 more rows
## # ℹ abbreviated name: ¹​`3. Relationship Status`
## # ℹ 17 more variables: `4. Occupation Status` <chr>,
## # `5. What type of organizations are you affiliated with?` <chr>,
## # `6. Do you use social media?` <chr>,
## # `7. What social media platforms do you commonly use?` <chr>,
## # `8. What is the average time you spend on social media every day?` <chr>, …
# Recode the categorical answer to question 8 (average daily time spent on
# social media) into an approximate number of hours, stored in the new
# column `duration_time`.
#
# The lowest category is matched by its "less than" prefix, ignoring case,
# rather than by one exact label: the exact label "Less than 1 hour" did not
# match any row, so those respondents silently received the fallback value.
# Any answer that matches none of the categories (including NA) still falls
# back to 3, as before.
duration_col <- "8. What is the average time you spend on social media every day?"
smmh_baru <- mutate(
  smmh,
  duration_time = case_when(
    grepl("^less than", .data[[duration_col]], ignore.case = TRUE) ~ 0.5,
    .data[[duration_col]] == "Between 1 and 2 hours" ~ 1.5,
    .data[[duration_col]] == "Between 2 and 3 hours" ~ 2.5,
    .data[[duration_col]] == "Between 3 and 4 hours" ~ 3.5,
    .data[[duration_col]] == "Between 4 and 5 hours" ~ 4.5,
    .data[[duration_col]] == "More than 5 hours" ~ 6,
    TRUE ~ 3
  )
)
Setelah kode tersebut dijalankan, akan terbentuk kolom baru bernama duration_time yang berisi durasi penggunaan media sosial responden dalam bentuk numerik.
# Bin respondent age into four groups and plot the group counts.
#
# Intervals are left-closed (right = FALSE) so the bins agree with their
# labels: [0, 18) "<18", [18, 26) "18-25", [26, 36) "26-35", [36, Inf) ">35".
# With cut()'s default right-closed intervals and breaks at 18/25/35, an
# 18-year-old would be counted in "<18". The open upper bound (Inf) keeps
# ages above 100 from turning into NA.
smmh_baru$AgeGroup <- cut(
  smmh_baru$`1. What is your age?`,
  breaks = c(0, 18, 26, 36, Inf),
  labels = c("<18", "18-25", "26-35", ">35"),
  right = FALSE
)
age_table <- table(smmh_baru$AgeGroup)
barplot(
  age_table,
  main = "Distribusi Kelompok Usia",
  col = "darkseagreen",
  xlab = "Kelompok Usia",
  ylab = "Jumlah"
)
Untuk variabel Age dikelompokkan menjadi 4 yaitu rentang usia kurang
dari 18 tahun (Kelompok 1), rentang usia 18-25 tahun (Kelompok 2),
rentang usia 26-35 tahun (Kelompok 3), rentang usia lebih dari 35 tahun
(Kelompok 4).
# Standardise the gender answers: keep "Male" and "Female" as they are and
# collapse every other answer (including missing ones) into "Lainnya".
smmh_baru <- mutate(
  smmh_baru,
  Gender_clean = if_else(
    `2. Gender` %in% c("Male", "Female"),
    `2. Gender`,
    "Lainnya"
  )
)
# Frequency of each cleaned gender category.
table(smmh_baru[["Gender_clean"]])
##
## Female Lainnya Male
## 263 7 211
Kode di atas berguna untuk menstandarisasi jawaban responden tentang jenis kelamin. Jika terdapat jawaban selain Male dan Female, maka akan dikelompokkan ke kategori lainnya.
# Pie chart of the cleaned gender categories; each slice is labelled with
# the category name and its percentage share rounded to one decimal.
gender_freq <- table(smmh_baru$Gender_clean)
gender_pct <- round(prop.table(gender_freq) * 100, 1)
pie(
  gender_freq,
  main = "Distribusi Jenis Kelamin Responden",
  col = c("pink", "gray", "lightblue"),
  labels = paste(names(gender_freq), "\n", gender_pct, "%")
)

# Bar chart of the number of respondents per recoded duration value.
duration_table <- table(smmh_baru$duration_time)
barplot(
  duration_table,
  main = "Durasi Penggunaan Media Sosial per Hari",
  col = "gray",
  xlab = "Kategori Durasi",
  ylab = "Jumlah Responden"
)
# Build the analysis data set: age (column 2), the recoded duration, and the
# twelve survey items in columns 10-21, each given a short name.
# Columns are picked by position, so this depends on the column order of
# `smmh_baru`.
smmh_baru_df <- smmh_baru %>%
  select(
    Age = 2,
    Duration = duration_time,
    NoPurpose = 10,
    SocialDistraction = 11,
    Restless = 12,
    EasyDistracted = 13,
    Worry = 14,
    ConcDifficult = 15,
    SelfCompare = 16,
    GeneralCompare = 17,
    Validation = 18,
    Depressed = 19,
    DailyFluct = 20,
    SleepDifficult = 21
  )
print(smmh_baru_df)
## # A tibble: 481 × 14
## Age Duration NoPurpose SocialDistraction Restless EasyDistracted Worry
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21 2.5 5 3 2 5 2
## 2 21 6 4 3 2 4 5
## 3 21 3.5 3 2 1 2 5
## 4 21 6 4 2 1 3 5
## 5 21 2.5 3 5 4 4 5
## 6 22 2.5 4 4 2 3 4
## 7 21 3.5 4 3 2 2 4
## 8 21 6 5 2 3 3 3
## 9 21 6 5 2 3 3 1
## 10 20 3 1 1 1 1 1
## # ℹ 471 more rows
## # ℹ 7 more variables: ConcDifficult <dbl>, SelfCompare <dbl>,
## # GeneralCompare <dbl>, Validation <dbl>, Depressed <dbl>, DailyFluct <dbl>,
## # SleepDifficult <dbl>
# Descriptive statistics for every analysis variable; only the mean, median,
# standard deviation, minimum and maximum columns are printed.
statistika_deskriptif <- describe(smmh_baru_df)
summary_cols <- c("mean", "median", "sd", "min", "max")
print(statistika_deskriptif[, summary_cols])
## mean median sd min max
## Age 26.14 22.0 9.92 13.0 91
## Duration 3.71 3.5 1.56 1.5 6
## NoPurpose 3.55 4.0 1.10 1.0 5
## SocialDistraction 3.32 3.0 1.33 1.0 5
## Restless 2.59 2.0 1.26 1.0 5
## EasyDistracted 3.35 3.0 1.18 1.0 5
## Worry 3.56 4.0 1.28 1.0 5
## ConcDifficult 3.25 3.0 1.35 1.0 5
## SelfCompare 2.83 3.0 1.41 1.0 5
## GeneralCompare 2.78 3.0 1.06 1.0 5
## Validation 2.46 2.0 1.25 1.0 5
## Depressed 3.26 3.0 1.31 1.0 5
## DailyFluct 3.17 3.0 1.26 1.0 5
## SleepDifficult 3.20 3.0 1.46 1.0 5
# Kaiser-Meyer-Olkin factor adequacy, computed from the correlation matrix
# of the analysis variables; printing shows the overall MSA and the MSA of
# each item.
uji_kmo<-KMO(cor(smmh_baru_df))
print(uji_kmo)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor(smmh_baru_df))
## Overall MSA = 0.89
## MSA for each item =
## Age Duration NoPurpose SocialDistraction
## 0.86 0.87 0.88 0.89
## Restless EasyDistracted Worry ConcDifficult
## 0.90 0.87 0.90 0.90
## SelfCompare GeneralCompare Validation Depressed
## 0.86 0.54 0.81 0.90
## DailyFluct SleepDifficult
## 0.94 0.92
# Bartlett's test on the correlation matrix of the analysis variables, using
# the number of rows as the sample size; prints chi-square, p-value and df.
n_obs <- nrow(smmh_baru_df)
uji_bartlett <- cortest.bartlett(cor(smmh_baru_df), n = n_obs)
print(uji_bartlett)
## $chisq
## [1] 2109.073
##
## $p.value
## [1] 0
##
## $df
## [1] 91
# Unrotated principal components with as many components as variables, used
# to inspect the eigenvalues of the correlation matrix.
# The argument is spelled `nfactors`; the previous `nfaktor` is not an
# argument of principal(), so it was swallowed by `...` and only the default
# single component was actually extracted.
pca_awal <- principal(
  smmh_baru_df,
  nfactors = ncol(smmh_baru_df),
  rotate = "none"
)
print(pca_awal$values)
## [1] 5.0240494 1.2469209 1.1594737 0.9842560 0.9358306 0.7816929 0.7074656
## [8] 0.6117743 0.5499719 0.4912978 0.4505729 0.4103346 0.3483129 0.2980466
# Scree plot of the eigenvalues, with a dashed red reference line at 1.
eigenvalues <- pca_awal$values
plot(
  eigenvalues,
  type = "b",
  main = "Scree Plot",
  xlab = "Jumlah Komponen",
  ylab = "Eigenvalues"
)
abline(h = 1, col = "red", lty = 2)
# PCA on centred and unit-variance-scaled variables; printing shows the
# component standard deviations and the rotation (loadings) matrix.
# The scaling argument of prcomp() is named `scale.` (with a trailing dot);
# it is spelled out here rather than relying on partial matching of `scale`.
analisis_pca <- prcomp(smmh_baru_df, center = TRUE, scale. = TRUE)
print(analisis_pca)
## Standard deviations (1, .., p=14):
## [1] 2.2414391 1.1166561 1.0767886 0.9920968 0.9673834 0.8841340 0.8411097
## [8] 0.7821600 0.7416009 0.7009264 0.6712473 0.6405736 0.5901804 0.5459364
##
## Rotation (n x k) = (14 x 14):
## PC1 PC2 PC3 PC4
## Age 0.17924468 0.410682095 0.26283658 -0.37140336
## Duration -0.20145938 -0.378847497 0.13442345 0.18892174
## NoPurpose -0.24791598 -0.302342778 0.27260558 0.06791104
## SocialDistraction -0.31921842 -0.118286265 0.26448902 -0.25512215
## Restless -0.29367548 -0.022768151 0.35250804 -0.27033701
## EasyDistracted -0.33552390 -0.115057565 0.06336205 -0.35443700
## Worry -0.31290859 0.072511374 -0.30749292 0.09350089
## ConcDifficult -0.34852111 0.025306782 -0.07352840 -0.16648502
## SelfCompare -0.24779590 0.465651206 -0.15510033 -0.04891722
## GeneralCompare -0.04719736 0.161030431 0.59850248 0.58316679
## Validation -0.18370906 0.552782356 0.18630858 0.13671087
## Depressed -0.32126116 0.093347770 -0.27837396 0.20974331
## DailyFluct -0.31341254 0.075194035 -0.08641578 0.01713745
## SleepDifficult -0.21657987 -0.007674802 -0.19598999 0.34066012
## PC5 PC6 PC7 PC8 PC9
## Age -0.2764864754 0.26624842 0.54546066 0.18357779 -0.03853742
## Duration 0.5158049178 0.05024540 0.53898955 -0.20635817 0.19908875
## NoPurpose 0.0007780264 0.55652120 -0.16559662 0.42768602 -0.32205619
## SocialDistraction -0.0453933803 0.06876903 -0.24553213 -0.23202554 -0.24288820
## Restless 0.0980012223 -0.13112007 0.23059180 -0.02630167 0.06784161
## EasyDistracted -0.2217196891 -0.17940494 -0.07881116 -0.12129729 0.07310675
## Worry -0.0338385920 -0.27039658 0.21625674 0.18429917 -0.37384128
## ConcDifficult -0.2224176355 -0.28148408 -0.03529965 -0.14523182 0.06831081
## SelfCompare 0.3273586178 0.06641885 0.15706826 0.01494808 -0.13383500
## GeneralCompare -0.2512924228 -0.38834156 0.04327693 0.11302234 -0.02362436
## Validation 0.3220512235 0.21443573 -0.35186467 -0.29842640 0.05807038
## Depressed -0.0081588243 -0.02154396 0.07741182 0.27128661 -0.22540685
## DailyFluct -0.0608890564 0.10225138 -0.11563033 0.46390767 0.74843281
## SleepDifficult -0.5179365365 0.44530120 0.21712396 -0.47501420 0.09482045
## PC10 PC11 PC12 PC13
## Age -0.293032292 -0.14613294 0.047263923 0.01242570
## Duration -0.239681526 -0.25051716 0.008902317 -0.02211111
## NoPurpose 0.057466470 -0.04099619 -0.288019318 -0.23679990
## SocialDistraction -0.008233724 -0.23800940 0.432738071 0.49777231
## Restless 0.273796980 0.74448303 0.016572680 -0.02467883
## EasyDistracted -0.087847269 -0.22763685 -0.151186155 -0.07794889
## Worry -0.216409102 0.09150589 -0.465648093 0.46792128
## ConcDifficult -0.109111897 -0.17550749 -0.143871062 -0.56625197
## SelfCompare 0.648301968 -0.33403628 0.006454678 -0.06958529
## GeneralCompare 0.111634850 -0.16732127 0.024691708 0.01377240
## Validation -0.421794246 0.15438870 -0.184721833 -0.03659063
## Depressed -0.271442437 0.16986585 0.655958058 -0.26060329
## DailyFluct 0.005765312 -0.06129021 0.028128608 0.26513267
## SleepDifficult 0.165867461 0.12760884 -0.031062469 0.06817968
## PC14
## Age -0.050306524
## Duration -0.006655278
## NoPurpose -0.020351677
## SocialDistraction -0.278215976
## Restless -0.022919115
## EasyDistracted 0.740581930
## Worry -0.088325965
## ConcDifficult -0.552279692
## SelfCompare 0.064628717
## GeneralCompare 0.055970344
## Validation 0.064309462
## Depressed 0.186908336
## DailyFluct -0.102886281
## SleepDifficult 0.028980254
# Factor analysis with three factors, varimax rotation and the minimum
# residual ("minres") method. Loadings with absolute value below 0.4 are
# hidden when printing.
FA <- fa(
  smmh_baru_df,
  nfactors = 3,
  rotate = "varimax",
  fm = "minres"
)
print(FA[["loadings"]], cutoff = 0.4)
##
## Loadings:
## MR1 MR3 MR2
## Age -0.402
## Duration
## NoPurpose 0.499
## SocialDistraction 0.704
## Restless 0.603
## EasyDistracted 0.422 0.619
## Worry 0.693
## ConcDifficult 0.550 0.462
## SelfCompare 0.584
## GeneralCompare
## Validation 0.600
## Depressed 0.714
## DailyFluct 0.501
## SleepDifficult 0.416
##
## MR1 MR3 MR2
## SS loadings 2.463 2.119 1.135
## Proportion Var 0.176 0.151 0.081
## Cumulative Var 0.176 0.327 0.408
# Draw the factor-structure diagram for the fitted factor analysis `FA`.
fa.diagram(FA)