1. Membaca Data

# Load the student-habits dataset from the CSV file in the working directory.
data <- read.csv(file = "college_students_habits_1M.csv")

# Print a compact overview of the columns: name, type and first few values.
str(object = data)
## 'data.frame':    1000000 obs. of  42 variables:
##  $ study_hours             : num  3.015 3.665 2.704 3.445 0.193 ...
##  $ attendance              : num  67 73.3 72.3 74.8 55.1 ...
##  $ assignment_completion   : num  51.6 69.7 92.8 85.2 64.5 ...
##  $ midterm_score           : num  57.2 57.6 44.6 52 32.8 ...
##  $ final_score             : num  61.7 62.1 46.5 55.4 32.4 ...
##  $ project_score           : num  65.4 65.7 53.6 60.6 42.6 ...
##  $ backlogs                : int  4 3 2 2 5 3 1 0 3 2 ...
##  $ sleep_hours             : num  5.99 6.95 6.7 6.5 6.55 ...
##  $ stress                  : num  4.29 1.84 3.86 5.07 1 ...
##  $ anxiety                 : num  58.1 41.9 56.6 65.2 30.7 ...
##  $ depression              : num  53.1 36.9 51.6 60.2 25.7 ...
##  $ motivation              : num  4.5 6.25 7.3 6.51 6.28 ...
##  $ concentration           : num  5.27 6.37 4.53 4.46 5.47 ...
##  $ time_management         : num  4.77 5.98 7.52 7.01 5.63 ...
##  $ self_discipline         : num  4.77 5.98 7.52 7.01 5.63 ...
##  $ social_media_hours      : num  4.03 2.14 2 2.09 4 ...
##  $ gaming_hours            : num  2.69 1.42 1.33 1.4 2.67 ...
##  $ netflix_hours           : num  2.69 1.42 1.33 1.4 2.67 ...
##  $ screen_time             : num  9.4 4.98 4.66 4.88 9.33 ...
##  $ physical_activity       : num  3.31 4.58 4.67 4.6 3.33 ...
##  $ junk_food_frequency     : num  3.69 2.42 2.33 2.4 3.67 ...
##  $ caffeine_mg             : num  213 151 169 183 180 ...
##  $ late_night_frequency    : num  2.69 1.42 1.33 1.4 2.67 ...
##  $ procrastination_score   : num  6.91 4.44 2.81 3.38 6.03 ...
##  $ family_income           : num  20108 47697 35156 23898 22091 ...
##  $ parental_education_level: int  3 4 2 2 2 2 4 2 2 3 ...
##  $ internet_quality        : num  5.05 6.59 4.73 4.41 4.11 ...
##  $ library_visits          : num  1.814 1.837 0.971 1.469 0.188 ...
##  $ online_courses_completed: int  0 0 1 1 0 0 2 4 0 1 ...
##  $ part_time_hours         : num  5.87 4.59 5.36 4.93 7.48 ...
##  $ peer_study_group        : int  0 0 0 0 0 0 1 1 0 0 ...
##  $ relationship_status     : int  1 0 0 0 1 0 0 0 1 0 ...
##  $ hostel_student          : int  0 0 1 1 1 1 0 1 1 0 ...
##  $ extracurricular_hours   : num  1.96 3.15 5.55 4.54 4.45 ...
##  $ phone_unlocks_per_day   : num  73.7 48.5 46.6 47.9 73.3 ...
##  $ previous_gpa            : num  5.72 5.76 4.46 5.2 3.28 ...
##  $ class_participation     : num  3.59 4.82 5.49 5.48 2.82 ...
##  $ weekly_study_sessions   : num  2.81 2.84 1.97 2.47 1.19 ...
##  $ group_study_hours       : num  1.814 1.837 0.971 1.469 0.188 ...
##  $ financial_stress        : num  5.49 2.88 5.7 6.6 4.6 ...
##  $ gpa                     : num  0.547 0.707 0.868 0.729 0.371 ...
##  $ performance_level       : chr  "Low" "Low" "Low" "Low" ...

2. Mengambil Variabel Numerik

# Keep only the numeric (integer or double) variables.
# vapply() guarantees exactly one logical flag per column, and drop = FALSE
# keeps the result a data frame even if only a single column is numeric.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]

3. Menghitung Parameter Statistik

# Compute the mean and standard deviation of every numeric variable,
# ignoring missing values.
# vapply() processes the data frame column by column, so the large frame is
# not first copied into a matrix the way apply(data_num, 2, ...) would do, and
# the result is guaranteed to be one numeric value per column. The column
# names are carried over as the names of the result vectors.
mean_values <- vapply(data_num, mean, numeric(1), na.rm = TRUE)
sd_values <- vapply(data_num, sd, numeric(1), na.rm = TRUE)

mean_values
##              study_hours               attendance    assignment_completion 
##             4.042036e+00             7.487637e+01             6.989670e+01 
##            midterm_score              final_score            project_score 
##             6.001131e+01             6.485611e+01             6.797272e+01 
##                 backlogs              sleep_hours                   stress 
##             2.550791e+00             6.499550e+00             3.211822e+00 
##                  anxiety               depression               motivation 
##             4.998615e+01             4.499145e+01             6.001923e+00 
##            concentration          time_management          self_discipline 
##             6.002853e+00             6.001461e+00             6.001461e+00 
##       social_media_hours             gaming_hours            netflix_hours 
##             3.014901e+00             2.009934e+00             2.009934e+00 
##              screen_time        physical_activity      junk_food_frequency 
##             7.034769e+00             3.998554e+00             3.001828e+00 
##              caffeine_mg     late_night_frequency    procrastination_score 
##             1.821618e+02             2.009934e+00             5.003990e+00 
##            family_income parental_education_level         internet_quality 
##             2.546035e+04             2.501398e+00             5.001144e+00 
##           library_visits online_courses_completed          part_time_hours 
##             2.010449e+00             9.633180e-01             4.999938e+00 
##         peer_study_group      relationship_status           hostel_student 
##             5.007440e-01             3.092280e-01             4.998580e-01 
##    extracurricular_hours    phone_unlocks_per_day             previous_gpa 
##             2.999902e+00             6.006881e+01             6.001131e+00 
##      class_participation    weekly_study_sessions        group_study_hours 
##             5.008589e+00             3.002335e+00             2.511193e+00 
##         financial_stress                      gpa 
##             4.998476e+00             8.311957e-01
# Display the per-variable standard deviations computed above.
sd_values
##              study_hours               attendance    assignment_completion 
##             2.212739e+00             1.289286e+01             1.468279e+01 
##            midterm_score              final_score            project_score 
##             1.496727e+01             1.760637e+01             1.387704e+01 
##                 backlogs              sleep_hours                   stress 
##             1.650993e+00             4.356498e-01             1.756919e+00 
##                  anxiety               depression               motivation 
##             1.499979e+01             1.499120e+01             9.737576e-01 
##            concentration          time_management          self_discipline 
##             1.096555e+00             9.992839e-01             9.992839e-01 
##       social_media_hours             gaming_hours            netflix_hours 
##             1.469558e+00             9.797051e-01             9.797051e-01 
##              screen_time        physical_activity      junk_food_frequency 
##             3.428968e+00             9.997763e-01             9.985228e-01 
##              caffeine_mg     late_night_frequency    procrastination_score 
##             2.745490e+01             9.797051e-01             1.657632e+00 
##            family_income parental_education_level         internet_quality 
##             1.478761e+04             1.032773e+00             1.000158e+00 
##           library_visits online_courses_completed          part_time_hours 
##             9.813614e-01             1.207893e+00             1.611714e+00 
##         peer_study_group      relationship_status           hostel_student 
##             4.999997e-01             4.621756e-01             5.000002e-01 
##    extracurricular_hours    phone_unlocks_per_day             previous_gpa 
##             9.985150e-01             1.988418e+01             1.496727e+00 
##      class_participation    weekly_study_sessions        group_study_hours 
##             1.713582e+00             1.000212e+00             1.415504e+00 
##         financial_stress                      gpa 
##             1.340340e+00             2.952901e-01

4. Membangkitkan Data Baru

# Generate 1000 new synthetic observations.
# The seed is fixed so that the generated sample is reproducible.
set.seed(123)

n <- 1000

# Each listed variable is drawn independently from a normal distribution
# parameterised by that variable's mean and standard deviation. The columns
# are drawn in the order listed, which also fixes the column order of the
# resulting data frame.
# NOTE(review): normal draws are unbounded, so values outside a plausible
# range (e.g. negative hours) can occur -- confirm whether that is acceptable.
data_baru <- as.data.frame(
  lapply(
    setNames(
      nm = c(
        "study_hours",
        "attendance",
        "sleep_hours",
        "stress",
        "motivation",
        "social_media_hours",
        "physical_activity"
      )
    ),
    function(col) rnorm(n, mean_values[col], sd_values[col])
  )
)

5. Melihat Data Hasil Bangkitan

# Show the first six generated rows.
head(data_baru, n = 6L)
##   study_hours attendance sleep_hours    stress motivation social_media_hours
## 1    2.801850   62.03767    6.276670 2.9477439   6.193315           2.288684
## 2    3.532713   61.46837    6.602772 2.6359793   6.634975           4.671965
## 3    7.491050   74.64455    6.263607 0.6675131   6.655318           1.329392
## 4    4.198053   73.17225    7.030707 1.9867495   4.751464           5.191343
## 5    4.328116   42.00804    6.575413 7.7771585   4.028983           4.361297
## 6    7.837027   88.29234    6.231509 3.1460868   8.149376           3.507395
##   physical_activity
## 1          3.299482
## 2          4.994782
## 3          3.305963
## 4          3.895094
## 5          4.602285
## 6          3.390645
# Show the per-column five-number summary and mean of the generated data.
summary(object = data_baru)
##   study_hours       attendance      sleep_hours        stress      
##  Min.   :-2.175   Min.   : 35.58   Min.   :5.259   Min.   :-2.286  
##  1st Qu.: 2.652   1st Qu.: 66.45   1st Qu.:6.214   1st Qu.: 2.088  
##  Median : 4.062   Median : 75.58   Median :6.478   Median : 3.197  
##  Mean   : 4.078   Mean   : 75.42   Mean   :6.491   Mean   : 3.196  
##  3rd Qu.: 5.513   3rd Qu.: 84.59   3rd Qu.:6.779   3rd Qu.: 4.355  
##  Max.   :11.214   Max.   :118.59   Max.   :7.990   Max.   : 8.298  
##    motivation    social_media_hours physical_activity
##  Min.   :2.947   Min.   :-1.779     Min.   :0.5855   
##  1st Qu.:5.314   1st Qu.: 2.103     1st Qu.:3.3112   
##  Median :5.970   Median : 3.057     Median :3.9601   
##  Mean   :5.971   Mean   : 3.066     Mean   :3.9657   
##  3rd Qu.:6.621   3rd Qu.: 4.073     3rd Qu.:4.6840   
##  Max.   :9.357   Max.   : 8.475     Max.   :7.2737

6. Menyimpan Data Bangkitan

# Save the generated data to a CSV file, without a row-name column.
write.csv(
  x = data_baru,
  file = "data_bangkitan_mahasiswa.csv",
  row.names = FALSE
)