library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Load the student dataset from the working directory into a data frame.
df <- read.csv(file = "df_mahasiswa.csv")
# Show every row of the raw data for a first visual inspection.
print(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari frekuensi_login_lms
## 1 1 MHS001 L 4 1
## 2 2 MHS002 P 4 2
## 3 3 MHS003 P 2 6
## 4 4 MHS004 P 5 3
## 5 5 MHS005 L 3 2
## 6 6 MHS005 L 3 7
## 7 7 MHS005 P 1 7
## 8 8 MHS008 L 3 4
## 9 9 MHS009 P 2 6
## 10 10 MHS010 L 1 5
## 11 11 MHS011 L 1 1
## 12 12 MHS012 P 1 2
## 13 13 MHS013 P 2 6
## 14 14 MHS014 L 4 2
## 15 15 MHS015 L 1 2
## 16 16 MHS016 L 5 1
## 17 17 MHS017 L 2 7
## 18 18 MHS018 L 5 7
## 19 19 MHS019 L 5 2
## 20 20 MHS020 L 3 1
## 21 21 MHS021 L 1 6
## 22 22 MHS022 P 4 7
## 23 23 MHS023 L 4 4
## 24 24 MHS024 P 5 4
## 25 25 MHS025 L 3 1
## 26 26 MHS026 L 4 6
## 27 27 MHS027 L 2 3
## 28 28 MHS028 P 2 6
## 29 29 MHS029 L 2 2
## 30 30 MHS030 P 5 3
## 31 31 MHS031 P 2 7
## 32 32 MHS032 L 1 2
## 33 33 MHS033 P 1 7
## 34 34 MHS034 P 5 4
## 35 35 MHS035 L 4 1
## 36 36 MHS036 P 3 1
## 37 37 MHS037 L 4 6
## 38 38 MHS038 L 4 1
## 39 39 MHS039 P 1 1
## 40 40 MHS040 L 4 2
## 41 41 MHS041 L 5 7
## 42 42 MHS042 P 4 6
## 43 43 MHS043 P 4 5
## 44 44 MHS044 P 2 7
## 45 45 MHS045 P 5 2
## 46 46 MHS046 L 1 3
## 47 47 MHS047 L 4 5
## 48 48 MHS048 L 5 4
## 49 49 MHS049 P 4 5
## 50 50 MHS050 P 2 4
## 51 51 MHS051 L 4 6
## 52 52 MHS052 L 4 4
## 53 53 MHS053 L 3 4
## 54 54 MHS054 L 5 4
## 55 55 MHS055 P 2 7
## motivasi_belajar ipk
## 1 82 3.12
## 2 73 3.45
## 3 71 3.07
## 4 98 3.43
## 5 81 2.83
## 6 61 2.83
## 7 44 2.83
## 8 69 3.08
## 9 44 2.82
## 10 46 2.93
## 11 56 2.77
## 12 35 2.71
## 13 59 3.11
## 14 81 3.31
## 15 44 2.90
## 16 90 3.46
## 17 52 NA
## 18 92 3.89
## 19 84 3.09
## 20 73 2.73
## 21 47 NA
## 22 72 3.69
## 23 83 3.54
## 24 81 NA
## 25 70 2.65
## 26 89 3.32
## 27 59 NA
## 28 53 3.14
## 29 48 2.89
## 30 87 3.40
## 31 71 3.22
## 32 39 2.66
## 33 46 3.11
## 34 87 3.56
## 35 91 3.13
## 36 66 2.95
## 37 68 3.67
## 38 90 3.29
## 39 52 2.52
## 40 80 3.17
## 41 90 3.73
## 42 73 3.05
## 43 91 3.29
## 44 63 3.15
## 45 96 3.33
## 46 43 4.00
## 47 82 3.13
## 48 92 3.70
## 49 73 3.22
## 50 57 3.05
## 51 92 3.65
## 52 88 3.30
## 53 80 3.29
## 54 100 3.41
## 55 71 0.50
# Per-column summary of the raw data (quartiles/mean for numeric columns,
# length/class for character columns, plus an NA count where values are missing).
summary(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari
## Min. : 1.0 Length:55 Length:55 Min. :1.000
## 1st Qu.:14.5 Class :character Class :character 1st Qu.:2.000
## Median :28.0 Mode :character Mode :character Median :3.000
## Mean :28.0 Mean :3.127
## 3rd Qu.:41.5 3rd Qu.:4.000
## Max. :55.0 Max. :5.000
##
## frekuensi_login_lms motivasi_belajar ipk
## Min. :1.000 Min. : 35.0 Min. :0.500
## 1st Qu.:2.000 1st Qu.: 56.5 1st Qu.:2.915
## Median :4.000 Median : 73.0 Median :3.140
## Mean :4.018 Mean : 71.0 Mean :3.139
## 3rd Qu.:6.000 3rd Qu.: 87.0 3rd Qu.:3.405
## Max. :7.000 Max. :100.0 Max. :4.000
## NA's :4
# Cell-by-cell missing-value mask: TRUE wherever a value in df is NA.
is.na(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari frekuensi_login_lms
## [1,] FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE FALSE FALSE
## motivasi_belajar ipk
## [1,] FALSE FALSE
## [2,] FALSE FALSE
## [3,] FALSE FALSE
## [4,] FALSE FALSE
## [5,] FALSE FALSE
## [6,] FALSE FALSE
## [7,] FALSE FALSE
## [8,] FALSE FALSE
## [9,] FALSE FALSE
## [10,] FALSE FALSE
## [11,] FALSE FALSE
## [12,] FALSE FALSE
## [13,] FALSE FALSE
## [14,] FALSE FALSE
## [15,] FALSE FALSE
## [16,] FALSE FALSE
## [17,] FALSE TRUE
## [18,] FALSE FALSE
## [19,] FALSE FALSE
## [20,] FALSE FALSE
## [21,] FALSE TRUE
## [22,] FALSE FALSE
## [23,] FALSE FALSE
## [24,] FALSE TRUE
## [25,] FALSE FALSE
## [26,] FALSE FALSE
## [27,] FALSE TRUE
## [28,] FALSE FALSE
## [29,] FALSE FALSE
## [30,] FALSE FALSE
## [31,] FALSE FALSE
## [32,] FALSE FALSE
## [33,] FALSE FALSE
## [34,] FALSE FALSE
## [35,] FALSE FALSE
## [36,] FALSE FALSE
## [37,] FALSE FALSE
## [38,] FALSE FALSE
## [39,] FALSE FALSE
## [40,] FALSE FALSE
## [41,] FALSE FALSE
## [42,] FALSE FALSE
## [43,] FALSE FALSE
## [44,] FALSE FALSE
## [45,] FALSE FALSE
## [46,] FALSE FALSE
## [47,] FALSE FALSE
## [48,] FALSE FALSE
## [49,] FALSE FALSE
## [50,] FALSE FALSE
## [51,] FALSE FALSE
## [52,] FALSE FALSE
## [53,] FALSE FALSE
## [54,] FALSE FALSE
## [55,] FALSE FALSE
# Number of missing values in each column (column sums of the NA mask).
colSums(is.na(df))
## X id_mahasiswa jenis_kelamin
## 0 0 0
## jam_belajar_per_hari frekuensi_login_lms motivasi_belajar
## 0 0 0
## ipk
## 4
# Median imputation: every missing IPK is replaced by the median of the
# observed (non-missing) IPK values.
df$ipk <- replace(df$ipk, is.na(df$ipk), median(df$ipk, na.rm = TRUE))
# Print the full data frame to confirm the imputed values.
print(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari frekuensi_login_lms
## 1 1 MHS001 L 4 1
## 2 2 MHS002 P 4 2
## 3 3 MHS003 P 2 6
## 4 4 MHS004 P 5 3
## 5 5 MHS005 L 3 2
## 6 6 MHS005 L 3 7
## 7 7 MHS005 P 1 7
## 8 8 MHS008 L 3 4
## 9 9 MHS009 P 2 6
## 10 10 MHS010 L 1 5
## 11 11 MHS011 L 1 1
## 12 12 MHS012 P 1 2
## 13 13 MHS013 P 2 6
## 14 14 MHS014 L 4 2
## 15 15 MHS015 L 1 2
## 16 16 MHS016 L 5 1
## 17 17 MHS017 L 2 7
## 18 18 MHS018 L 5 7
## 19 19 MHS019 L 5 2
## 20 20 MHS020 L 3 1
## 21 21 MHS021 L 1 6
## 22 22 MHS022 P 4 7
## 23 23 MHS023 L 4 4
## 24 24 MHS024 P 5 4
## 25 25 MHS025 L 3 1
## 26 26 MHS026 L 4 6
## 27 27 MHS027 L 2 3
## 28 28 MHS028 P 2 6
## 29 29 MHS029 L 2 2
## 30 30 MHS030 P 5 3
## 31 31 MHS031 P 2 7
## 32 32 MHS032 L 1 2
## 33 33 MHS033 P 1 7
## 34 34 MHS034 P 5 4
## 35 35 MHS035 L 4 1
## 36 36 MHS036 P 3 1
## 37 37 MHS037 L 4 6
## 38 38 MHS038 L 4 1
## 39 39 MHS039 P 1 1
## 40 40 MHS040 L 4 2
## 41 41 MHS041 L 5 7
## 42 42 MHS042 P 4 6
## 43 43 MHS043 P 4 5
## 44 44 MHS044 P 2 7
## 45 45 MHS045 P 5 2
## 46 46 MHS046 L 1 3
## 47 47 MHS047 L 4 5
## 48 48 MHS048 L 5 4
## 49 49 MHS049 P 4 5
## 50 50 MHS050 P 2 4
## 51 51 MHS051 L 4 6
## 52 52 MHS052 L 4 4
## 53 53 MHS053 L 3 4
## 54 54 MHS054 L 5 4
## 55 55 MHS055 P 2 7
## motivasi_belajar ipk
## 1 82 3.12
## 2 73 3.45
## 3 71 3.07
## 4 98 3.43
## 5 81 2.83
## 6 61 2.83
## 7 44 2.83
## 8 69 3.08
## 9 44 2.82
## 10 46 2.93
## 11 56 2.77
## 12 35 2.71
## 13 59 3.11
## 14 81 3.31
## 15 44 2.90
## 16 90 3.46
## 17 52 3.14
## 18 92 3.89
## 19 84 3.09
## 20 73 2.73
## 21 47 3.14
## 22 72 3.69
## 23 83 3.54
## 24 81 3.14
## 25 70 2.65
## 26 89 3.32
## 27 59 3.14
## 28 53 3.14
## 29 48 2.89
## 30 87 3.40
## 31 71 3.22
## 32 39 2.66
## 33 46 3.11
## 34 87 3.56
## 35 91 3.13
## 36 66 2.95
## 37 68 3.67
## 38 90 3.29
## 39 52 2.52
## 40 80 3.17
## 41 90 3.73
## 42 73 3.05
## 43 91 3.29
## 44 63 3.15
## 45 96 3.33
## 46 43 4.00
## 47 82 3.13
## 48 92 3.70
## 49 73 3.22
## 50 57 3.05
## 51 92 3.65
## 52 88 3.30
## 53 80 3.29
## 54 100 3.41
## 55 71 0.50
# Re-count missing values per column to verify that no NA remains.
colSums(is.na(df))
## X id_mahasiswa jenis_kelamin
## 0 0 0
## jam_belajar_per_hari frekuensi_login_lms motivasi_belajar
## 0 0 0
## ipk
## 0
# Per-column summary again, now that missing IPK values have been filled in.
summary(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari
## Min. : 1.0 Length:55 Length:55 Min. :1.000
## 1st Qu.:14.5 Class :character Class :character 1st Qu.:2.000
## Median :28.0 Mode :character Mode :character Median :3.000
## Mean :28.0 Mean :3.127
## 3rd Qu.:41.5 3rd Qu.:4.000
## Max. :55.0 Max. :5.000
## frekuensi_login_lms motivasi_belajar ipk
## Min. :1.000 Min. : 35.0 Min. :0.500
## 1st Qu.:2.000 1st Qu.: 56.5 1st Qu.:2.940
## Median :4.000 Median : 73.0 Median :3.140
## Mean :4.018 Mean : 71.0 Mean :3.139
## 3rd Qu.:6.000 3rd Qu.: 87.0 3rd Qu.:3.365
## Max. :7.000 Max. :100.0 Max. :4.000
# Outlier fences for IPK using the 1.5 * IQR rule: anything further than
# 1.5 interquartile ranges outside the quartiles is flagged.
Q1 <- quantile(df[["ipk"]], probs = 0.25)
Q3 <- quantile(df[["ipk"]], probs = 0.75)
# NOTE: this variable has the same name as the function stats::IQR(). Calls such
# as IQR(x) still resolve to the function, but the name is easy to misread.
IQR <- Q3 - Q1
lower_bound <- Q1 - IQR * 1.5
upper_bound <- Q3 + IQR * 1.5
# Logical masks over the rows of df:
#   outliersa -> IPK below the lower fence
#   outliers  -> IPK above the upper fence
outliersa <- df[["ipk"]] < lower_bound
outliers <- df[["ipk"]] > upper_bound
# Number of IPK values below the lower fence.
sum(outliersa)
## [1] 1
# Number of IPK values above the upper fence.
sum(outliers)
## [1] 0
# Boxplot of IPK before outlier treatment.
boxplot(df$ipk, main = "Boxplot IPK", col = "lightblue")
# Handle outliers by winsorizing (replace extreme values with the fence values).
# Clamp on BOTH sides: values below lower_bound are raised to lower_bound and
# values above upper_bound are lowered to upper_bound. The previous version only
# rewrote the rows below the lower fence, so high outliers were never capped.
df$ipk <- pmin(pmax(df$ipk, lower_bound), upper_bound)
# Boxplot of IPK after winsorizing, for comparison with the plot above.
boxplot(df$ipk, main = "Boxplot IPK", col = "lightblue")
# Count rows whose student ID already appeared in an earlier row.
sum(duplicated(df[["id_mahasiswa"]]))
## [1] 2
# Keep only the first row for each student ID; later repeats are dropped.
# NOTE(review): the three rows with ID MHS005 differ in their other columns
# (gender, study hours, LMS logins, motivation), so they may be mistyped IDs
# rather than true duplicate records -- confirm before discarding them.
df <- df[!duplicated(df[["id_mahasiswa"]]), ]
# Final per-column summary after imputation, winsorizing and de-duplication.
summary(df)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari
## Min. : 1.00 Length:53 Length:53 Min. :1.00
## 1st Qu.:16.00 Class :character Class :character 1st Qu.:2.00
## Median :29.00 Mode :character Mode :character Median :4.00
## Mean :28.81 Mean :3.17
## 3rd Qu.:42.00 3rd Qu.:4.00
## Max. :55.00 Max. :5.00
## frekuensi_login_lms motivasi_belajar ipk
## Min. :1.000 Min. : 35.0 Min. :2.303
## 1st Qu.:2.000 1st Qu.: 57.0 1st Qu.:3.050
## Median :4.000 Median : 73.0 Median :3.140
## Mean :3.906 Mean : 71.7 Mean :3.184
## 3rd Qu.:6.000 3rd Qu.: 87.0 3rd Qu.:3.400
## Max. :7.000 Max. :100.0 Max. :4.000