library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the student dataset from the working directory, then preview the
# first six rows to confirm the columns were parsed as expected.
data_mahasiswa <- read.csv(file = "df_mahasiswa.csv")
head(x = data_mahasiswa, n = 6L)
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari frekuensi_login_lms
## 1 1 MHS001 L 4 1
## 2 2 MHS002 P 4 2
## 3 3 MHS003 P 2 6
## 4 4 MHS004 P 5 3
## 5 5 MHS005 L 3 2
## 6 6 MHS006 L 3 7
## motivasi_belajar ipk
## 1 82 3.12
## 2 73 3.45
## 3 71 3.07
## 4 98 3.43
## 5 81 2.83
## 6 61 3.10
# Show the compact structure (dimensions and column types) of the data frame.
str(object = data_mahasiswa)
## 'data.frame': 55 obs. of 7 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ id_mahasiswa : chr "MHS001" "MHS002" "MHS003" "MHS004" ...
## $ jenis_kelamin : chr "L" "P" "P" "P" ...
## $ jam_belajar_per_hari: int 4 4 2 5 3 3 1 3 2 1 ...
## $ frekuensi_login_lms : int 1 2 6 3 2 7 7 4 6 5 ...
## $ motivasi_belajar : int 82 73 71 98 81 61 44 69 44 46 ...
## $ ipk : num 3.12 3.45 3.07 3.43 2.83 3.1 2.98 3.08 2.82 2.93 ...
# Count the missing values in every column; the result is a named numeric
# vector with one total per column.
vapply(
  data_mahasiswa,
  function(kolom) sum(is.na(kolom)),
  numeric(1)
)
## X id_mahasiswa jenis_kelamin
## 0 0 0
## jam_belajar_per_hari frekuensi_login_lms motivasi_belajar
## 0 0 0
## ipk
## 0
# Print the raw jenis_kelamin column (exact-name extraction).
data_mahasiswa[["jenis_kelamin"]]
## [1] "L" "P" "P" "P" "L" "L" "P" "L" "P" "L" "L" "P" "P" "L" "L" "L" "L" "L" "L"
## [20] "L" "L" "P" "L" "P" "L" "L" "L" "P" "L" "P" "P" "L" "P" "P" "L" "P" "L" "L"
## [39] "P" "L" "L" "P" "P" "P" "P" "L" "L" "L" "P" "P" "L" "L" "L" "L" "P"
# Frequency of each jenis_kelamin value.
table(data_mahasiswa[["jenis_kelamin"]])
##
## L P
## 32 23
# Mean ipk within each jenis_kelamin group.
aggregate(ipk ~ jenis_kelamin, data = data_mahasiswa, FUN = mean)
## jenis_kelamin ipk
## 1 L 3.158125
## 2 P 3.176957
# id_mahasiswa of the row with the largest motivasi_belajar; which.max()
# returns the first position when the maximum is tied.
with(data_mahasiswa, id_mahasiswa[which.max(motivasi_belajar)])
## [1] "MHS054"
# Keep only the rows whose frekuensi_login_lms exceeds 5. which() drops any
# NA comparison results, and the original row names are retained.
dataf <- data_mahasiswa[which(data_mahasiswa$frekuensi_login_lms > 5), ]
dataf
## X id_mahasiswa jenis_kelamin jam_belajar_per_hari frekuensi_login_lms
## 3 3 MHS003 P 2 6
## 6 6 MHS006 L 3 7
## 7 7 MHS007 P 1 7
## 9 9 MHS009 P 2 6
## 13 13 MHS013 P 2 6
## 17 17 MHS017 L 2 7
## 18 18 MHS018 L 5 7
## 21 21 MHS021 L 1 6
## 22 22 MHS022 P 4 7
## 26 26 MHS026 L 4 6
## 28 28 MHS028 P 2 6
## 31 31 MHS031 P 2 7
## 33 33 MHS033 P 1 7
## 37 37 MHS037 L 4 6
## 41 41 MHS041 L 5 7
## 42 42 MHS042 P 4 6
## 44 44 MHS044 P 2 7
## 51 51 MHS051 L 4 6
## 55 55 MHS055 P 2 7
## motivasi_belajar ipk
## 3 71 3.07
## 6 61 3.10
## 7 44 2.98
## 9 44 2.82
## 13 59 3.11
## 17 52 3.06
## 18 92 3.89
## 21 47 2.96
## 22 72 3.69
## 26 89 3.32
## 28 53 3.14
## 31 71 3.22
## 33 46 3.11
## 37 68 3.67
## 41 90 3.73
## 42 73 3.05
## 44 63 3.15
## 51 92 3.65
## 55 71 3.21
# Compare the mean ipk of rows with jam_belajar_per_hari >= 4 against the
# mean ipk of rows with jam_belajar_per_hari < 4; prints TRUE when the
# first group has the higher mean.
#
# The column is spelled out in full on purpose: the abbreviated
# `$jam_belajar` only resolves through `$` partial matching, which returns
# NULL (and therefore a NaN mean) on tibbles or as soon as a second column
# starting with "jam_belajar" exists.
rata_belajar_4 <- mean(
  data_mahasiswa$ipk[data_mahasiswa$jam_belajar_per_hari >= 4]
)
rata_belajar_kurang4 <- mean(
  data_mahasiswa$ipk[data_mahasiswa$jam_belajar_per_hari < 4]
)
rata_belajar_4 > rata_belajar_kurang4
## [1] TRUE
# Select the row with the highest ipk (first one if tied) and keep only the
# id_mahasiswa, ipk, and motivasi_belajar columns, then print it.
mahasiswa_ipk_tinggi <- data_mahasiswa[
  which.max(data_mahasiswa[["ipk"]]),
  c("id_mahasiswa", "ipk", "motivasi_belajar"),
  drop = FALSE
]
mahasiswa_ipk_tinggi
## id_mahasiswa ipk motivasi_belajar
## 18 MHS018 3.89 92
# Rows with ipk above 3.5 and motivasi_belajar above 85 (NA comparisons are
# dropped by which()), followed by the jenis_kelamin counts of those rows.
mahasiswa_lolos <- data_mahasiswa[
  which(data_mahasiswa$ipk > 3.5 & data_mahasiswa$motivasi_belajar > 85),
]
table(mahasiswa_lolos[["jenis_kelamin"]])
##
## L P
## 4 1