2 Lakukan pengolahan, analisis deskriptif, dan interpretasi setiap output dengan menggunakan data StudentsPerformance.csv, yang dapat diunduh di laman Google Classroom. Data terdiri atas 8 variabel, yaitu gender, race.ethnicity, parental.level.of.education, lunch, test.preparation.course, math.score, reading.score, dan writing.score. Instruksinya sebagai berikut:

# Read the student performance CSV (first row holds the column names) and
# preview the first six records.
# NOTE(review): the path is specific to one machine; adjust before running.
SP <- read.csv(
  file = "C:/Users/ASUS/Downloads/StudentsPerformance.csv",
  header = TRUE
)
head(SP, n = 6L)

##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78

(a) Buatlah data frame dengan menambahkan kolom baru nilai total skor dan rata-rata skor

menambahkan kolom nilai total skor

# Add the total score per student: the row-wise sum of the three test scores.
# The score columns are selected by name rather than by position, so the
# result does not depend on the column order of the data frame.
SP$nilai.total.skor <- rowSums(
  SP[, c("math.score", "reading.score", "writing.score")]
)
head(SP)

##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78
##   nilai.total.skor
## 1              218
## 2              247
## 3              278
## 4              148
## 5              229
## 6              232

menambahkan kolom rata-rata skor

# Add the average score per student: the row-wise mean of the three test
# scores. The score columns are selected by name rather than by position, so
# columns added earlier (or a different column order) cannot shift the result.
SP$rata.rata.skor <- rowMeans(
  SP[, c("math.score", "reading.score", "writing.score")]
)
head(SP)

##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78
##   nilai.total.skor rata.rata.skor
## 1              218       72.66667
## 2              247       82.33333
## 3              278       92.66667
## 4              148       49.33333
## 5              229       76.33333
## 6              232       77.33333

(b) Hitung nilai summary, skewness, kurtosis pada data

# Min, quartiles, median, mean and max of the math scores.
summary(SP[["math.score"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   57.00   66.00   66.09   77.00  100.00

# Min, quartiles, median, mean and max of the writing scores.
summary(SP[["writing.score"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   57.75   69.00   68.05   79.00  100.00

# Min, quartiles, median, mean and max of the reading scores.
summary(SP[["reading.score"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.00   59.00   70.00   69.17   79.00  100.00

library(e1071)
# Skewness of the math scores, using e1071's default settings.
skewness(x = SP[["math.score"]])

## [1] -0.2780989

# Skewness of the writing scores, using e1071's default settings.
skewness(x = SP[["writing.score"]])

## [1] -0.2885762

# Skewness of the reading scores, using e1071's default settings.
skewness(x = SP[["reading.score"]])

## [1] -0.2583277

# Kurtosis of the math scores, using e1071's default settings.
kurtosis(x = SP[["math.score"]])

## [1] 0.2610652

# Kurtosis of the writing scores, using e1071's default settings.
kurtosis(x = SP[["writing.score"]])

## [1] -0.04511069

# Kurtosis of the reading scores, using e1071's default settings.
kurtosis(x = SP[["reading.score"]])

## [1] -0.07976785

(c) Buatlah diagram pie chart pada (i) gender, (ii) race.ethnicity, (iii) parental.level.of.education, (iv) lunch, dan (v) test.preparation.course dengan menampilkan label persentase (%)

(i) gender

# Pie chart of the gender distribution with percentage labels.
gender <- table(SP$gender) # tabulate the frequencies
piepercent <- round(100 * gender / sum(gender), 1)
pie_cols <- cm.colors(length(gender)) # one colour per slice, reused in legend
pie(
  gender,
  labels = paste0(piepercent, "%"), # append the "%" sign to each label
  main = "pie chart gender",
  col = pie_cols
)
# Legend entries come from the table itself so they always match the slices.
legend("topright", legend = names(gender), cex = 0.8, fill = pie_cols)

### (ii) race.ethnicity

# Pie chart of the race/ethnicity distribution with percentage labels.
race <- table(SP$race.ethnicity) # tabulate the frequencies
piepercent <- round(100 * race / sum(race), 1)
pie_cols <- cm.colors(length(race)) # one colour per slice, reused in legend
pie(
  race,
  labels = paste0(piepercent, "%"), # append the "%" sign to each label
  main = "pie chart race",
  col = pie_cols
)
# Legend entries come from the table itself so they always match the slices.
legend("topright", legend = names(race), cex = 0.8, fill = pie_cols)

### (iii) parental.level.of.education

# Pie chart of the parental education level distribution with percentage
# labels.
level <- table(SP$parental.level.of.education) # tabulate the frequencies
piepercent <- round(100 * level / sum(level), 1)
pie_cols <- cm.colors(length(level)) # one colour per slice, reused in legend
pie(
  level,
  labels = paste0(piepercent, "%"), # append the "%" sign to each label
  main = "pie chart level",
  col = pie_cols
)
# Legend entries come from the table itself so they always match the slices.
legend("topright", legend = names(level), cex = 0.8, fill = pie_cols)

### (iv) lunch

# Pie chart of the lunch type distribution with percentage labels.
lunchh <- table(SP$lunch) # tabulate the frequencies
piepercent <- round(100 * lunchh / sum(lunchh), 1)
pie_cols <- cm.colors(length(lunchh)) # one colour per slice, reused in legend
pie(
  lunchh,
  labels = paste0(piepercent, "%"), # append the "%" sign to each label
  main = "pie chart lunch",
  col = pie_cols
)
# Legend entries come from the table itself so they always match the slices.
legend("topright", legend = names(lunchh), cex = 0.8, fill = pie_cols)

### (v) test.preparation.course

# Pie chart of the test preparation course distribution with percentage
# labels.
test <- table(SP$test.preparation.course) # tabulate the frequencies
piepercent <- round(100 * test / sum(test), 1)
pie_cols <- cm.colors(length(test)) # one colour per slice, reused in legend
pie(
  test,
  labels = paste0(piepercent, "%"), # append the "%" sign to each label
  main = "pie chart test",
  col = pie_cols
)
# Legend entries come from the table itself so they always match the slices
# (the hand-typed "none " label carried a stray trailing space).
legend("topright", legend = names(test), cex = 0.8, fill = pie_cols)

(d) Buatlah diagram batang yang telah diurutkan pada data rata-rata skor terhadap

parental.level.of.education

library(ggplot2)
# Mean of the per-student average score within each parental education level.
rataan <- aggregate(
  rata.rata.skor ~ parental.level.of.education,
  data = SP,
  FUN = mean
)
# Plot the aggregated means (one bar per level) instead of stacking every
# student's score, and order the bars from the highest to the lowest mean.
ggplot(
  rataan,
  aes(
    x = reorder(parental.level.of.education, -rata.rata.skor),
    y = rata.rata.skor
  )
) +
  geom_col(fill = "darkorange") +
  labs(
    title = "DIAGRAM RATA-RATA SKOR TERHADAP PARENTAL EDUCATION",
    x = "parental.level.of.education", # hide the reorder() call from the axis
    y = "rata.rata.skor"
  )

race.ethnicity

library(ggplot2)
# Mean of the per-student average score within each race/ethnicity group.
rataan <- aggregate(
  rata.rata.skor ~ race.ethnicity,
  data = SP,
  FUN = mean
)
# Plot the aggregated means (one bar per group) instead of stacking every
# student's score, and order the bars from the highest to the lowest mean.
ggplot(
  rataan,
  aes(
    x = reorder(race.ethnicity, -rata.rata.skor),
    y = rata.rata.skor
  )
) +
  geom_col(fill = "darkorange") +
  labs(
    title = "DIAGRAM RATA-RATA SKOR TERHADAP RACE ETHNICITY",
    x = "race.ethnicity", # hide the reorder() call from the axis label
    y = "rata.rata.skor"
  )

(e) Buatlah diagram batang cluster/stacked berikut dengan menggunakan ggplot

parental level of education dan gender terhadap total score

# Stacked bar chart: total scores per parental education level, with the
# bars split by gender.
ggplot(
  data = SP,
  mapping = aes(
    x = parental.level.of.education,
    y = nilai.total.skor,
    fill = gender
  )
) +
  geom_col() +
  scale_fill_brewer(palette = "Reds") +
  labs(title = "DIAGRAM BATANG/CLUSTER")

race.ethnicity dan gender terhadap total score

# Stacked bar chart: total scores per race/ethnicity group, with the bars
# split by gender.
ggplot(
  data = SP,
  mapping = aes(
    x = race.ethnicity,
    y = nilai.total.skor,
    fill = gender
  )
) +
  geom_col() +
  scale_fill_brewer(palette = "Reds") +
  labs(title = "DIAGRAM BATANG/CLUSTER")

(f) Buatlah visualisasi sebaran data skor matematika, skor writing, dan skor reading pada masing-masing jenis kelamin dengan menggunakan histogram.

MATH SCORE

library(ggplot2)
# Histogram of the math scores (bin width 5) with bars filled by gender.
# Exactly one fill scale is added: a second scale_fill_brewer() would only
# replace the first and emit a "Scale for 'fill' is already present" message.
ggplot(SP, mapping = aes(x = math.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Reds") +
  ggtitle(label = "math score Sesuai Jenis Kelaminnya")

WRITING SCORE

library(ggplot2)
# Histogram of the writing scores (bin width 5) with bars filled by gender.
# Exactly one fill scale is added: a second scale_fill_brewer() would only
# replace the first and emit a "Scale for 'fill' is already present" message.
ggplot(SP, mapping = aes(x = writing.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Reds") +
  ggtitle(label = "writing score Sesuai Jenis Kelaminnya")

READING SCORE

library(ggplot2)
# Histogram of the reading scores (bin width 5) with bars filled by gender.
# Exactly one fill scale is added: a second scale_fill_brewer() would only
# replace the first and emit a "Scale for 'fill' is already present" message.
ggplot(SP, mapping = aes(x = reading.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Reds") +
  ggtitle(label = "reading score Sesuai Jenis Kelaminnya")

(g) Buatlah visualisasi boxplot pada masing-masing skor kemampuan (matematika, writing, reading) terhadap parental education level di masing-masing gender

# Box plots (over translucent violins) of each score by parental education
# level, split by gender. The task asks for all three scores, so the same
# layers are applied to math, writing and reading in turn.
box_layers <- list(
  geom_violin(fill = "Black", alpha = 0.3, trim = FALSE, width = 1),
  geom_boxplot(),
  coord_flip(), # horizontal boxes keep the long education labels readable
  scale_fill_brewer(palette = "RdBu")
)

# Math score
ggplot(
  SP,
  mapping = aes(x = parental.level.of.education, y = math.score, fill = gender)
) +
  box_layers +
  labs(title = " boxplot skor mat - parental education gender")

# Writing score
ggplot(
  SP,
  mapping = aes(
    x = parental.level.of.education, y = writing.score, fill = gender
  )
) +
  box_layers +
  labs(title = " boxplot skor writing - parental education gender")

# Reading score
ggplot(
  SP,
  mapping = aes(
    x = parental.level.of.education, y = reading.score, fill = gender
  )
) +
  box_layers +
  labs(title = " boxplot skor reading - parental education gender")

(h) Buatlah scatter plot antara skor matematika, writing dan reading

# Pairwise scatter plots of the three scores, drawn without the box around
# the plot region. The argument is spelled out as `frame.plot` instead of
# relying on partial matching of `frame`.

# Math vs. reading
plot(
  SP$math.score, SP$reading.score,
  pch = 20, cex = 1, frame.plot = FALSE,
  xlab = "MATH SCORE", ylab = "READING SCORE",
  main = "Scatterplot Math & Reading"
)

# Reading vs. writing
plot(
  SP$reading.score, SP$writing.score,
  pch = 20, cex = 1, frame.plot = FALSE,
  xlab = "READING SCORE", ylab = "WRITING SCORE",
  main = "Scatterplot Reading & Writing"
)

# Writing vs. math
plot(
  SP$writing.score, SP$math.score,
  pch = 20, cex = 1, frame.plot = FALSE,
  xlab = "WRITING SCORE", ylab = "MATH SCORE",
  main = "Scatterplot Writing & Math"
)

UTS STATISTIKA MATEMATIKA

Moh. Aditya Sirojul Hilmi

2022-10-26

1 Buatlah histogram dan hitunglah nilai kurtosis dan skewness