\(f(x;λ)=\begin{cases} λe^{-λx}, & x \geq 0 \\ 0, & \text{lainnya} \end{cases}\)
1. Menentukan CDF
PDF dari \(\text{Exponential}(λ)\): \(f(x)=λe^{-λx}\), \(x\geq0\)
CDF dari \(\text{Exponential}(λ)\): \(F(x)=1-e^{-λx}\), \(x\geq0\)
2. Mencari invers CDF \(F(x)\)
Invers CDF dari \(\text{Exponential}(λ)\): \(F^{-1}(u)=-\frac{\ln(1-u)}{λ}\), \(0 \leq u < 1\)
3. Memunculkan \(u∼U(0,1)\)
# Number of observations to simulate
n <- 10000
# Seed the RNG *before* drawing, so the uniform sample (and everything
# derived from it) is reproducible from run to run.
set.seed(10)
# Draw n values from U(0, 1)
u <- runif(n)
4. Mencari bilangan acak \(x\) dengan menghitung \(F^{-1}(u)\)
# Inverse-CDF transform: x = F^{-1}(u) = -log(1 - u) / lambda.
#
# The rate has to be a single, strictly positive number. The previous code
# used seq(from = 0, to = 1), i.e. the vector c(0, 1); recycling it against
# `u` divided every odd-indexed element by 0 and produced Inf.
# NOTE(review): lambda = 1 is an assumption (it reproduces the finite values
# the old code returned) — replace it with the rate the exercise requires.
set.seed(10)  # kept from the original; it does not influence `u`, drawn earlier
lambda <- 1
stopifnot(length(lambda) == 1, is.finite(lambda), lambda > 0)
x <- -log(1 - u) / lambda
head(x)
# NOTE: the printed results recorded after this chunk in the document come
# from the old rate vector; re-run the document to refresh them.
## [1] Inf 0.2326468 Inf 0.2608092 Inf 0.8711166
# Check the number of observations in x
length(x)
## [1] 10000
5. Membuat Histogram
# Histogram of the sample x obtained from the inverse transform
hist(x,main="Exp dari Inverse Transform")
6. Menentukan Kurtosis dan Skewness
# kurtosis() and skewness() are called on the simulated sample x after
# loading e1071.
# NOTE(review): the NaN results recorded here are presumably a consequence of
# the Inf values visible in the recorded head(x) output — confirm.
library(e1071)
kurtosis(x)
## [1] NaN
skewness(x)
## [1] NaN
Langkah pertama memanggil data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the comma-separated student performance file; the first row holds
# the column names.
StudentsPerformance.csv <- read.table(
  "C:/Users/USER/Downloads/StudentsPerformance.csv",
  header = TRUE,
  sep = ","
)
# Data frame used by the rest of the analysis
data <- data.frame(StudentsPerformance.csv)
Untuk langkah selanjutnya menambah kolom nilai total score dan rata-rata score
library(dplyr)
# Append two derived columns: the sum of the three subject scores and
# their arithmetic mean.
data <- data %>%
  mutate(
    total.score = math.score + reading.score + writing.score,
    rata_rata.score = (math.score + reading.score + writing.score) / 3
  )
head(data)
## gender race.ethnicity parental.level.of.education lunch
## 1 female group B bachelor's degree standard
## 2 female group C some college standard
## 3 female group B master's degree standard
## 4 male group A associate's degree free/reduced
## 5 male group C some college standard
## 6 female group B associate's degree standard
## test.preparation.course math.score reading.score writing.score total.score
## 1 none 72 72 74 218
## 2 completed 69 90 88 247
## 3 none 90 95 93 278
## 4 none 47 57 44 148
## 5 none 76 78 75 229
## 6 none 71 83 78 232
## rata_rata.score
## 1 72.66667
## 2 82.33333
## 3 92.66667
## 4 49.33333
## 5 76.33333
## 6 77.33333
summary
library(e1071)
# Column-by-column summary of the data frame
summary(data)
## gender race.ethnicity parental.level.of.education
## Length:1000 Length:1000 Length:1000
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## lunch test.preparation.course math.score reading.score
## Length:1000 Length:1000 Min. : 0.00 Min. : 17.00
## Class :character Class :character 1st Qu.: 57.00 1st Qu.: 59.00
## Mode :character Mode :character Median : 66.00 Median : 70.00
## Mean : 66.09 Mean : 69.17
## 3rd Qu.: 77.00 3rd Qu.: 79.00
## Max. :100.00 Max. :100.00
## writing.score total.score rata_rata.score
## Min. : 10.00 Min. : 27.0 Min. : 9.00
## 1st Qu.: 57.75 1st Qu.:175.0 1st Qu.: 58.33
## Median : 69.00 Median :205.0 Median : 68.33
## Mean : 68.05 Mean :203.3 Mean : 67.77
## 3rd Qu.: 79.00 3rd Qu.:233.0 3rd Qu.: 77.67
## Max. :100.00 Max. :300.0 Max. :100.00
skewness
library(e1071)
# Skewness of each of the three subject scores
skewness(data$math.score)
## [1] -0.2780989
skewness(data$reading.score)
## [1] -0.2583277
skewness(data$writing.score)
## [1] -0.2885762
kurtosis
library(e1071)
# Kurtosis of each of the three subject scores
kurtosis(data$math.score)
## [1] 0.2610652
kurtosis(data$reading.score)
## [1] -0.07976785
kurtosis(data$writing.score)
## [1] -0.04511069
library(ggplot2)

# For each categorical column: frequency table (lower-case name) and the
# percentage share rounded to one decimal (upper-case name).
b <- table(data$gender)
B <- round(100 * b / sum(b), 1)
c <- table(data$race.ethnicity)
C <- round(100 * c / sum(c), 1)
d <- table(data$parental.level.of.education)
D <- round(100 * d / sum(d), 1)
e <- table(data$lunch)
E <- round(100 * e / sum(e), 1)
a <- table(data$test.preparation.course)
A <- round(100 * a / sum(a), 1)

# Pie charts: slice labels show the percentage shares.
# Legend entries are taken from names(<table>) so they always follow the
# slice order. table() sorts character categories alphabetically (e.g.
# "female" before "male"), so the previous hand-typed labels
# (e.g. "Male", "Female") were paired with the wrong colours.
pie(b, labels = B, main = 'Pie Chart of Gender', col = rainbow(length(b)))
legend('topright', legend = names(b), cex = 0.8, fill = rainbow(length(b)))

pie(c, labels = C, main = 'Pie Chart of Race Ethnicity', col = rainbow(length(c)))
legend('topright', legend = names(c), cex = 0.8, fill = rainbow(length(c)))

pie(d, labels = D, main = 'Pie Chart of Parental Level of Education', col = rainbow(length(d)))
legend('topright', legend = names(d), cex = 0.8, fill = rainbow(length(d)))

pie(e, labels = E, main = 'Pie Chart of Lunch', col = rainbow(length(e)))
legend('topright', legend = names(e), cex = 0.8, fill = rainbow(length(e)))

pie(a, labels = A, main = 'Pie Chart of Preparation Courser', col = rainbow(length(a)))
legend('topright', legend = names(a), cex = 0.8, fill = rainbow(length(a)))
parental.level.of.education
race.ethnicity
gunakan ggplot
syntax bantuan
# NOTE(review): this looks like a generic template — `value` and `y` are
# presumably placeholders for a numeric column and a grouping column, and no
# output is recorded for it. Confirm it is not meant to be executed as-is.
rataan = aggregate(value~y, data=data, FUN = mean)
rataan
library("ggplot2")
# Mean of rata_rata.score within each parental education level.
# (The low-to-high ordering is applied when plotting, via reorder().)
rataan <- aggregate(
  rata_rata.score ~ parental.level.of.education,
  data = data,
  FUN = mean
)
rataan
## parental.level.of.education rata_rata.score
## 1 associate's degree 69.56907
## 2 bachelor's degree 71.92373
## 3 high school 63.09694
## 4 master's degree 73.59887
## 5 some college 68.47640
## 6 some high school 65.10801
# Bar chart of the mean average score per parental education level; the
# x categories are reordered by their mean score.
parental_barchart <- ggplot(
  rataan,
  aes(
    x = reorder(parental.level.of.education, rata_rata.score),
    y = rata_rata.score
  )
) +
  geom_bar(stat = "identity", color = "skyblue", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Diagram Batang",
    subtitle = "Diagram Batang rata-rata score terhadap Parental level of education",
    caption = "Sumber: Kaggle",
    y = "rata-rata score",
    x = "parental level of education"
  )
parental_barchart
# Mean of rata_rata.score within each race/ethnicity group
rataan <- aggregate(
  rata_rata.score ~ race.ethnicity,
  data = data,
  FUN = mean
)
rataan
## race.ethnicity rata_rata.score
## 1 group A 62.99251
## 2 group B 65.46842
## 3 group C 67.13166
## 4 group D 69.17939
## 5 group E 72.75238
# Bar chart of the mean average score per race/ethnicity group; the
# x categories are reordered by their mean score.
# NOTE(review): "Prace" in the subtitle looks like a typo for "race".
race_barchart <- ggplot(
  rataan,
  aes(
    x = reorder(race.ethnicity, rata_rata.score),
    y = rata_rata.score
  )
) +
  geom_bar(stat = "identity", color = "skyblue", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Diagram Batang",
    subtitle = "Diagram Batang rata-rata score terhadap Prace ethnicity",
    caption = "Sumber: Kaggle",
    y = "rata-rata score",
    x = "race ethnicity"
  )
race_barchart
parental level of education dan gender terhadap total score ;
race.ethnicity dan gender terhadap total score ;
library(ggplot2)
# Stacked bars of total.score per parental education level, coloured by
# gender (each row's total.score is stacked onto its category).
ggplot(data, aes(fill = gender, y = total.score, x = parental.level.of.education)) +
  geom_bar(position = 'stack', stat = 'identity') +
  ggtitle('parental level of education dan gender terhadap total score') +
  xlab('')
# Same chart per race/ethnicity group. The title previously repeated the
# parental-education title (copy-paste slip); it now names the variable
# that is actually on the x axis.
ggplot(data, aes(fill = gender, y = total.score, x = race.ethnicity)) +
  geom_bar(position = 'stack', stat = 'identity') +
  ggtitle('race.ethnicity dan gender terhadap total score') +
  xlab('')
#### F. Buatlah visualisasi sebaran data skor matematika, skor writing, dan skor reading pada masing-masing jenis kelamin dengan menggunakan histogram
library(ggplot2)
# Histogram of math scores (bin width 5), filled by gender.
# A single fill scale is kept. The former second scale_fill_brewer() call
# passed "#FF1234" — a hex colour, not a ColorBrewer palette name — which
# raised the "Unknown palette" warning and overrode the "Set3" scale.
ggplot(data, mapping = aes(x = math.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Set3") +
  ggtitle(label = "math score Sesuai Jenis Kelaminnya")
# Histogram of writing scores (bin width 5), filled by gender.
# A single fill scale is kept. The former second scale_fill_brewer() call
# passed "#FF2235" — a hex colour, not a ColorBrewer palette name — which
# raised the "Unknown palette" warning and overrode the "Set3" scale.
ggplot(data, mapping = aes(x = writing.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Set3") +
  ggtitle(label = "writing score Sesuai Jenis Kelaminnya")
# Histogram of reading scores (bin width 5), filled by gender.
# A single fill scale is kept. The former second scale_fill_brewer() call
# passed "#FF3236" — a hex colour, not a ColorBrewer palette name — which
# raised the "Unknown palette" warning and overrode the "Set3" scale.
ggplot(data, mapping = aes(x = reading.score, fill = gender)) +
  geom_histogram(color = "black", binwidth = 5) +
  theme_dark() +
  scale_fill_brewer(palette = "Set3") +
  ggtitle(label = "reading score Sesuai Jenis Kelaminnya")
library(ggplot2)
# Violin plot overlaid with box plots of math scores per parental education
# level, filled by gender, with the axes flipped.
# NOTE(review): width = 1.02 is presumably what triggers the recorded
# position_dodge warning — confirm.
ggplot(
  data,
  mapping = aes(x = parental.level.of.education, y = math.score, fill = gender)
) +
  geom_violin(fill = "#2D2D2D", alpha = 0.3, trim = FALSE, width = 1.02) +
  geom_boxplot() +
  labs(title = "boxplot pada skor mat terhadap parental education di masing' gender") +
  coord_flip() +
  scale_fill_brewer(palette = "RdPu")
## Warning: position_dodge requires non-overlapping x intervals
# Violin plot overlaid with box plots of writing scores per parental
# education level, filled by gender, with the axes flipped.
# NOTE(review): width = 1.02 is presumably what triggers the recorded
# position_dodge warning — confirm.
ggplot(
  data,
  mapping = aes(x = parental.level.of.education, y = writing.score, fill = gender)
) +
  geom_violin(fill = "#2D2D2D", alpha = 0.3, trim = FALSE, width = 1.02) +
  geom_boxplot() +
  labs(title = "boxplot pada skor writing terhadap parental education di masing' gender") +
  coord_flip() +
  scale_fill_brewer(palette = "RdPu")
## Warning: position_dodge requires non-overlapping x intervals
# Violin plot overlaid with box plots of reading scores per parental
# education level, filled by gender, with the axes flipped.
# NOTE(review): width = 1.02 is presumably what triggers the recorded
# position_dodge warning — confirm.
ggplot(
  data,
  mapping = aes(x = parental.level.of.education, y = reading.score, fill = gender)
) +
  geom_violin(fill = "#2D2D2D", alpha = 0.3, trim = FALSE, width = 1.02) +
  geom_boxplot() +
  labs(title = "boxplot pada skor reading terhadap parental education di masing' gender") +
  coord_flip() +
  scale_fill_brewer(palette = "RdPu")
## Warning: position_dodge requires non-overlapping x intervals
Gambar di atas merupakan hasil output boxplot skor math, reading, dan writing terhadap parental level of education pada masing-masing gender. Dari hasil di atas dapat disimpulkan bahwa terdapat outlier pada data skor tersebut.
# Scatterplot matrix of the three subject scores
pairs(~math.score + writing.score + reading.score, data = data, main = "Scatterplot Matrix", col = "blue")
Gambar di atas merupakan hasil output scatter plot antara skor matematika, writing, dan reading. Dari hasil di atas, data ketiganya saling berpengaruh dan tidak ada data yang outlier.