Sebelum membuat histogram, boxplot, dan grafik lainnya, beberapa package perlu diinstal dan dimuat terlebih dahulu.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(devtools)
## Loading required package: usethis
library(ggthemr)
A. Input Data
# Read the quiz data set into `Data` and preview its first six rows.
# NOTE(review): the absolute Windows path only resolves on the author's machine.
Data <- read.csv(file = "D:/Semester 4/AED/Kuis UTS 1.csv", header = TRUE)
head(x = Data, n = 6L)
## wbcode country gdp50 gdp90 gdp95 lnd100km pop100km lnd100cr pop100cr
## 1 AGO Angola 986 654 2055.1880 0.1187576 0.2655837 14 30
## 2 ARG Argentina 4987 6581 9287.2120 0.1230889 0.1889420 20 30
## 3 AUT Austria 3731 16792 21269.8500 0.0075848 0.0057634 62 71
## 4 BDI Burundi 320 599 656.9504 0.0000000 0.0000000 0 0
## 5 BEL Belgium 5346 16807 21695.3800 0.4892515 0.7138057 99 99
## 6 BEN Benin 1087 1107 1206.5740 0.1089616 0.4924399 11 49
## dens65c dens65i dens95c dens95i landlock landlneu airdist tropicar
## 1 9.291969 3.462699 19.32299 7.20081 0 0 6830 1.0000
## 2 12.498500 7.530861 19.44353 11.71553 0 0 8570 0.0268
## 3 66.783080 88.049610 73.97482 97.53151 1 0 840 0.0000
## 4 0.000000 125.116800 0.00000 243.92520 1 1 6600 1.0000
## 5 420.711100 161.580900 451.02420 173.22310 0 0 190 0.0000
## 6 99.277840 12.513060 223.68160 28.19301 0 0 5040 1.0000
## troppop malfal66 malfal94 lhcpc south landarea open6590 newstate
## 1 0.7491 1 1 5.732350 1 1246700 0.0000000 2
## 2 0.0000 0 0 4.518682 1 2736690 0.0000000 0
## 3 0.0000 0 0 2.858482 0 82730 1.0000000 1
## 4 1.0000 1 1 -4.605170 1 25680 0.0000000 2
## 5 0.0000 0 0 -4.605170 0 32820 1.0000000 0
## 6 0.5141 1 1 1.053939 0 110620 0.0384615 2
## socialst lifex65 urbpop95 wardum pop95 zpolar zboreal zdestmp
## 1 1 36.0 31.0 1 10.7720 0.0000000 0.0000000 0.0221579
## 2 0 65.8 88.1 0 34.6650 0.0125004 0.0843879 0.2308541
## 3 0 69.9 64.3 0 8.0540 0.0228520 0.4829857 0.0000000
## 4 0 43.5 7.5 0 6.2640 0.0000000 0.0000000 0.0000000
## 5 0 70.9 97.0 0 10.1459 0.0000000 0.0000000 0.0000000
## 6 1 41.0 38.4 0 5.4750 0.0000000 0.0000000 0.0000000
## zdestrp zdrytemp zwettemp zsubtrop ztropics zwater eu safri sasia
## 1 0.0321074 0.0000000 0.0000000 0.8917290 0.0501317 0.0038740 0 1 0
## 2 0.0008983 0.3237756 0.0477848 0.2941360 0.0022708 0.0033919 0 0 0
## 3 0.0000000 0.0000000 0.4941623 0.0000000 0.0000000 0.0000000 1 0 0
## 4 0.0000000 0.0000000 0.0000000 0.9805556 0.0000000 0.0194444 0 1 0
## 5 0.0000000 0.0000000 0.9803160 0.0000000 0.0000000 0.0196840 1 0 0
## 6 0.0000000 0.0000000 0.0000000 0.0072161 0.9797114 0.0130726 0 1 0
## transit latam eseasia region
## 1 0 0 0 Sub-Saharan Africa
## 2 0 1 0 Latin America & Caribbean
## 3 0 0 0 Western Europe
## 4 0 0 0 Sub-Saharan Africa
## 5 0 0 0 Western Europe
## 6 0 0 0 Sub-Saharan Africa
B. Histogram
# Histogram of 1990 GDP per capita using 21 equal-width bins that span the
# observed range, with the median (thin red line) and the mean (thick green
# line) drawn on top.
gdp90_values <- Data[["gdp90"]]
gdp90_breaks <- seq(
  from = min(gdp90_values),
  to = max(gdp90_values),
  length.out = 22
)
hist(
  x = gdp90_values,
  breaks = gdp90_breaks,
  col = "light blue",
  main = "Histogram GDP per capita, 1990 ",
  xlab = "GDP per kapita,1990",
  ylab = "Frequency"
)
abline(v = median(gdp90_values), col = "red", lwd = 2)
abline(v = mean(gdp90_values), col = "green", lwd = 5)
Berdasarkan histogram di atas, terlihat bahwa data menjulur ke kanan. Terlihat pula bahwa GDP per kapita paling banyak berada di sekitar 0 sampai 2500 dengan frekuensi sekitar 36. Median pada histogram ditandai dengan garis merah dan mean ditandai dengan garis hijau.
# Histogram of 1995 GDP per capita using 21 equal-width bins that span the
# observed range, with the median (thin red line) and the mean (thick green
# line) drawn on top.
gdp95_values <- Data[["gdp95"]]
gdp95_breaks <- seq(
  from = min(gdp95_values),
  to = max(gdp95_values),
  length.out = 22
)
hist(
  x = gdp95_values,
  breaks = gdp95_breaks,
  col = "white",
  main = "Histogram GDP per capita, 1995 ",
  xlab = "GDP per kapita,1995",
  ylab = "Frequency"
)
abline(v = median(gdp95_values), col = "red", lwd = 2)
abline(v = mean(gdp95_values), col = "green", lwd = 5)
Berdasarkan histogram di atas, terlihat bahwa data menjulur ke kanan. Terlihat pula bahwa GDP per kapita paling banyak berada di sekitar 0 sampai 2700 dengan frekuensi sekitar 35. Median pada histogram ditandai dengan garis merah dan mean ditandai dengan garis hijau.
# Histogram of life expectancy in 1965 using 21 equal-width bins that span the
# observed range, with the median (thin red line) and the mean (thick green
# line) drawn on top.
# Fixes: the plot labels misspelled "Expectancy"; missing values are now
# ignored when computing the bin range and the summary lines, so an NA in the
# column no longer aborts seq().
lifex65_values <- Data[["lifex65"]]
hist(
  lifex65_values,
  main = "Histogram Life Expectancy, 1965 (UN)",
  ylab = "Frequency",
  xlab = "Life Expectancy, 1965 (UN)",
  col = "pink",
  breaks = seq(
    min(lifex65_values, na.rm = TRUE),
    max(lifex65_values, na.rm = TRUE),
    length.out = 22
  )
)
abline(v = median(lifex65_values, na.rm = TRUE), col = "red", lwd = 2)
abline(v = mean(lifex65_values, na.rm = TRUE), col = "green", lwd = 5)
Berdasarkan histogram di atas, terlihat bahwa data menjulur ke kanan. Terlihat pula bahwa Life Expectancy paling banyak berada di sekitar 70 dengan frekuensi sekitar 10. Median pada histogram ditandai dengan garis merah dan mean ditandai dengan garis hijau.
# Histogram of the 1994 falciparum malaria index using 21 equal-width bins that
# span the observed range, with the median (thin red line) and the mean (thick
# green line) drawn on top.
# Fixes: the plot labels misspelled "Falciparum"; missing values are now
# ignored when computing the bin range and the summary lines, so an NA in the
# column no longer aborts seq().
malfal94_values <- Data[["malfal94"]]
hist(
  malfal94_values,
  main = "Histogram Falciparum Malaria Index, 1994",
  ylab = "Frequency",
  xlab = "Falciparum Malaria Index, 1994",
  col = "purple",
  breaks = seq(
    min(malfal94_values, na.rm = TRUE),
    max(malfal94_values, na.rm = TRUE),
    length.out = 22
  )
)
abline(v = median(malfal94_values, na.rm = TRUE), col = "red", lwd = 2)
abline(v = mean(malfal94_values, na.rm = TRUE), col = "green", lwd = 5)
Berdasarkan histogram di atas, terlihat bahwa data menjulur ke kanan. Terlihat pula bahwa Falciparum Malaria Index paling banyak berada di sekitar 0.01 dengan frekuensi sekitar 45. Median pada histogram ditandai dengan garis merah dan mean ditandai dengan garis hijau.
# Histogram of the 1995 urban population share using 21 equal-width bins that
# span the observed range, with the median (thin red line) and the mean (thick
# green line) drawn on top.
# Fixes: the plot labels had an unclosed "(World Bank" and a stray leading
# space; missing values are now ignored when computing the bin range and the
# summary lines, so an NA in the column no longer aborts seq().
urbpop95_values <- Data[["urbpop95"]]
hist(
  urbpop95_values,
  main = "Histogram % Population urban, 1995 (World Bank)",
  ylab = "Frequency",
  xlab = "% Population urban, 1995 (World Bank)",
  col = "purple",
  breaks = seq(
    min(urbpop95_values, na.rm = TRUE),
    max(urbpop95_values, na.rm = TRUE),
    length.out = 22
  )
)
abline(v = median(urbpop95_values, na.rm = TRUE), col = "red", lwd = 2)
abline(v = mean(urbpop95_values, na.rm = TRUE), col = "green", lwd = 5)
Berdasarkan histogram di atas, terlihat bahwa data menjulur ke kanan. Terlihat pula bahwa % Population urban paling banyak berada di sekitar 30 dengan frekuensi sekitar 10. Median pada histogram ditandai dengan garis merah dan mean ditandai dengan garis hijau.
C. Boxplot
# Box plots of 1990 GDP per capita for each region. The regions are reordered
# by their median GDP, and the y-axis is limited to 0-20000.
boxplot(
  gdp90 ~ reorder(region, gdp90, FUN = median),
  data = Data,
  col = c("darkred", "red", "white", "blue", "purple", "pink"),
  ylim = c(0, 20000),
  main = "GDP per capita pada tahun 1990 pada setiap benua",
  xlab = "Benua",
  ylab = "GDP per capita, 1990"
)
# Box plots of 1990 GDP per capita split by each indicator column in turn
# (socialst, landlock, wardum, south). Within every plot the groups are
# reordered by their median GDP and the y-axis is limited to 0-20000.
indicator_labels <- c(
  socialst = "Socialist",
  landlock = "Landlock",
  wardum = "Wardum",
  south = "South"
)
for (indicator in names(indicator_labels)) {
  axis_label <- indicator_labels[[indicator]]
  boxplot(
    Data$gdp90 ~ reorder(Data[[indicator]], Data$gdp90, FUN = median),
    # The title uses the lower-case form of the axis label.
    main = paste(
      "GDP per capita pada tahun 1990 berdasarkan",
      tolower(axis_label)
    ),
    xlab = axis_label,
    ylab = "GDP per capita tahun 1990",
    col = c("blue", "red"),
    ylim = c(0, 20000)
  )
}
# QQ-plot of 1990 GDP per capita (var1) against a simulated normal sample whose
# mean and standard deviation are taken from the data, plus a dashed blue
# reference line based on the matching normal quantile function.
var1 <- Data$gdp90
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
normal_sample <- rnorm(n = length(var1), mean = mean(var1), sd = sd(var1))
qqplot(
  x = normal_sample,
  y = var1,
  main = "QQ-plot Terhadap Sebaran Normal",
  xlab = "rnorm()",
  ylab = "GDP 90"
)
qqline(
  y = var1,
  distribution = function(p) qnorm(p, mean = mean(var1), sd = sd(var1)),
  lty = "longdash",
  col = "blue"
)
b. Log Normal
# QQ-plot of var1 against a simulated log-normal sample whose meanlog and sdlog
# are the mean and standard deviation of log(var1), plus a dashed blue
# reference line based on the matching log-normal quantile function.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
lognormal_sample <- rlnorm(
  n = length(var1),
  meanlog = mean(log(var1)),
  sdlog = sd(log(var1))
)
qqplot(
  x = lognormal_sample,
  y = var1,
  main = "QQ-plot Terhadap Sebaran Log Normal",
  xlab = "rlnorm()",
  ylab = "GDP 90"
)
qqline(
  y = var1,
  distribution = function(p) {
    qlnorm(p, meanlog = mean(log(var1)), sdlog = sd(log(var1)))
  },
  col = "blue",
  lty = "longdash"
)
c. Chi-Square
# QQ-plot of var1 against a simulated chi-square sample whose degrees of
# freedom are set to the sample mean, plus a blue reference line based on the
# matching chi-square quantile function.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
chisq_sample <- rchisq(n = length(var1), df = mean(var1))
qqplot(
  x = chisq_sample,
  y = var1,
  main = "QQ-plot Terhadap Sebaran Chi-Square",
  xlab = "rchisq()",
  ylab = "GDP 90"
)
qqline(
  y = var1,
  distribution = function(p) qchisq(p, df = mean(var1)),
  col = "blue"
)
d. Exponential
# QQ-plot of var1 against a simulated exponential sample with rate
# 1 / mean(var1), plus a blue reference line based on the matching exponential
# quantile function.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
exp_sample <- rexp(n = length(var1), rate = 1 / mean(var1))
qqplot(
  x = exp_sample,
  y = var1,
  main = "QQ-plot Terhadap Sebaran Eksponensial",
  xlab = "rexp()",
  ylab = "GDP 90"
)
qqline(
  y = var1,
  distribution = function(p) qexp(p, rate = 1 / mean(var1)),
  col = "blue"
)
# Identify the distribution of the data with formal sample distribution tests.
# Hypotheses: H0: distribution 1 == distribution 2; H1: distribution 1 != distribution 2.
# Decision rule: p-value > alpha, accept H0; p-value < alpha, reject H0.
# 1. Kolmogorov-Smirnov test -- normal distribution with the mean and standard
#    deviation taken from var1.
set.seed(42)
ks.test(var1, "pnorm", mean = mean(var1), sd = sd(var1))
## Warning in ks.test(var1, "pnorm", mean = mean(var1), sd = sd(var1)): ties should
## not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.23803, p-value = 4.223e-05
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var1 against a chi-square distribution whose
# degrees of freedom are set to the sample mean.
set.seed(42)
ks.test(var1, "pchisq", df = mean(var1))
## Warning in ks.test(var1, "pchisq", df = mean(var1)): ties should not be present
## for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.66905, p-value < 2.2e-16
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var1 against an exponential distribution with
# rate 1 / mean(var1).
# Fix: the data are now tested on their original scale. The previous call
# passed log(var1) while the rate was still estimated from the untransformed
# var1, so the sample and the reference distribution were on different scales.
# NOTE: any knitted output recorded below this call was produced with
# log(var1) and must be regenerated.
set.seed(42)
ks.test(var1, "pexp", rate = 1 / mean(var1))
## Warning in ks.test(log(var1), "pexp", rate = 1/mean(var1)): ties should not be
## present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: log(var1)
## D = 0.99807, p-value < 2.2e-16
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var1 against a log-normal distribution whose
# meanlog and sdlog are the mean and standard deviation of log(var1).
set.seed(42); ks.test(var1, "plnorm", meanlog = mean(log(var1)), sdlog = sd(log(var1)))
## Warning in ks.test(var1, "plnorm", meanlog = mean(log(var1)), sdlog =
## sd(log(var1))): ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.11254, p-value = 0.1802
## alternative hypothesis: two-sided
# QQ-plot of 1995 GDP per capita (var2) against a simulated normal sample whose
# mean and standard deviation are taken from the data, plus a dashed blue
# reference line based on the matching normal quantile function.
var2 <- Data$gdp95
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
normal_sample_95 <- rnorm(n = length(var2), mean = mean(var2), sd = sd(var2))
qqplot(
  x = normal_sample_95,
  y = var2,
  main = "QQ-plot Terhadap Sebaran Normal",
  xlab = "rnorm()",
  ylab = "GDP 95"
)
qqline(
  y = var2,
  distribution = function(p) qnorm(p, mean = mean(var2), sd = sd(var2)),
  lty = "longdash",
  col = "blue"
)
# QQ-plot of var2 against a simulated log-normal sample whose meanlog and sdlog
# are the mean and standard deviation of log(var2), plus a dashed blue
# reference line based on the matching log-normal quantile function.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
lognormal_sample_95 <- rlnorm(
  n = length(var2),
  meanlog = mean(log(var2)),
  sdlog = sd(log(var2))
)
qqplot(
  x = lognormal_sample_95,
  y = var2,
  main = "QQ-plot Terhadap Sebaran Log Normal",
  xlab = "rlnorm()",
  ylab = "GDP 95"
)
qqline(
  y = var2,
  distribution = function(p) {
    qlnorm(p, meanlog = mean(log(var2)), sdlog = sd(log(var2)))
  },
  col = "blue",
  lty = "longdash"
)
# QQ-plot of 1995 GDP per capita (var2) against a simulated chi-square sample
# whose degrees of freedom are set to the sample mean, plus a blue reference
# line based on the matching chi-square quantile function.
# Fix: the reference line and the y-axis label were left over from the 1990
# analysis (var1, "GDP 90"), so the line did not belong to the plotted points.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
qqplot(
  rchisq(n = length(var2), df = mean(var2)),
  var2,
  main = "QQ-plot Terhadap Sebaran Chi-Square",
  xlab = "rchisq()",
  ylab = "GDP 95"
)
qqline(
  var2,
  distribution = function(p) qchisq(p, df = mean(var2)),
  col = "blue"
)
# QQ-plot of 1995 GDP per capita (var2) against a simulated exponential sample
# with rate 1 / mean(var2), plus a blue reference line based on the matching
# exponential quantile function.
# Fix: this block sits in the var2 (GDP 1995) analysis but still plotted var1
# with the "GDP 90" label, duplicating the 1990 plot.
set.seed(42)  # fixed seed so the simulated reference sample is reproducible
qqplot(
  rexp(n = length(var2), rate = 1 / mean(var2)),
  var2,
  main = "QQ-plot Terhadap Sebaran Eksponensial",
  xlab = "rexp()",
  ylab = "GDP 95"
)
qqline(
  var2,
  distribution = function(p) qexp(p, rate = 1 / mean(var2)),
  col = "blue"
)
# Identify the distribution of the data with formal sample distribution tests.
# Hypotheses: H0: distribution 1 == distribution 2; H1: distribution 1 != distribution 2.
# Decision rule: p-value > alpha, accept H0; p-value < alpha, reject H0.
# 1. Kolmogorov-Smirnov test -- normal distribution with the mean and standard
#    deviation taken from var2.
# Fix: this test belongs to the var2 (GDP 1995) analysis but was still run on
# var1, repeating the 1990 result.
# NOTE: any knitted output recorded below this call was produced with var1 and
# must be regenerated.
set.seed(42)
ks.test(var2, "pnorm", mean = mean(var2), sd = sd(var2))
## Warning in ks.test(var1, "pnorm", mean = mean(var1), sd = sd(var1)): ties should
## not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.23803, p-value = 4.223e-05
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var2 against a chi-square distribution whose
# degrees of freedom are set to the sample mean.
# Fix: this test belongs to the var2 (GDP 1995) analysis but was still run on
# var1, repeating the 1990 result.
# NOTE: any knitted output recorded below this call was produced with var1 and
# must be regenerated.
set.seed(42)
ks.test(var2, "pchisq", df = mean(var2))
## Warning in ks.test(var1, "pchisq", df = mean(var1)): ties should not be present
## for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.66905, p-value < 2.2e-16
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var2 against an exponential distribution with
# rate 1 / mean(var2).
# Fixes: (1) this test belongs to the var2 (GDP 1995) analysis but was still
# run on var1; (2) the data were log-transformed while the rate was estimated
# from the untransformed values, so the sample and the reference distribution
# were on different scales.
# NOTE: any knitted output recorded below this call was produced with
# log(var1) and must be regenerated.
set.seed(42)
ks.test(var2, "pexp", rate = 1 / mean(var2))
## Warning in ks.test(log(var1), "pexp", rate = 1/mean(var1)): ties should not be
## present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: log(var1)
## D = 0.99807, p-value < 2.2e-16
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov test of var2 against a log-normal distribution whose
# meanlog and sdlog are the mean and standard deviation of log(var2).
# Fix: this test belongs to the var2 (GDP 1995) analysis but was still run on
# var1, repeating the 1990 result.
# NOTE: any knitted output recorded below this call was produced with var1 and
# must be regenerated.
set.seed(42)
ks.test(var2, "plnorm", meanlog = mean(log(var2)), sdlog = sd(log(var2)))
## Warning in ks.test(var1, "plnorm", meanlog = mean(log(var1)), sdlog =
## sd(log(var1))): ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: var1
## D = 0.11254, p-value = 0.1802
## alternative hypothesis: two-sided