library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# ---- Load and merge the workbook sheets ----
# Both sheets are read from the same workbook, so the path is defined once.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
excel_path <- "C:/Users/ASUS/Documents/Kuliah/Semester 4/Analisis Eksplorasi Data/Praktikum/Praktikum 02 Histogram & Boxplot/Data untuk Eksplorasi.xlsx"

# Indicator sheet. skip = 1 drops the first worksheet row before the header
# row is read.
Sheetdata <- read_excel(excel_path, sheet = "data", skip = 1)
head(Sheetdata)
## # A tibble: 6 x 38
## Country `GDP (USDbn)` `Real GDP growth ~ `Real GDP growth~ `Consumer prices (~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CH 749. 1.89 -2.72 0.00116
## 2 DE 3794. 1.63 -4.77 1.21
## 3 DK 355. 2.69 -2.73 0.54
## 4 LU 73.1 3.23 -1.31 1.17
## 5 NL 910. 2.22 -3.74 1.18
## 6 NO 363. 1.47 -0.766 2.62
## # ... with 33 more variables: Consumer prices (annual avg. % growth) <dbl>,
## # Gross dom. inv. (% GDP avg 5yrs) <dbl>, Gross dom. inv. (% GDP) <dbl>,
## # Gross dom. svg. (% GDP avg 5yrs) <dbl>, Gross dom. svg. (% GDP) <dbl>,
## # Bank System Assets (% GDP avg 5yr) <dbl>, Bank System Assets (% GDP) <dbl>,
## # Loan-deposit ratio (% avg 5yr) <dbl>, Loan-deposit ratio (%) <dbl>,
## # Capital adequacy ratio (% avg 5yr) <dbl>, Capital adequacy ratio (%) <dbl>,
## # Non-performing loans (% of gross loans avg 5yr) <dbl>, ...

# Country-code lookup sheet (names, ISO codes, region and sub-region).
Sheetcountrycode <- read_excel(excel_path, sheet = "country code", skip = 1)
head(Sheetcountrycode)
## # A tibble: 6 x 11
## name `alpha-2` `alpha-3` `country-code` `iso_3166-2` region `sub-region`
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Afghanis~ AF AFG 4 ISO 3166-2:~ Asia Southern Asia
## 2 Åland Is~ AX ALA 248 ISO 3166-2:~ Europe Northern Eur~
## 3 Albania AL ALB 8 ISO 3166-2:~ Europe Southern Eur~
## 4 Algeria DZ DZA 12 ISO 3166-2:~ Africa Northern Afr~
## 5 American~ AS ASM 16 ISO 3166-2:~ Ocean~ Polynesia
## 6 Andorra AD AND 20 ISO 3166-2:~ Europe Southern Eur~
## # ... with 4 more variables: intermediate-region <chr>, region-code <dbl>,
## # sub-region-code <dbl>, intermediate-region-code <dbl>

# The two-letter code is the join key: it is called "Country" in the data
# sheet and "alpha-2" in the lookup sheet. Rename by column name rather than
# by position so a reordered sheet cannot silently rename the wrong column.
stopifnot("alpha-2" %in% names(Sheetcountrycode))
names(Sheetcountrycode)[names(Sheetcountrycode) == "alpha-2"] <- "Country"

# merge() keeps only the codes that occur in both sheets (inner join).
dataframe <- merge(Sheetcountrycode, Sheetdata, by = "Country")

# View() opens a spreadsheet viewer, so it is only called in an interactive
# session.
if (interactive()) {
  View(dataframe)
}
# Frequency histogram of nominal GDP (USD bn) from the merged table.
hist(
  x = dataframe[["GDP (USDbn)"]],
  xlab = "GDP (USDbn)",
  main = "Nominal Gross Domestic Product 115 Negara"
)
# Gross domestic investment (% of GDP) as a numeric vector. x8 is also used
# by the density plot that follows.
x8 <- as.numeric(dataframe$`Gross dom. inv. (% GDP)`)

# 22 equally spaced break points across the observed range give 21 bins.
x8_range <- range(x8, na.rm = TRUE)
x8_breaks <- seq(x8_range[1], x8_range[2], length.out = 22)

hist(x8,
     main = "Histogram Gross Domestic Investment dari 115 Negara",
     cex.main = 1,
     ylab = "Frequency",
     xlab = "Gross dom. inv. (% GDP)",
     col = "#ffcccc",
     breaks = x8_breaks)

# Reference lines: median (thin, dark blue) and mean (thick, olive).
abline(v = median(x8, na.rm = TRUE), col = "#333f66", lwd = 2)
abline(v = mean(x8, na.rm = TRUE), col = "#b2b266", lwd = 5)
# density plot
# Histogram on the density scale (freq = FALSE) so that the kernel density
# estimate can be overlaid on the same y-axis.
hist(x8, breaks = 20, freq = FALSE, border = "white", col = "#ffcccc",
     main = "Gross Domestic Investment dari 115 Negara",
     xlab = "Gross dom. inv. (% GDP)")

# Kernel density estimate with a fixed bandwidth of 1.5; missing values are
# dropped before estimation.
lines(density(x8, na.rm = TRUE, bw = 1.5), lwd = 4)

# Legend: filled square for the histogram, solid line for the density curve.
legend("topright", legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
       pch = c(15, NA), lty = c(NA, 1), lwd = c(NA, 2), pt.cex = c(1, NA),
       col = c("#ffcccc", "black"), bty = "n", cex = .75)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
# HDI is coerced to numeric; entries that cannot be parsed as numbers
# become NA (hence the coercion warning below).
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion

# Histogram of HDI with a dashed vertical line at the mean.
# - bins = 30 is the value ggplot2 would pick by default; stating it
#   explicitly avoids the "Pick better value with `binwidth`" message.
# - The mean is a single constant, so it is passed as a plain argument
#   instead of inside aes(); mapping it in aes() draws one identical line
#   per data row on top of each other.
ggplot(dataframe, aes(x = hdi)) +
  geom_histogram(bins = 30, color = "black", fill = "#e0ccff") +
  ggtitle("Histogram Indeks Pembangunan Manusia") +
  xlab("HDI") +
  ylab("Frequency") +
  geom_vline(xintercept = mean(hdi, na.rm = TRUE),
             color = "dark blue", linetype = "dashed", size = 1) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Rule of Law scores as a numeric vector.
x113 <- as.numeric(dataframe$`Rule of Law`)

# Histogram of Rule of Law with white bars whose outline colour encodes the
# region; the legend is placed above the panel.
ggplot(data = dataframe, mapping = aes(x = x113, colour = region)) +
  geom_histogram(fill = "white") +
  labs(x = "Rule of Law") +
  theme(legend.position = "top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
# Mean Rule of Law score per region (one row per region, column `rata2`).
# summarise is namespace-qualified because dplyr, attached later in this
# script via tidyverse, masks plyr::summarise; the qualified call behaves the
# same no matter which packages are attached when this line is re-run.
# na.rm = TRUE keeps a single missing score from turning a region mean into NA.
rataan <- plyr::ddply(
  dataframe, "region", plyr::summarise,
  rata2 = mean(`Rule of Law`, na.rm = TRUE)
)

# Histogram outlined by region, with a dashed vertical line at each region's
# mean drawn in the matching colour.
ggplot(dataframe, aes(x = `Rule of Law`, color = region)) +
  geom_histogram(fill = "white") +
  geom_vline(data = rataan, aes(xintercept = rata2, color = region),
             linetype = "dashed") +
  theme(legend.position = "top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same Rule of Law histogram with per-region mean lines, rendered with a
# grey colour scale instead of the default hues.
ggplot(
  data = dataframe,
  mapping = aes(x = `Rule of Law`, colour = region)
) +
  geom_histogram(fill = "white") +
  geom_vline(
    data = rataan,
    mapping = aes(xintercept = rata2, colour = region),
    linetype = "dashed"
  ) +
  theme(legend.position = "top") +
  scale_color_grey()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(ggplot2)
# Histogram of annual average consumer-price growth, one facet per region
# (two facet columns), bars filled by region with the "Set1" palette.
# theme_bw() is applied before theme() so the centred title is not reset.
ggplot(dataframe, aes(x = `Consumer prices (annual avg. % growth)`)) +
  geom_histogram(mapping = aes(fill = region)) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~ region, ncol = 2) +
  labs(
    x = "Consumer prices (annual avg. % growth)",
    title = "Histogram Pertumbuhan Rata-Rata Tahunan \nIndeks Harga Konsumen berdasarkan Wilayah"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# HDI as numeric; non-numeric entries become NA (see the warning below).
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion

# Base-graphics boxplot of HDI.
boxplot(
  x = hdi,
  ylab = "HDI",
  main = "Boxplot Indeks Pembangunan Manusia dari 115 Negara",
  cex.main = 1
)
# Base-graphics boxplot of the government effectiveness index, filled yellow.
boxplot(
  x = dataframe[["Gov. Eff."]],
  col = "#fbd76a",
  ylab = "Gov. Effectiveness",
  main = "Boxplot Indeks Efektivitas Pemerintah dari 115 Negara",
  cex.main = 1
)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## v purrr 0.3.4
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::arrange() masks plyr::arrange()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::mutate() masks plyr::mutate()
## x dplyr::rename() masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
# Region as a factor whose levels are ordered by the median government
# effectiveness within each region, so the boxes are drawn in that order.
region.reord <- with(dataframe, reorder(region, `Gov. Eff.`, FUN = median))

# One box per region, each with its own fill colour.
boxplot(
  dataframe$`Gov. Eff.` ~ region.reord,
  main = "Boxplot Indeks Efektivitas Pemerintah \nBerdasarkan Wilayah",
  cex.main = 1,
  xlab = "Region",
  ylab = "Government Effectiveness",
  col = c("#ffcc66", "#ff6666", "#aaaa55", "#0088cc", "#ff99e6")
)
library(ggplot2)
# HDI as numeric; non-numeric entries become NA (see the warning below).
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion

# ggplot2 boxplot of HDI with a centred title.
ggplot(dataframe, aes(y = hdi)) +
  geom_boxplot() +
  labs(title = "Boxplot Indeks Pembangunan Manusia dari 115 Negara") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
# ggplot2 boxplot of government effectiveness, one box per region, filled by
# region, with a centred two-line title.
ggplot(
  dataframe,
  aes(x = as.factor(region), y = `Gov. Eff.`, fill = region)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot Indeks Efektivitas Pemerintah \nBerdasarkan Wilayah",
    x = "Region",
    y = "Gov. Effectiveness"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
library(ggplot2)
# Average real GDP growth over the last five years; X3 is reused by the
# distribution-fit plots that follow.
X3 <- dataframe$`Real GDP growth ( avg last 5yrs%)`

# Histogram on the density scale with a kernel density overlay.
# The x-axis label names the plotted variable (it previously carried the
# label of the gross domestic investment plot).
hist(X3, breaks = 10, freq = FALSE, border = "white", col = "#ffcccc",
     main = "Rata-Rata Pertumbuhan GDP Real \ndari 115 Negara dalam 5 tahun terakhir",
     xlab = "Real GDP growth (avg last 5yrs %)")

# Kernel density estimate with a fixed bandwidth of 1.5; missing values are
# dropped before estimation.
lines(density(X3, na.rm = TRUE, bw = 1.5), lwd = 2)

# Legend: filled square for the histogram, solid line for the density curve.
legend("topleft",
       legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
       pch = c(15, NA), lty = c(NA, 1), lwd = c(NA, 2), pt.cex = c(1, NA),
       col = c("#ffcccc", "black"), bty = "n", cex = .65)
# Normal
# Normal QQ plot of X3 with a dashed reference line.
qqnorm(X3, cex = 1)
qqline(X3, distribution = qnorm, col = "red", lty = "dashed", lwd = .1)

# Density-scale histogram with the fitted normal curve on top. Mean and sd
# ignore missing values so that an NA in X3 cannot break the overlay.
hist(X3, breaks = 20, freq = FALSE, xlim = c(-6, 12), border = "white",
     col = "#ffcccc",
     main = "X3 vs Normal")
curve(dnorm(x, mean = mean(X3, na.rm = TRUE), sd = sd(X3, na.rm = TRUE)),
      from = -6, to = 12, add = TRUE, lwd = 4)
# Lognormal
# A lognormal distribution only has positive support, so its parameters are
# estimated from the strictly positive observations of X3. Taking the subset
# explicitly gives the same estimates as dropping the NaNs from log(X3)
# afterwards, without the "NaNs produced" warnings, and it also keeps an
# exact zero (log = -Inf) from corrupting the estimates.
x3_log_pos <- log(X3[!is.na(X3) & X3 > 0])
x3_meanlog <- mean(x3_log_pos)
x3_sdlog <- sd(x3_log_pos)

# QQ plot against theoretical lognormal quantiles at ppoints(); unlike a
# random rlnorm() sample this gives the same picture on every run.
qqplot(qlnorm(ppoints(length(X3)), meanlog = x3_meanlog, sdlog = x3_sdlog),
       X3,
       xlab = "Theoretical lognormal quantiles", ylab = "X3")
qqline(X3,
       distribution = function(p) qlnorm(p, meanlog = x3_meanlog,
                                         sdlog = x3_sdlog),
       col = "orange")

# Density-scale histogram with the fitted lognormal curve on top.
hist(X3, breaks = 20, freq = FALSE, xlim = c(-6, 12), border = "white",
     col = "#ffcccc", main = "X3 vs lognormal")
curve(dlnorm(x, meanlog = x3_meanlog, sdlog = x3_sdlog),
      from = -6, to = 12, add = TRUE, lwd = 4, col = "Black")
# Chi-Squared
# Degrees of freedom are set to the sample mean of X3 (the mean of a
# chi-squared distribution equals its df); computed once, ignoring NAs.
x3_chisq_df <- mean(X3, na.rm = TRUE)

# QQ plot against theoretical chi-squared quantiles at ppoints(); unlike a
# random rchisq() sample this gives the same picture on every run.
qqplot(qchisq(ppoints(length(X3)), df = x3_chisq_df), X3,
       xlab = "Theoretical chi-squared quantiles", ylab = "X3")
qqline(X3, distribution = function(p) qchisq(p, df = x3_chisq_df),
       col = "red")

# Density-scale histogram with the chi-squared density on top.
hist(X3, breaks = 20, freq = FALSE, xlim = c(-6, 12), border = "white",
     main = "X3 vs Chi-sq",
     col = "#ffcccc")
curve(dchisq(x, df = x3_chisq_df), from = -6, to = 12, add = TRUE, lwd = 4)
# Exponential
# Rate is the reciprocal of the sample mean of X3; computed once, ignoring
# NAs.
x3_exp_rate <- 1 / mean(X3, na.rm = TRUE)

# QQ plot against theoretical exponential quantiles at ppoints(); unlike a
# random rexp() sample this gives the same picture on every run.
qqplot(qexp(ppoints(length(X3)), rate = x3_exp_rate), X3,
       xlab = "Theoretical exponential quantiles", ylab = "X3")
qqline(X3, distribution = function(p) qexp(p, rate = x3_exp_rate),
       col = "red")

# Density-scale histogram with the exponential density on top.
hist(X3, breaks = 20, freq = FALSE, xlim = c(-6, 12),
     main = "X3 vs Exponential",
     border = "white",
     col = "#ffcccc")
curve(dexp(x, rate = x3_exp_rate), from = -6, to = 12, add = TRUE, lwd = 4)
library(ggplot2)
# Regulatory quality scores; X115 is reused by the distribution-fit plots
# that follow.
X115 <- dataframe$`Reg. Qual.`

# Histogram on the density scale with a kernel density overlay.
hist(X115, breaks = 20, freq = FALSE, border = "white", col = "#d9b38c",
     main = "Regulatory Quality",
     xlab = "Regulatory Quality")

# Kernel density estimate with a fixed bandwidth of 1.5; na.rm = TRUE keeps
# density() from stopping on a missing value.
lines(density(X115, na.rm = TRUE, bw = 1.5), lwd = 4)

# Legend: filled square for the histogram, solid line for the density curve.
legend("topleft", legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
       pch = c(15, NA), lty = c(NA, 1), lwd = c(NA, 2), pt.cex = c(1, NA),
       col = c("#d9b38c", "black"), bty = "n", cex = .75)
## QQPlot Regulatory Quality
# Normal
# Normal QQ plot of X115 with a dashed reference line.
qqnorm(X115, cex = 1)
qqline(X115, distribution = qnorm, col = "red", lty = "dashed", lwd = .1)

# Density-scale histogram with the fitted normal curve on top. Mean and sd
# ignore missing values so that an NA in X115 cannot break the overlay.
hist(X115, breaks = 20, freq = FALSE, xlim = c(0, 100), border = "white",
     col = "#d9b38c",
     main = "X115 vs Normal")
curve(dnorm(x, mean = mean(X115, na.rm = TRUE), sd = sd(X115, na.rm = TRUE)),
      from = 0, to = 100, add = TRUE, lwd = 4)
# Lognormal
# Lognormal parameters are estimated once from the strictly positive,
# non-missing observations (the distribution has positive support only).
# When every value is positive this equals mean/sd of log(X115).
x115_log_pos <- log(X115[!is.na(X115) & X115 > 0])
x115_meanlog <- mean(x115_log_pos)
x115_sdlog <- sd(x115_log_pos)

# QQ plot against theoretical lognormal quantiles at ppoints(); unlike a
# random rlnorm() sample this gives the same picture on every run.
qqplot(qlnorm(ppoints(length(X115)), meanlog = x115_meanlog,
              sdlog = x115_sdlog),
       X115,
       xlab = "Theoretical lognormal quantiles", ylab = "X115")
qqline(X115,
       distribution = function(p) qlnorm(p, meanlog = x115_meanlog,
                                         sdlog = x115_sdlog),
       col = "red")

# Density-scale histogram with the fitted lognormal curve on top.
hist(X115, breaks = 20, freq = FALSE, xlim = c(0, 50), border = "white",
     col = "#d9b38c", main = "X115 vs lognormal")
curve(dlnorm(x, meanlog = x115_meanlog, sdlog = x115_sdlog),
      from = 0, to = 50, add = TRUE, lwd = 4)
# Chi-Squared
# Degrees of freedom are set to the sample mean of X115 (the mean of a
# chi-squared distribution equals its df); computed once, ignoring NAs.
x115_chisq_df <- mean(X115, na.rm = TRUE)

# QQ plot against theoretical chi-squared quantiles at ppoints(); unlike a
# random rchisq() sample this gives the same picture on every run.
qqplot(qchisq(ppoints(length(X115)), df = x115_chisq_df), X115,
       xlab = "Theoretical chi-squared quantiles", ylab = "X115")
qqline(X115, distribution = function(p) qchisq(p, df = x115_chisq_df),
       col = "red")

# Density-scale histogram with the chi-squared density on top.
hist(X115, breaks = 20, freq = FALSE, xlim = c(0, 100), ylim = c(0, 0.04),
     border = "white",
     col = "#d9b38c", main = "X115 vs Chi-sq")
curve(dchisq(x, df = x115_chisq_df), from = 0, to = 100, add = TRUE, lwd = 4)
# Exponential
# Rate is the reciprocal of the sample mean of X115; computed once, ignoring
# NAs.
x115_exp_rate <- 1 / mean(X115, na.rm = TRUE)

# QQ plot against theoretical exponential quantiles at ppoints(); unlike a
# random rexp() sample this gives the same picture on every run.
qqplot(qexp(ppoints(length(X115)), rate = x115_exp_rate), X115,
       xlab = "Theoretical exponential quantiles", ylab = "X115")
qqline(X115, distribution = function(p) qexp(p, rate = x115_exp_rate),
       col = "red")

# Density-scale histogram with the exponential density on top.
hist(X115, breaks = 20, freq = FALSE, xlim = c(0, 100),
     main = "X115 vs Exponential",
     border = "white",
     col = "#d9b38c")
curve(dexp(x, rate = x115_exp_rate), from = 0, to = 100, add = TRUE, lwd = 4)