library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2

Mengimport Data dari Excel

# Read the "data" worksheet of the practicum workbook. `skip = 1` tells
# read_excel() to skip one row before it starts reading anything.
Sheetdata <- read_excel(
  path = "C:/Users/ASUS/Documents/Kuliah/Semester 4/Analisis Eksplorasi Data/Praktikum/Praktikum 02 Histogram & Boxplot/Data untuk Eksplorasi.xlsx",
  sheet = "data",
  skip = 1
)
# Show the first six rows as a quick sanity check of the import.
head(Sheetdata, n = 6L)
## # A tibble: 6 x 38
##   Country `GDP (USDbn)` `Real GDP growth ~ `Real GDP growth~ `Consumer prices (~
##   <chr>           <dbl>              <dbl>             <dbl>               <dbl>
## 1 CH              749.                1.89            -2.72              0.00116
## 2 DE             3794.                1.63            -4.77              1.21   
## 3 DK              355.                2.69            -2.73              0.54   
## 4 LU               73.1               3.23            -1.31              1.17   
## 5 NL              910.                2.22            -3.74              1.18   
## 6 NO              363.                1.47            -0.766             2.62   
## # ... with 33 more variables: Consumer prices (annual avg. % growth) <dbl>,
## #   Gross dom. inv. (% GDP avg 5yrs) <dbl>, Gross dom. inv. (% GDP) <dbl>,
## #   Gross dom. svg. (% GDP avg 5yrs) <dbl>, Gross dom. svg. (% GDP) <dbl>,
## #   Bank System Assets (% GDP avg 5yr) <dbl>, Bank System Assets (% GDP) <dbl>,
## #   Loan-deposit ratio (% avg 5yr) <dbl>, Loan-deposit ratio (%) <dbl>,
## #   Capital adequacy ratio (% avg 5yr) <dbl>, Capital adequacy ratio (%) <dbl>,
## #   Non-performing loans (% of gross loans avg 5yr) <dbl>, ...
# Read the "country code" worksheet from the same workbook, again skipping
# one row before reading.
Sheetcountrycode <- read_excel(
  path = "C:/Users/ASUS/Documents/Kuliah/Semester 4/Analisis Eksplorasi Data/Praktikum/Praktikum 02 Histogram & Boxplot/Data untuk Eksplorasi.xlsx",
  sheet = "country code",
  skip = 1
)
# Show the first six rows as a quick sanity check of the import.
head(Sheetcountrycode, n = 6L)
## # A tibble: 6 x 11
##   name      `alpha-2` `alpha-3` `country-code` `iso_3166-2` region `sub-region` 
##   <chr>     <chr>     <chr>              <dbl> <chr>        <chr>  <chr>        
## 1 Afghanis~ AF        AFG                    4 ISO 3166-2:~ Asia   Southern Asia
## 2 Åland Is~ AX        ALA                  248 ISO 3166-2:~ Europe Northern Eur~
## 3 Albania   AL        ALB                    8 ISO 3166-2:~ Europe Southern Eur~
## 4 Algeria   DZ        DZA                   12 ISO 3166-2:~ Africa Northern Afr~
## 5 American~ AS        ASM                   16 ISO 3166-2:~ Ocean~ Polynesia    
## 6 Andorra   AD        AND                   20 ISO 3166-2:~ Europe Southern Eur~
## # ... with 4 more variables: intermediate-region <chr>, region-code <dbl>,
## #   sub-region-code <dbl>, intermediate-region-code <dbl>
# Rename the second column (printed as `alpha-2` in the preview) to "Country"
# so that both tables expose a key column with the same name.
names(Sheetcountrycode)[2L] <- "Country"

Merging Data

# Join the country metadata with the indicator data on the shared "Country"
# key. merge() keeps only the rows whose key appears in both tables.
dataframe <- merge(Sheetcountrycode, Sheetdata, by = "Country")

# Open the data viewer only when a user is present: View() needs a GUI and
# can fail when the script is run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(dataframe)
}

Histogram Sederhana

# Plain histogram of nominal GDP using the default binning.
hist(
  x = dataframe[["GDP (USDbn)"]],
  xlab = "GDP (USDbn)",
  main = "Nominal Gross Domestic Product 115 Negara"
)

Histogram Modifikasi

# Gross domestic investment (% GDP) as a numeric vector; `x8` is reused by
# the density plot further down.
x8 <- as.numeric(dataframe[["Gross dom. inv. (% GDP)"]])

# 22 equally spaced break points across the observed range, i.e. 21 bins.
hist(
  x8,
  breaks = seq(
    from = min(x8, na.rm = TRUE),
    to = max(x8, na.rm = TRUE),
    length.out = 22
  ),
  col = "#ffcccc",
  xlab = "Gross dom. inv. (% GDP)",
  ylab = "Frequency",
  main = "Histogram Gross Domestic Investment dari 115 Negara",
  cex.main = 1
)

# Reference lines: thin line at the median, thick line at the mean.
abline(v = median(x8, na.rm = TRUE), lwd = 2, col = "#333f66")
abline(v = mean(x8, na.rm = TRUE), lwd = 5, col = "#b2b266")

# Density-scaled histogram of x8 with a kernel density estimate
# (bandwidth 1.5) drawn on top.
hist(
  x8,
  freq = FALSE,
  breaks = 20,
  col = "#ffcccc",
  border = "white",
  xlab = "Gross dom. inv. (% GDP)",
  main = "Gross Domestic Investment dari 115 Negara"
)
lines(density(x8, bw = 1.5, na.rm = TRUE), lwd = 4)

# Legend: filled square for the bars, solid line for the density curve.
legend(
  x = "topright",
  legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
  col = c("#ffcccc", "black"),
  pch = c(15, NA),
  pt.cex = c(1, NA),
  lty = c(NA, 1),
  lwd = c(NA, 2),
  cex = .75,
  bty = "n"
)

Histogram Modifikasi: ggplot

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
# Coerce the HDI column to numeric; the coercion warning echoed below shows
# that some entries are not numbers and become NA.
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion
# Histogram of HDI with a dashed vertical line at the mean and a centred
# title.
ggplot(data = dataframe, mapping = aes(x = hdi)) +
  geom_histogram(fill = "#e0ccff", color = "black") +
  geom_vline(
    mapping = aes(xintercept = mean(hdi, na.rm = TRUE)),
    linetype = "dashed",
    color = "dark blue",
    size = 1
  ) +
  labs(
    title = "Histogram Indeks Pembangunan Manusia",
    x = "HDI",
    y = "Frequency"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).

# "Rule of Law" as a numeric vector, drawn as one outlined histogram per
# region with the legend placed above the plot.
x113 <- as.numeric(dataframe[["Rule of Law"]])
ggplot(data = dataframe, mapping = aes(x = x113, color = region)) +
  geom_histogram(fill = "white") +
  labs(x = "Rule of Law") +
  theme(legend.position = "top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
# Mean "Rule of Law" score per region; `rataan` supplies the dashed
# reference lines in the plots that follow.
rataan <- ddply(
  .data = dataframe,
  .variables = "region",
  .fun = summarise,
  rata2 = mean(`Rule of Law`)
)

# Per-region histograms with each region's mean marked by a dashed line.
ggplot(data = dataframe, mapping = aes(x = `Rule of Law`, color = region)) +
  geom_histogram(fill = "white") +
  geom_vline(
    data = rataan,
    mapping = aes(xintercept = rata2, color = region),
    linetype = "dashed"
  ) +
  theme(legend.position = "top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same per-region histogram with mean lines, rendered with a grey colour
# scale instead of the default palette.
ggplot(data = dataframe, mapping = aes(x = `Rule of Law`, color = region)) +
  geom_histogram(fill = "white") +
  geom_vline(
    data = rataan,
    mapping = aes(xintercept = rata2, color = region),
    linetype = "dashed"
  ) +
  scale_colour_grey() +
  theme(legend.position = "top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(ggplot2)
# Consumer-price growth histograms, one panel per region laid out in two
# columns, filled with the "Set1" Brewer palette.
ggplot(dataframe, aes(x = `Consumer prices (annual avg. % growth)`)) +
  geom_histogram(mapping = aes(fill = region)) +
  facet_wrap(facets = ~region, ncol = 2) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Histogram Pertumbuhan Rata-Rata Tahunan \nIndeks Harga Konsumen berdasarkan Wilayah",
    x = "Consumer prices (annual avg. % growth)"
  ) +
  # theme_bw() replaces the whole theme, so the title tweak must come after.
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Boxplot Sederhana

# Numeric HDI values (non-numeric entries become NA, hence the warning
# echoed below), summarised in a single box plot.
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion
boxplot(
  x = hdi,
  ylab = "HDI",
  main = "Boxplot Indeks Pembangunan Manusia dari 115 Negara",
  cex.main = 1
)

# Single filled box plot of the government effectiveness index.
boxplot(
  x = dataframe[["Gov. Eff."]],
  col = "#fbd76a",
  ylab = "Gov. Effectiveness",
  main = "Boxplot Indeks Efektivitas Pemerintah dari 115 Negara",
  cex.main = 1
)

Boxplot Modifikasi

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## v purrr   0.3.4
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::arrange()   masks plyr::arrange()
## x purrr::compact()   masks plyr::compact()
## x dplyr::count()     masks plyr::count()
## x dplyr::failwith()  masks plyr::failwith()
## x dplyr::filter()    masks stats::filter()
## x dplyr::id()        masks plyr::id()
## x dplyr::lag()       masks stats::lag()
## x dplyr::mutate()    masks plyr::mutate()
## x dplyr::rename()    masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
# Order the regions by their median government effectiveness so the boxes
# appear sorted by median along the x-axis.
region.reord <- with(dataframe, reorder(region, `Gov. Eff.`, FUN = median))

# One box per region, each with its own fill colour.
boxplot(
  dataframe[["Gov. Eff."]] ~ region.reord,
  xlab = "Region",
  ylab = "Government Effectiveness",
  main = "Boxplot Indeks Efektivitas Pemerintah \nBerdasarkan Wilayah",
  cex.main = 1,
  col = c("#ffcc66", "#ff6666", "#aaaa55", "#0088cc", "#ff99e6")
)

Boxplot Sederhana: ggplot

library(ggplot2)
# Numeric HDI values (non-numeric entries become NA, hence the warning
# echoed below), shown as a single ggplot box plot with a centred title.
hdi <- as.numeric(dataframe$HDI)
## Warning: NAs introduced by coercion
ggplot(dataframe, aes(y = hdi)) +
  geom_boxplot() +
  labs(title = "Boxplot Indeks Pembangunan Manusia dari 115 Negara") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

Boxplot Modifikasi: ggplot

# Government effectiveness by region: one filled box per region, with a
# centred two-line title.
ggplot(
  dataframe,
  aes(x = as.factor(region), y = `Gov. Eff.`, fill = region)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot Indeks Efektivitas Pemerintah \nBerdasarkan Wilayah",
    x = "Region",
    y = "Gov. Effectiveness"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

QQPlot

library(ggplot2)
# Average real GDP growth over the last five years; `X3` is reused by the
# QQ-plot sections below.
X3 <- dataframe$`Real GDP growth ( avg last 5yrs%)`

# Density-scaled histogram with a kernel density estimate (bandwidth 1.5).
# The x-axis label names the variable actually plotted (real GDP growth);
# it previously carried the gross-domestic-investment label from an
# earlier plot.
hist(X3, breaks = 10, freq = FALSE, border = "white", col = "#ffcccc",
     main = "Rata-Rata Pertumbuhan GDP Real \ndari 115 Negara dalam 5 tahun terakhir",
     xlab = "Real GDP growth (avg last 5yrs %)")
lines(density(X3, na.rm = TRUE, bw = 1.5), lwd = 2)

# Legend: filled square for the bars, solid line for the density curve.
legend("topleft",
       legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
       pch = c(15, NA), lty = c(NA, 1), lwd = c(NA, 2), pt.cex = c(1, NA),
       col = c("#ffcccc", "black"), bty = "n", cex = .65)

QQPlot Real GDP Growth (avg last 5yrs%)

# Normal
# QQ plot of X3 against the normal distribution, with a thin dashed
# reference line.
qqnorm(X3, cex = 1)
qqline(X3, distribution = qnorm, lwd = .1, lty = "dashed", col = "red")

# Density-scaled histogram overlaid with the normal density that uses the
# sample mean and standard deviation of X3.
hist(
  X3,
  freq = FALSE,
  breaks = 20,
  xlim = c(-6, 12),
  col = "#ffcccc",
  border = "white",
  main = "X3 vs Normal"
)
curve(
  dnorm(x, mean = mean(X3), sd = sd(X3)),
  from = -6,
  to = 12,
  lwd = 4,
  add = TRUE
)

# Lognormal
# The lognormal distribution is defined for positive values only, so its
# parameters are estimated from the strictly positive observations. Taking
# log() of the full vector gives NaN (with a warning) for negative growth
# rates and -Inf for zeros, and -Inf is not removed by na.rm = TRUE.
X3_positive <- X3[!is.na(X3) & X3 > 0]
X3_meanlog <- mean(log(X3_positive))
X3_sdlog <- sd(log(X3_positive))

# QQ plot of X3 against a simulated lognormal sample of the same length,
# plus the reference line for the fitted lognormal quantile function.
qqplot(rlnorm(n = length(X3), meanlog = X3_meanlog, sdlog = X3_sdlog), X3)
qqline(X3,
       distribution = function(p) {
         qlnorm(p, meanlog = X3_meanlog, sdlog = X3_sdlog)
       },
       col = "orange")

# Density-scaled histogram overlaid with the fitted lognormal density.
hist(X3, breaks = 20, freq = FALSE, xlim = c(-6, 12), border = "white",
     col = "#ffcccc", main = "X3 vs lognormal")
curve(dlnorm(x, meanlog = X3_meanlog, sdlog = X3_sdlog),
      from = -6, to = 12, add = TRUE, lwd = 4, col = "Black")

# Chi-Squared
# QQ plot of X3 against a simulated chi-squared sample whose degrees of
# freedom are set to the sample mean of X3.
qqplot(rchisq(n = length(X3), df = mean(X3)), X3)
qqline(
  X3,
  distribution = function(prob) qchisq(prob, df = mean(X3)),
  col = "red"
)

# Density-scaled histogram overlaid with the same chi-squared density.
hist(
  X3,
  freq = FALSE,
  breaks = 20,
  xlim = c(-6, 12),
  col = "#ffcccc",
  border = "white",
  main = "X3 vs Chi-sq"
)
curve(dchisq(x, df = mean(X3)), from = -6, to = 12, lwd = 4, add = TRUE)

# Exponential
# QQ plot of X3 against a simulated exponential sample whose rate is the
# reciprocal of the sample mean of X3.
qqplot(rexp(n = length(X3), rate = 1/mean(X3)), X3)
qqline(
  X3,
  distribution = function(prob) qexp(prob, rate = 1 / mean(X3)),
  col = "red"
)

# Density-scaled histogram overlaid with the same exponential density.
hist(
  X3,
  freq = FALSE,
  breaks = 20,
  xlim = c(-6, 12),
  col = "#ffcccc",
  border = "white",
  main = "X3 vs Exponential"
)
curve(dexp(x, rate = 1 / mean(X3)), from = -6, to = 12, lwd = 4, add = TRUE)

library(ggplot2)
# Regulatory quality scores; `X115` is reused by the QQ-plot sections below.
X115 <- dataframe$`Reg. Qual.`

# Density-scaled histogram with a kernel density estimate (bandwidth 1.5).
hist(
  X115,
  freq = FALSE,
  breaks = 20,
  col = "#d9b38c",
  border = "white",
  xlab = "Regulatory Quality",
  main = "Regulatory Quality"
)
lines(density(X115, bw = 1.5), lwd = 4)

# Legend: filled square for the bars, solid line for the density curve.
legend(
  x = "topleft",
  legend = c("Histogram", "Dugaan Sebaran Kepekatan Data"),
  col = c("#d9b38c", "black"),
  pch = c(15, NA),
  pt.cex = c(1, NA),
  lty = c(NA, 1),
  lwd = c(NA, 2),
  cex = .75,
  bty = "n"
)

## QQPlot Regulatory Quality

# Normal
# QQ plot of X115 against the normal distribution, with a thin dashed
# reference line.
qqnorm(X115, cex = 1)
qqline(X115, distribution = qnorm, lwd = .1, lty = "dashed", col = "red")

# Density-scaled histogram overlaid with the normal density that uses the
# sample mean and standard deviation of X115.
hist(
  X115,
  freq = FALSE,
  breaks = 20,
  xlim = c(0, 100),
  col = "#d9b38c",
  border = "white",
  main = "X115 vs Normal"
)
curve(
  dnorm(x, mean = mean(X115), sd = sd(X115)),
  from = 0,
  to = 100,
  lwd = 4,
  add = TRUE
)

# Lognormal
# QQ plot of X115 against a simulated lognormal sample whose parameters are
# the mean and standard deviation of log(X115).
qqplot(
  rlnorm(
    n = length(X115),
    meanlog = mean(log(X115)),
    sdlog = sd(log(X115))
  ),
  X115
)
qqline(
  X115,
  distribution = function(prob) {
    qlnorm(prob, meanlog = mean(log(X115)), sdlog = sd(log(X115)))
  },
  col = "red"
)

# Density-scaled histogram overlaid with the same lognormal density.
# NOTE(review): the x-range here is 0-50 while the other X115 plots use
# 0-100 -- confirm this zoom is intended.
hist(
  X115,
  freq = FALSE,
  breaks = 20,
  xlim = c(0, 50),
  col = "#d9b38c",
  border = "white",
  main = "X115 vs lognormal"
)
curve(
  dlnorm(x, meanlog = mean(log(X115)), sdlog = sd(log(X115))),
  from = 0,
  to = 50,
  lwd = 4,
  add = TRUE
)

# Chi-Squared
# QQ plot of X115 against a simulated chi-squared sample whose degrees of
# freedom are set to the sample mean of X115.
qqplot(rchisq(n = length(X115), df = mean(X115)), X115)
qqline(
  X115,
  distribution = function(prob) qchisq(prob, df = mean(X115)),
  col = "red"
)

# Density-scaled histogram (y-axis capped at 0.04) overlaid with the same
# chi-squared density.
hist(
  X115,
  freq = FALSE,
  breaks = 20,
  xlim = c(0, 100),
  ylim = c(0, 0.04),
  col = "#d9b38c",
  border = "white",
  main = "X115 vs Chi-sq"
)
curve(dchisq(x, df = mean(X115)), from = 0, to = 100, lwd = 4, add = TRUE)

# Exponential
# QQ plot of X115 against a simulated exponential sample whose rate is the
# reciprocal of the sample mean of X115.
qqplot(rexp(n = length(X115), rate = 1/mean(X115)), X115)
qqline(
  X115,
  distribution = function(prob) qexp(prob, rate = 1 / mean(X115)),
  col = "red"
)

# Density-scaled histogram overlaid with the same exponential density.
hist(
  X115,
  freq = FALSE,
  breaks = 20,
  xlim = c(0, 100),
  col = "#d9b38c",
  border = "white",
  main = "X115 vs Exponential"
)
curve(dexp(x, rate = 1 / mean(X115)), from = 0, to = 100, lwd = 4, add = TRUE)