Data berikut merupakan data yang akan digunakan dalam eksplorasi kali ini.
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Import the first worksheet of the practicum workbook into a tibble.
DataPrak <- read_excel(
  path = "C:/Users/ASUS/OneDrive/Documents/Kuliah/Semester 4/Analisis Eksporasi Data/Praktikum/Data untuk Eksplorasi.xlsx",
  sheet = 1
)
# Preview the first rows of the imported data.
head(x = DataPrak)
## # A tibble: 6 x 38
## Country X2 X3 X4 X5 X6 X7 X8 X9 X10 X39 X40
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Country GDP (~ Real ~ Real ~ Consu~ Consu~ Gros~ Gros~ Gros~ Gros~ Bank~ Bank~
## 2 CH 749.0~ 1.885~ -2.72~ 1.16E~ -8.39~ 24.4~ 24.5~ 36.5~ 37.0~ 473.~ 523.~
## 3 DE 3793.~ 1.628~ -4.76~ 1.207~ 0.377~ 20.7~ 20.2~ 27.4~ 25.9~ 179.~ 193.~
## 4 DK 355.1~ 2.687~ -2.73~ 0.54 0.3 22.0~ 23.1~ 28.8~ 29.6~ 281.~ 310.~
## 5 LU 73.05~ 3.226~ -1.31~ 1.174~ 0.4 18.3~ 15.7~ 53.6~ 54.4~ 1618~ 1660~
## 6 NL 910.0~ 2.219~ -3.73~ 1.1773 1.112~ 21.1~ 21.3~ 31.0~ 31.9~ 292.~ 271.~
## # ... with 26 more variables: X42 <chr>, X43 <chr>, X44 <chr>, X45 <chr>,
## # X46 <chr>, X47 <chr>, X60 <chr>, X61 <chr>, X66 <chr>, X67 <chr>,
## # X102 <chr>, X103 <chr>, X104 <chr>, X105 <chr>, X106 <chr>, X107 <chr>,
## # X108 <chr>, X109 <chr>, X111 <chr>, X112 <chr>, X113 <chr>, X114 <chr>,
## # X115 <chr>, X116 <chr>, X117 <chr>, X118 <chr>
Dengan menggunakan fungsi summary() dan str() kita dapat mengetahui karakteristik data yang akan kita eksplorasi.
# Per-column summary of the raw import (every column is still text here).
summary(object = DataPrak)
## Country X2 X3 X4
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X5 X6 X7 X8
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X9 X10 X39 X40
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X42 X43 X44 X45
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X46 X47 X60 X61
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X66 X67 X102 X103
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X104 X105 X106 X107
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X108 X109 X111 X112
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X113 X114 X115 X116
## Length:118 Length:118 Length:118 Length:118
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X117 X118
## Length:118 Length:118
## Class :character Class :character
## Mode :character Mode :character
# Compact structure listing: dimensions, column types and leading values.
str(object = DataPrak)
## tibble [118 x 38] (S3: tbl_df/tbl/data.frame)
## $ Country: chr [1:118] "Country" "CH" "DE" "DK" ...
## $ X2 : chr [1:118] "GDP (USDbn)" "749.0176725" "3793.5931639999999" "355.18403230000001" ...
## $ X3 : chr [1:118] "Real GDP growth ( avg last 5yrs%)" "1.8852199999999999" "1.6289199999999999" "2.6870799999999999" ...
## $ X4 : chr [1:118] "Real GDP growth (%)" "-2.7231999999999998" "-4.7653999999999996" "-2.7332999999999998" ...
## $ X5 : chr [1:118] "Consumer prices ( avg annual avg. % growth 5yrs)" "1.16E-3" "1.2076800000000001" "0.54" ...
## $ X6 : chr [1:118] "Consumer prices (annual avg. % growth)" "-8.3999999999999995E-3" "0.37780000000000002" "0.3" ...
## $ X7 : chr [1:118] "Gross dom. inv. (% GDP avg 5yrs)" "24.418880000000001" "20.71144" "22.063220000000001" ...
## $ X8 : chr [1:118] "Gross dom. inv. (% GDP)" "24.528400000000001" "20.247499999999999" "23.174800000000001" ...
## $ X9 : chr [1:118] "Gross dom. svg. (% GDP avg 5yrs)" "36.536700000000003" "27.497140000000002" "28.838280000000001" ...
## $ X10 : chr [1:118] "Gross dom. svg. (% GDP)" "37.051400000000001" "25.984300000000001" "29.664400000000001" ...
## $ X39 : chr [1:118] "Bank System Assets (% GDP avg 5yr)" "473.25580000000002" "179.52369999999999" "281.14729999999997" ...
## $ X40 : chr [1:118] "Bank System Assets (% GDP)" "523.8202" "193.952499113789" "310.48038618253599" ...
## $ X42 : chr [1:118] "Loan-deposit ratio (% avg 5yr)" "84.47336" "144.01910000000001" "355.61270000000002" ...
## $ X43 : chr [1:118] "Loan-deposit ratio (%)" "82.5" "138.35723879346699" "359.13886103320101" ...
## $ X44 : chr [1:118] "Capital adequacy ratio (% avg 5yr)" "17.92784" "18.782" "21.346" ...
## $ X45 : chr [1:118] "Capital adequacy ratio (%)" "19.3" "18.579999999999998" "22.6" ...
## $ X46 : chr [1:118] "Non-performing loans (% of gross loans avg 5yr)" "0.69089999999999996" "1.494" "2.5201799999999999" ...
## $ X47 : chr [1:118] "Non-performing loans (% of gross loans)" "0.75" NA "1.8" ...
## $ X60 : chr [1:118] "NXD (% GDP avg 5yr)" "-152.44499999999999" "-15.648099999999999" "-5.5908199999999999" ...
## $ X61 : chr [1:118] "NXD (% GDP)" "-154.95359999999999" "-19.091200000000001" "-13.994999999999999" ...
## $ X66 : chr [1:118] "GXD (% GDP avg 5yr)" "275.61689999999999" "165.29300000000001" "155.84030000000001" ...
## $ X67 : chr [1:118] "GXD (% GDP)" "319.59530000000001" "205.96360000000001" "194.6712" ...
## $ X102 : chr [1:118] "GDP per cap. (USD)" "89770.852100000004" "50891.581200000001" "67565.655599999998" ...
## $ X103 : chr [1:118] "GDP per cap. (% US)" "129.49325180099001" "73.4104242604391" "97.462553256857902" ...
## $ X104 : chr [1:118] "GNI per cap. (PPP)e (USD)" "71660" "57410" "62120" ...
## $ X105 : chr [1:118] "GNI per cap. (PPP)e (% US)" "108.444309927361" "86.879539951573804" "94.007263922518206" ...
## $ X106 : chr [1:118] "Real GDP per cap. (%, 5Y av. gr.)" "0.17330000000000001" "-0.1226" "1.3106" ...
## $ X107 : chr [1:118] "Population (%, 5Y av. gr.)" "0.84019999999999995" "0.48349999999999999" "0.36130000000000001" ...
## $ X108 : chr [1:118] "Unemployment (% labour force avg 5yr)" "2.8857400000000002" "3.7316400000000001" "5.5" ...
## $ X109 : chr [1:118] "Unemployment (% labour force)" "3.1728000000000001" "4.8177000000000003" "5.4" ...
## $ X111 : chr [1:118] "Pol. Stab." "94.761901855468693" "66.666664123535199" "83.809524536132798" ...
## $ X112 : chr [1:118] "Gov. Eff." "99.519233703613295" "93.269233703613295" "99.038459777832003" ...
## $ X113 : chr [1:118] "Rule of Law" "99.038459777832003" "92.307693481445298" "98.076919555664105" ...
## $ X114 : chr [1:118] "Ctrl. of Corr." "96.153846740722699" "95.192306518554702" "97.596153259277301" ...
## $ X115 : chr [1:118] "Reg. Qual." "94.711540222167997" "96.153846740722699" "92.307693481445298" ...
## $ X116 : chr [1:118] "Voice & Acc-ty" "97.044334411621094" "95.073890686035199" "98.522171020507798" ...
## $ X117 : chr [1:118] "HDI" "98.9" "97.3" "95.2" ...
## $ X118 : chr [1:118] "Ease of DB (p-tile)f" "81.5" "88.9" "98.5" ...
Dapat kita ketahui bahwa data tersebut memiliki kelas sebagai character. Oleh karena itu, kita perlu terlebih dahulu mengubah variabel data yang akan kita eksplorasi menjadi numeric.
Kita membuat objek data “Eksplor” sebagai objek data yang akan kita eksplorasi.
# Keep rows 2..108 (row 1 holds the descriptive labels, not data) and only the
# two columns under study, then turn both text columns into numbers.
Eksplor <- DataPrak[seq(2, 108), c("X106", "X107")]
Eksplor[["X106"]] <- as.numeric(Eksplor[["X106"]])
Eksplor[["X107"]] <- as.numeric(Eksplor[["X107"]])
head(x = Eksplor)
## # A tibble: 6 x 2
## X106 X107
## <dbl> <dbl>
## 1 0.173 0.840
## 2 -0.123 0.484
## 3 1.31 0.361
## 4 0.0792 2.02
## 5 0.488 0.593
## 6 0.153 0.838
# Confirm that both columns of the working data are now numeric.
str(object = Eksplor)
## tibble [107 x 2] (S3: tbl_df/tbl/data.frame)
## $ X106: num [1:107] 0.1733 -0.1226 1.3106 0.0792 0.4875 ...
## $ X107: num [1:107] 0.84 0.483 0.361 2.022 0.593 ...
# Numeric summary (min, quartiles, median, mean, max) of each column.
summary(object = Eksplor)
## X106 X107
## Min. :-9.8453 Min. :-0.8862
## 1st Qu.:-0.8901 1st Qu.: 0.3372
## Median : 0.3121 Median : 1.0414
## Mean : 0.3341 Mean : 1.0460
## 3rd Qu.: 1.9074 3rd Qu.: 1.6647
## Max. : 6.0712 Max. : 4.4021
Nah, kini kita sudah memiliki objek data dengan kelas numeric sehingga kita mudah untuk mengeksplorasinya terutama untuk membuat grafik.
Pertama, mari kita amati korelasi antarvariabel dalam data Eksplor, yakni pertumbuhan Real GDP per kapita (X106) dengan pertumbuhan populasi penduduk (X107).
# Pearson correlation matrix between the two columns of Eksplor.
cor(x = Eksplor, method = "pearson")
## X106 X107
## X106 1.0000000 -0.2969766
## X107 -0.2969766 1.0000000
Berikut sintaks untuk menghitung jangkauan antarkuartil (IQR) setiap variabel.
# First and third quartile of every column, as a 2 x ncol matrix.
# vapply() with a named template is used instead of sapply() so the result is
# always a numeric matrix whose rows are named "25%" and "75%".
q.Eksplor <- vapply(
  Eksplor,
  quantile,
  FUN.VALUE = c("25%" = 0, "75%" = 0),
  probs = c(0.25, 0.75)
)
# Interquartile range per column = Q3 - Q1.
q.Eksplor["75%", ] - q.Eksplor["25%", ]
## X106 X107
## 2.79745 1.32750
Histogram merupakan representasi grafis (diagram) yang mengatur dan menampilkan frekuensi data pada rentang tertentu. Dengan menggunakan histogram, kita dapat menganalisis karakteristik dari data tersebut. Berikut sintaks R untuk membuat histogram dari data pertumbuhan rata-rata real GDP dalam 5 tahun terakhir.
# Convert the text column X3 to numbers. The descriptive label stored in its
# first row has no numeric reading and becomes NA, hence the warning below.
DataPrak[["X3"]] <- as.numeric(DataPrak[["X3"]])
## Warning: NAs introduced by coercion
# Histogram with 22 equally spaced break points (21 bins) spanning the
# observed range of X3.
hist(
  x = DataPrak$X3,
  breaks = seq(
    from = min(DataPrak$X3, na.rm = TRUE),
    to = max(DataPrak$X3, na.rm = TRUE),
    length.out = 22
  ),
  col = "green",
  main = "Histogram Rataan Peningkatan GDP Real Lima Tahun Terakhir",
  xlab = "Avg Real GDP growth",
  ylab = "Frekuensi"
)
# Vertical markers: median (thin coral line) first, then mean (thick blue line).
abline(
  v = c(median(DataPrak$X3, na.rm = TRUE), mean(DataPrak$X3, na.rm = TRUE)),
  col = c("coral", "blue"),
  lwd = c(2, 5)
)
Boxplot merupakan penyajian grafis suatu data kuantitatif yang dibentuk berdasarkan five-number summary. Boxplot digunakan untuk mendeskripsikan bentuk dari sebaran data secara kasar dan untuk mendeteksi adanya pencilan (outliers).
Berikut contoh pembuatan boxplot di R dari data pertumbuhan rata-rata real GDP dalam 5 tahun terakhir.
# Single boxplot of the 5-year average real GDP growth column.
boxplot(
  x = DataPrak$X3,
  col = "light green",
  main = "Rataan Peningkatan GDP Real Lima Tahun Terakhir"
)
# Side-by-side boxplots of both Eksplor columns on a common -10..10 scale.
# The default x axis is suppressed so descriptive labels can be drawn instead.
boxplot(
  x = Eksplor,
  main = "Boxplot Data Rel GDP per Cap dengan Population",
  col = c("Pink", "orange"),
  ylim = c(-10, 10),
  xaxt = "n"
)
axis(
  side = 1,
  at = seq_len(2),
  labels = c("Real GDP per cap. (%, 5Y av. gr.)", "Population (%, 5Y av. gr.)")
)
# Dashed reference line at zero growth.
abline(h = 0, col = "Green", lty = "dashed")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
# Boxplot of X3 drawn with ggplot2.
# na.rm must be given to the geom layer: ggplot() itself ignores it, which is
# why the missing value used to raise a "Removed 1 rows" warning.
ggplot(data = DataPrak, mapping = aes(y = X3)) +
  geom_boxplot(fill = "green", colour = "orange", na.rm = TRUE) +
  theme_dark()
# With na.rm = TRUE on the layer, the one missing value is dropped silently.
QQ Plot/Plot kuantil merupakan grafik yang membandingkan antara nilai-nilai kuantil dari sebaran suatu data terhadap kuantil-kuantil milik sebaran lain. Jika kedua sebaran berbentuk sama/hampir sama, maka tiap nilai amatannya akan berada di suatu garis lurus. Plot ini dapat digunakan untuk mencari tahu sebaran teoretis yang paling mendekati sebaran suatu data secara visual.
Dengan memanfaatkan QQ Plot, kita dapat menduga sebaran teoretis yang mendekati sebaran data pertumbuhan rata-rata real GDP dalam 5 tahun terakhir, sebagai berikut:
# Make sure X3 is numeric. as.numeric() has no na.rm argument (the one passed
# before was silently ignored), so missing values are handled in each summary
# call below instead.
DataPrak$X3 <- as.numeric(DataPrak$X3)
# Histogram with 20 equally spaced break points spanning the observed range.
hist(
  DataPrak$X3,
  main = "Histogram Rataan Peningkatan GDP Real Lima Tahun Terakhir",
  ylab = "Frekuensi",
  xlab = "Avg Real GDP growth",
  col = "green",
  breaks = seq(
    min(DataPrak$X3, na.rm = TRUE),
    max(DataPrak$X3, na.rm = TRUE),
    length.out = 20
  )
)
# Median (thin coral line) and mean (thick blue line) for reference.
abline(v = median(DataPrak$X3, na.rm = TRUE), col = "coral", lwd = 2)
abline(v = mean(DataPrak$X3, na.rm = TRUE), col = "blue", lwd = 5)
# Normal QQ plot of X3 with a dashed reference line through the quartiles.
qqnorm(DataPrak$X3, cex = 1)
qqline(DataPrak$X3, distribution = qnorm, col = "red", lty = "dashed", lwd = 1)
# Density-scaled histogram overlaid with the normal density that uses the
# sample mean and standard deviation of X3.
hist(
  DataPrak$X3,
  breaks = 20, freq = FALSE, xlim = c(-15, 15),
  border = "orange", col = "skyblue", main = "DataPrak$X3 vs normal"
)
# The curve range matches the histogram window (-15..15), like the other
# density overlays in this document.
curve(
  dnorm(
    x,
    mean = mean(DataPrak$X3, na.rm = TRUE),
    sd = sd(DataPrak$X3, na.rm = TRUE)
  ),
  from = -15, to = 15, add = TRUE, lwd = 1
)
# Lognormal comparison for X3: QQ plot plus density overlay.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  x3 <- DataPrak$X3

  # A lognormal is only defined for positive values, so the parameters are
  # estimated from the strictly positive observations. Taking log() of the
  # whole column would produce NaN for the non-positive ones.
  log_x3 <- log(x3[!is.na(x3) & x3 > 0])
  stopifnot(length(log_x3) >= 2)
  mu_log <- mean(log_x3)
  sd_log <- sd(log_x3)

  # Deterministic theoretical quantiles (one per non-missing observation)
  # instead of an unseeded random sample, so the plot is reproducible.
  n_obs <- sum(!is.na(x3))
  qqplot(
    qlnorm(ppoints(n_obs), meanlog = mu_log, sdlog = sd_log),
    x3,
    xlab = "Theoretical lognormal quantiles",
    ylab = "DataPrak$X3"
  )
  qqline(
    x3,
    distribution = function(p) qlnorm(p, meanlog = mu_log, sdlog = sd_log),
    col = "orange"
  )

  # Density-scaled histogram with the fitted lognormal density on top.
  hist(
    x3,
    breaks = 20, freq = FALSE, xlim = c(-15, 15),
    border = "Green", col = "Yellow",
    main = "DataPrak$X3 vs lognormal", xlab = "DataPrak$X3"
  )
  curve(
    dlnorm(x, meanlog = mu_log, sdlog = sd_log),
    from = -15, to = 15, add = TRUE, lwd = 1, col = "Blue"
  )
})
# Chi-square comparison for X3: QQ plot plus density overlay.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  x3 <- DataPrak$X3

  # The degrees of freedom are set to the sample mean of X3, as before.
  # They must be positive for the distribution to exist.
  df_hat <- mean(x3, na.rm = TRUE)
  stopifnot(df_hat > 0)

  # Deterministic theoretical quantiles (one per non-missing observation)
  # instead of an unseeded random sample, so the plot is reproducible.
  n_obs <- sum(!is.na(x3))
  qqplot(
    qchisq(ppoints(n_obs), df = df_hat),
    x3,
    col = "Dark Blue",
    xlab = "Theoretical chi-square quantiles",
    ylab = "DataPrak$X3"
  )
  qqline(x3, distribution = function(p) qchisq(p, df = df_hat), col = "red")

  # Density-scaled histogram with the chi-square density on top.
  hist(
    x3,
    breaks = 20, freq = FALSE, xlim = c(-15, 15),
    border = "Red", col = "skyblue",
    main = "DataPrak$X3 vs Chi-sq", xlab = "DataPrak$X3"
  )
  curve(dchisq(x, df = df_hat), from = -15, to = 15, add = TRUE, lwd = 1)
})
# Exponential comparison for X3: QQ plot plus density overlay.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  x3 <- DataPrak$X3

  # The rate is the reciprocal of the sample mean, as before.
  # The mean must be positive for the rate to be valid.
  mean_x3 <- mean(x3, na.rm = TRUE)
  stopifnot(mean_x3 > 0)
  rate_hat <- 1 / mean_x3

  # Deterministic theoretical quantiles (one per non-missing observation)
  # instead of an unseeded random sample, so the plot is reproducible.
  n_obs <- sum(!is.na(x3))
  qqplot(
    qexp(ppoints(n_obs), rate = rate_hat),
    x3,
    col = "Orange",
    xlab = "Theoretical exponential quantiles",
    ylab = "DataPrak$X3"
  )
  qqline(
    x3,
    distribution = function(p) qexp(p, rate = rate_hat),
    col = "Dark Blue"
  )

  # Density-scaled histogram with the exponential density on top.
  hist(
    x3,
    breaks = 20, freq = FALSE, xlim = c(-15, 15),
    border = "Yellow", col = "Dark Green",
    main = "DataPrak$X3 vs Exponential", xlab = "DataPrak$X3"
  )
  curve(dexp(x, rate = rate_hat), from = -15, to = 15, add = TRUE, lwd = 1)
})
Dari keempat sebaran teoretis ini, kita melihat bahwa sebaran normal merupakan sebaran teoretis yang paling dekat dengan sebaran data pertumbuhan rata-rata real GDP dalam 5 tahun terakhir. Dengan demikian, kita bisa menyimpulkan bahwa sebaran data tersebut paling mendekati sebaran normal.
Penulis berharap tulisan ini dapat memberikan kebermanfaatan bagi pembaca. Mohon maaf bila ada kekurangannya, semoga dapat menjadi lebih baik lagi. Terima kasih dan sampai jumpa :)