- Anda dipekerjakan sebagai Data Scientist oleh Bank Dunia dan Anda sedang mengerjakan sebuah proyek untuk menganalisis tren demografis Dunia.
- Anda diminta untuk membuat scatterplot yang menggambarkan Angka Kelahiran dan Statistik Penggunaan Internet menurut Negara.
- Scatterplot juga perlu dikategorikan berdasarkan Kelompok Pendapatan Negara.
- Anda mendapat permintaan pembaruan mendesak dari manajer Anda yang harus segera dikerjakan.
- Anda diminta untuk membuat scatterplot kedua yang juga menggambarkan Angka Kelahiran dan Statistik Penggunaan Internet menurut Negara.
- Namun, kali ini scatterplot perlu dikategorikan berdasarkan Wilayah Negara.
- Data tambahan telah disediakan dalam bentuk vektor R.
# Load the demographic data set from the CSV file in the working directory.
data_demografi <- read.csv(file = "Demographic-Data.csv")
# Preview the first six rows (six is the default for head()).
head(data_demografi, n = 6L)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1 Aruba ABW 10.244 78.9
## 2 Afghanistan AFG 35.253 5.9
## 3 Angola AGO 45.985 19.1
## 4 Albania ALB 12.877 57.2
## 5 United Arab Emirates ARE 11.044 88.0
## 6 Argentina ARG 17.716 59.9
## Pendapatan.Grup
## 1 High income
## 2 Low income
## 3 Upper middle income
## 4 Upper middle income
## 5 High income
## 6 High income
# Per-column summary of the demographic data.
summary(object = data_demografi)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## Length:195 Length:195 Min. : 7.90 Min. : 0.90
## Class :character Class :character 1st Qu.:12.12 1st Qu.:14.52
## Mode :character Mode :character Median :19.68 Median :41.00
## Mean :21.47 Mean :42.08
## 3rd Qu.:29.76 3rd Qu.:66.22
## Max. :49.66 Max. :96.55
## Pendapatan.Grup
## Length:195
## Class :character
## Mode :character
##
##
##
# Logical mask flagging the rows whose birth rate is above 2.
filter_br <- data_demografi[["Angka.Kelahiran"]] > 2
# Keep only the flagged rows and preview the first six of them.
head(data_demografi[filter_br, ], n = 6L)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1 Aruba ABW 10.244 78.9
## 2 Afghanistan AFG 35.253 5.9
## 3 Angola AGO 45.985 19.1
## 4 Albania ALB 12.877 57.2
## 5 United Arab Emirates ARE 11.044 88.0
## 6 Argentina ARG 17.716 59.9
## Pendapatan.Grup
## 1 High income
## 2 Low income
## 3 Upper middle income
## 4 Upper middle income
## 5 High income
## 6 High income
# Rows whose birth rate is above 40 (first six shown).
head(
  data_demografi[with(data_demografi, Angka.Kelahiran > 40), ],
  n = 6L
)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 3 Angola AGO 45.985 19.1
## 12 Burundi BDI 44.151 1.3
## 15 Burkina Faso BFA 40.551 9.1
## 66 Gambia, The GMB 42.525 14.0
## 116 Mali MLI 44.138 3.5
## 128 Niger NER 49.661 1.7
## Pendapatan.Grup
## 3 Upper middle income
## 12 Low income
## 15 Low income
## 66 Low income
## 116 Low income
## 128 Low income
# Note: `data_demografi$Angka.Kelahiran > 40` evaluates to a logical vector
# (one TRUE/FALSE per row); using it as a row index keeps the TRUE rows.
# Rows whose birth rate is above 40 AND whose internet usage is below 2.
head(
  data_demografi[
    with(data_demografi, Angka.Kelahiran > 40 & Pengguna.Internet < 2),
  ],
  n = 6L
)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet Pendapatan.Grup
## 12 Burundi BDI 44.151 1.3 Low income
## 128 Niger NER 49.661 1.7 Low income
## 157 Somalia SOM 43.891 1.5 Low income
# Rows belonging to the "High income" group; Pendapatan.Grup is a
# categorical label, so it is compared against the exact category text.
head(
  data_demografi[with(data_demografi, Pendapatan.Grup == "High income"), ],
  n = 6L
)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1 Aruba ABW 10.244 78.9000
## 5 United Arab Emirates ARE 11.044 88.0000
## 6 Argentina ARG 17.716 59.9000
## 8 Antigua and Barbuda ATG 16.447 63.4000
## 9 Australia AUS 13.200 83.0000
## 10 Austria AUT 9.400 80.6188
## Pendapatan.Grup
## 1 High income
## 5 High income
## 6 High income
## 8 High income
## 9 High income
## 10 High income
# Look up the record for Indonesia by its country name.
head(
  data_demografi[with(data_demografi, Nama.Negara == "Indonesia"), ],
  n = 6L
)
## Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 80 Indonesia IDN 20.297 14.94
## Pendapatan.Grup
## 80 Lower middle income
# Plotting
library(ggplot2)

# Basic scatter plot of birth rate against internet usage.
# qplot() is deprecated since ggplot2 3.4.0, so ggplot() + geom_point() is
# used instead; the resulting plots are the same.
ggplot(data_demografi, aes(x = Pengguna.Internet, y = Angka.Kelahiran)) +
  geom_point(size = 4)

# Constant values (size, colour) are set outside aes(): they are applied
# as-is instead of being mapped through a scale, so no legend is drawn.
# This is what wrapping a value in I() achieved with qplot().
ggplot(data_demografi, aes(x = Pengguna.Internet, y = Angka.Kelahiran)) +
  geom_point(size = 4, colour = "green")

# Scatter plot of Pengguna.Internet vs Angka.Kelahiran, coloured by
# income group (Pendapatan.Grup).
ggplot(
  data_demografi,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Pendapatan.Grup)
) +
  geom_point(size = 3)

# Scatter plot of Pengguna.Internet vs Angka.Kelahiran, categorised by the
# country's region.
# The additional data is provided as R vectors.
# Country names, one entry per country, in the same order as the code and
# region vectors.
Negara_2012_Dataset <- c(
  "Aruba", "Afghanistan", "Angola", "Albania", "United Arab Emirates",
  "Argentina", "Armenia", "Antigua and Barbuda", "Australia", "Austria",
  "Azerbaijan", "Burundi", "Belgium", "Benin", "Burkina Faso",
  "Bangladesh", "Bulgaria", "Bahrain", "Bahamas, The", "Bosnia and Herzegovina",
  "Belarus", "Belize", "Bermuda", "Bolivia", "Brazil",
  "Barbados", "Brunei Darussalam", "Bhutan", "Botswana", "Central African Republic",
  "Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire",
  "Cameroon", "Congo, Rep.", "Colombia", "Comoros", "Cabo Verde",
  "Costa Rica", "Cuba", "Cayman Islands", "Cyprus", "Czech Republic",
  "Germany", "Djibouti", "Denmark", "Dominican Republic", "Algeria",
  "Ecuador", "Egypt, Arab Rep.", "Eritrea", "Spain", "Estonia",
  "Ethiopia", "Finland", "Fiji", "France", "Micronesia, Fed. Sts.",
  "Gabon", "United Kingdom", "Georgia", "Ghana", "Guinea",
  "Gambia, The", "Guinea-Bissau", "Equatorial Guinea", "Greece", "Grenada",
  "Greenland", "Guatemala", "Guam", "Guyana", "Hong Kong SAR, China",
  "Honduras", "Croatia", "Haiti", "Hungary", "Indonesia",
  "India", "Ireland", "Iran, Islamic Rep.", "Iraq", "Iceland",
  "Israel", "Italy", "Jamaica", "Jordan", "Japan",
  "Kazakhstan", "Kenya", "Kyrgyz Republic", "Cambodia", "Kiribati",
  "Korea, Rep.", "Kuwait", "Lao PDR", "Lebanon", "Liberia",
  "Libya", "St. Lucia", "Liechtenstein", "Sri Lanka", "Lesotho",
  "Lithuania", "Luxembourg", "Latvia", "Macao SAR, China", "Morocco",
  "Moldova", "Madagascar", "Maldives", "Mexico", "Macedonia, FYR",
  "Mali", "Malta", "Myanmar", "Montenegro", "Mongolia",
  "Mozambique", "Mauritania", "Mauritius", "Malawi", "Malaysia",
  "Namibia", "New Caledonia", "Niger", "Nigeria", "Nicaragua",
  "Netherlands", "Norway", "Nepal", "New Zealand", "Oman",
  "Pakistan", "Panama", "Peru", "Philippines", "Papua New Guinea",
  "Poland", "Puerto Rico", "Portugal", "Paraguay", "French Polynesia",
  "Qatar", "Romania", "Russian Federation", "Rwanda", "Saudi Arabia",
  "Sudan", "Senegal", "Singapore", "Solomon Islands", "Sierra Leone",
  "El Salvador", "Somalia", "Serbia", "South Sudan", "Sao Tome and Principe",
  "Suriname", "Slovak Republic", "Slovenia", "Sweden", "Swaziland",
  "Seychelles", "Syrian Arab Republic", "Chad", "Togo", "Thailand",
  "Tajikistan", "Turkmenistan", "Timor-Leste", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Tanzania", "Uganda", "Ukraine",
  "Uruguay", "United States", "Uzbekistan", "St. Vincent and the Grenadines",
  "Venezuela, RB",
  "Virgin Islands (U.S.)", "Vietnam", "Vanuatu", "West Bank and Gaza", "Samoa",
  "Yemen, Rep.", "South Africa", "Congo, Dem. Rep.", "Zambia", "Zimbabwe"
)
# Three-letter country codes, in the same order as the country-name vector;
# this is the key later used to join the region data onto the demographics.
Kode_2012_Dataset <- c(
  "ABW", "AFG", "AGO", "ALB", "ARE", "ARG", "ARM", "ATG", "AUS", "AUT",
  "AZE", "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH",
  "BLR", "BLZ", "BMU", "BOL", "BRA", "BRB", "BRN", "BTN", "BWA", "CAF",
  "CAN", "CHE", "CHL", "CHN", "CIV", "CMR", "COG", "COL", "COM", "CPV",
  "CRI", "CUB", "CYM", "CYP", "CZE", "DEU", "DJI", "DNK", "DOM", "DZA",
  "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", "FSM",
  "GAB", "GBR", "GEO", "GHA", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD",
  "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN",
  "IND", "IRL", "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN",
  "KAZ", "KEN", "KGZ", "KHM", "KIR", "KOR", "KWT", "LAO", "LBN", "LBR",
  "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", "LVA", "MAC", "MAR",
  "MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", "MMR", "MNE", "MNG",
  "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", "NIC",
  "NLD", "NOR", "NPL", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PNG",
  "POL", "PRI", "PRT", "PRY", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU",
  "SDN", "SEN", "SGP", "SLB", "SLE", "SLV", "SOM", "SRB", "SSD", "STP",
  "SUR", "SVK", "SVN", "SWE", "SWZ", "SYC", "SYR", "TCD", "TGO", "THA",
  "TJK", "TKM", "TLS", "TON", "TTO", "TUN", "TUR", "TZA", "UGA", "UKR",
  "URY", "USA", "UZB", "VCT", "VEN", "VIR", "VNM", "VUT", "PSE", "WSM",
  "YEM", "ZAF", "COD", "ZMB", "ZWE"
)
# Region of each country, in the same order as the country-code vector.
# Every value must be exactly one of the six region labels; a label broken
# across source lines would silently create an extra category in the plots.
Wilayah_2012_Dataset <- c(
  "The Americas", "Asia", "Africa", "Europe", "Middle East",
  "The Americas", "Asia", "The Americas", "Oceania", "Europe",
  "Asia", "Africa", "Europe", "Africa", "Africa",
  "Asia", "Europe", "Middle East", "The Americas", "Europe",
  "Europe", "The Americas", "The Americas", "The Americas", "The Americas",
  "The Americas", "Asia", "Asia", "Africa", "Africa",
  "The Americas", "Europe", "The Americas", "Asia", "Africa",
  "Africa", "Africa", "The Americas", "Africa", "Africa",
  "The Americas", "The Americas", "The Americas", "Europe", "Europe",
  "Europe", "Africa", "Europe", "The Americas", "Africa",
  "The Americas", "Africa", "Africa", "Europe", "Europe",
  "Africa", "Europe", "Oceania", "Europe", "Oceania",
  "Africa", "Europe", "Asia", "Africa", "Africa",
  "Africa", "Africa", "Africa", "Europe", "The Americas",
  "The Americas", "The Americas", "Oceania", "The Americas", "Asia",
  "The Americas", "Europe", "The Americas", "Europe", "Asia",
  "Asia", "Europe", "Middle East", "Middle East", "Europe",
  "Middle East", "Europe", "The Americas", "Middle East", "Asia",
  "Asia", "Africa", "Asia", "Asia", "Oceania",
  "Asia", "Middle East", "Asia", "Middle East", "Africa",
  "Africa", "The Americas", "Europe", "Asia", "Africa",
  "Europe", "Europe", "Europe", "Asia", "Africa",
  "Europe", "Africa", "Asia", "The Americas", "Europe",
  "Africa", "Europe", "Asia", "Europe", "Asia",
  "Africa", "Africa", "Africa", "Africa", "Asia",
  "Africa", "Oceania", "Africa", "Africa", "The Americas",
  "Europe", "Europe", "Asia", "Oceania", "Middle East",
  "Asia", "The Americas", "The Americas", "Asia", "Oceania",
  "Europe", "The Americas", "Europe", "The Americas", "Oceania",
  "Middle East", "Europe", "Europe", "Africa", "Middle East",
  "Africa", "Africa", "Asia", "Oceania", "Africa",
  "The Americas", "Africa", "Europe", "Africa", "Africa",
  "The Americas", "Europe", "Europe", "Europe", "Africa",
  "Africa", "Middle East", "Africa", "Africa", "Asia",
  "Asia", "Asia", "Asia", "Oceania", "The Americas",
  "Africa", "Europe", "Africa", "Africa", "Europe",
  "The Americas", "The Americas", "Asia", "The Americas", "The Americas",
  "The Americas", "Asia", "Oceania", "Middle East", "Oceania",
  "Middle East", "Africa", "Africa", "Africa", "Africa"
)
# ------------------------ Build the data frame
# Combine the three parallel vectors column-wise; the column names are
# taken from the vector names.
myDF <- data.frame(
  Negara_2012_Dataset,
  Kode_2012_Dataset,
  Wilayah_2012_Dataset
)
head(myDF, n = 6L)
## Negara_2012_Dataset Kode_2012_Dataset Wilayah_2012_Dataset
## 1 Aruba ABW The Americas
## 2 Afghanistan AFG Asia
## 3 Angola AGO Africa
## 4 Albania ALB Europe
## 5 United Arab Emirates ARE Middle East
## 6 Argentina ARG The Americas
# Give the columns of myDF shorter names.
names(myDF) <- c("Country", "Kode", "Region")
head(myDF, n = 6L)
## Country Kode Region
## 1 Aruba ABW The Americas
## 2 Afghanistan AFG Asia
## 3 Angola AGO Africa
## 4 Albania ALB Europe
## 5 United Arab Emirates ARE Middle East
## 6 Argentina ARG The Americas
# Per-column summary of the region lookup table.
summary(object = myDF)
## Country Kode Region
## Length:195 Length:195 Length:195
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Join the region lookup onto the demographic data, matching
# data_demografi's Kode.Negara column against myDF's Kode column.
merge_data <- merge(
  x = data_demografi,
  y = myDF,
  by.x = "Kode.Negara",
  by.y = "Kode"
)
# The merged frame now carries the country name twice (Nama.Negara and
# Country); drop Nama.Negara.
merge_data[["Nama.Negara"]] <- NULL
head(merge_data, n = 6L)
## Kode.Negara Angka.Kelahiran Pengguna.Internet Pendapatan.Grup
## 1 ABW 10.244 78.9 High income
## 2 AFG 35.253 5.9 Low income
## 3 AGO 45.985 19.1 Upper middle income
## 4 ALB 12.877 57.2 Upper middle income
## 5 ARE 11.044 88.0 High income
## 6 ARG 17.716 59.9 High income
## Country Region
## 1 Aruba The Americas
## 2 Afghanistan Asia
## 3 Angola Africa
## 4 Albania Europe
## 5 United Arab Emirates Middle East
## 6 Argentina The Americas
# Inspect the structure (column names and types) of the merged data.
str(object = merge_data)
## 'data.frame': 195 obs. of 6 variables:
## $ Kode.Negara : chr "ABW" "AFG" "AGO" "ALB" ...
## $ Angka.Kelahiran : num 10.2 35.3 46 12.9 11 ...
## $ Pengguna.Internet: num 78.9 5.9 19.1 57.2 88 ...
## $ Pendapatan.Grup : chr "High income" "Low income" "Upper middle income" "Upper middle income" ...
## $ Country : chr "Aruba" "Afghanistan" "Angola" "Albania" ...
## $ Region : chr "The Americas" "Asia" "Africa" "Europe" ...
# Scatter plot of Pengguna.Internet vs Angka.Kelahiran, coloured by the
# country's region.
# qplot() is deprecated since ggplot2 3.4.0, so ggplot() + geom_point() is
# used instead.
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3)

# Variations on the scatter plot above
# 1. Shape: point shapes 0-25 are available (search for an "R point shapes"
#    chart for the full list); 15 is a filled square.
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3, shape = 15)

# 2. Transparency: alpha is an opacity in [0, 1] and must be set as a
#    constant outside aes(). Mapping a bare number (e.g. alpha = 10) adds a
#    meaningless "alpha" legend and does not apply the requested opacity.
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3, shape = 19, alpha = 0.6)
