Anda dipekerjakan sebagai Data Scientist oleh Bank Dunia dan Anda sedang mengerjakan sebuah proyek untuk menganalisis tren demografis Dunia.
Anda diminta untuk membuat scatterplot yang menggambarkan Angka Kelahiran dan Statistik Penggunaan Internet menurut Negara.
Scatterplot juga perlu dikategorikan berdasarkan Kelompok Pendapatan Negara.
Anda menerima permintaan pembaruan mendesak dari manajer Anda yang harus segera dilaksanakan.
Anda diminta untuk membuat scatterplot kedua yang juga menggambarkan Angka Kelahiran dan Statistik Penggunaan Internet menurut Negara.
Namun, kali ini scatterplot perlu dikategorikan berdasarkan Wilayah Negara.
Data tambahan telah disediakan dalam bentuk vektor R.
# Load the demographic data set from the CSV file in the working directory
dt_demografi <- read.csv(file = "P2-Demographic-Data.csv")
# Preview the first 10 rows
head(dt_demografi, 10)
## Country.Name Country.Code Birth.rate Internet.users
## 1 Aruba ABW 10.244 78.9000
## 2 Afghanistan AFG 35.253 5.9000
## 3 Angola AGO 45.985 19.1000
## 4 Albania ALB 12.877 57.2000
## 5 United Arab Emirates ARE 11.044 88.0000
## 6 Argentina ARG 17.716 59.9000
## 7 Armenia ARM 13.308 41.9000
## 8 Antigua and Barbuda ATG 16.447 63.4000
## 9 Australia AUS 13.200 83.0000
## 10 Austria AUT 9.400 80.6188
## Income.Group
## 1 High income
## 2 Low income
## 3 Upper middle income
## 4 Upper middle income
## 5 High income
## 6 High income
## 7 Lower middle income
## 8 High income
## 9 High income
## 10 High income
# Show a per-column summary of the demographic data
summary(dt_demografi)
## Country.Name Country.Code Birth.rate Internet.users
## Length:195 Length:195 Min. : 7.90 Min. : 0.90
## Class :character Class :character 1st Qu.:12.12 1st Qu.:14.52
## Mode :character Mode :character Median :19.68 Median :41.00
## Mean :21.47 Mean :42.08
## 3rd Qu.:29.76 3rd Qu.:66.22
## Max. :49.66 Max. :96.55
## Income.Group
## Length:195
## Class :character
## Mode :character
##
##
##
# Logical row mask: TRUE for every row whose birth rate is greater than 2
filter_br <- dt_demografi[["Birth.rate"]] > 2
# Show the first 10 rows selected by the mask
head(dt_demografi[filter_br, ], 10)
## Country.Name Country.Code Birth.rate Internet.users
## 1 Aruba ABW 10.244 78.9000
## 2 Afghanistan AFG 35.253 5.9000
## 3 Angola AGO 45.985 19.1000
## 4 Albania ALB 12.877 57.2000
## 5 United Arab Emirates ARE 11.044 88.0000
## 6 Argentina ARG 17.716 59.9000
## 7 Armenia ARM 13.308 41.9000
## 8 Antigua and Barbuda ATG 16.447 63.4000
## 9 Australia AUS 13.200 83.0000
## 10 Austria AUT 9.400 80.6188
## Income.Group
## 1 High income
## 2 Low income
## 3 Upper middle income
## 4 Upper middle income
## 5 High income
## 6 High income
## 7 Lower middle income
## 8 High income
## 9 High income
## 10 High income
# Select the rows whose birth rate is above 40 and show the first 10 of them
head(dt_demografi[dt_demografi[["Birth.rate"]] > 40, ], 10)
## Country.Name Country.Code Birth.rate Internet.users Income.Group
## 3 Angola AGO 45.985 19.1 Upper middle income
## 12 Burundi BDI 44.151 1.3 Low income
## 15 Burkina Faso BFA 40.551 9.1 Low income
## 66 Gambia, The GMB 42.525 14.0 Low income
## 116 Mali MLI 44.138 3.5 Low income
## 128 Niger NER 49.661 1.7 Low income
## 129 Nigeria NGA 40.045 38.0 Lower middle income
## 157 Somalia SOM 43.891 1.5 Low income
## 168 Chad TCD 45.745 2.3 Low income
## 179 Uganda UGA 43.474 16.2 Low income
# dt_demografi$Birth.rate > 40 evaluates to a logical vector with one
# TRUE/FALSE per row; used inside [ , ] it keeps only the TRUE rows.
# Combine two such masks with the element-wise `&`:
# birth rate > 40 AND internet users < 2.
head(
  dt_demografi[
    dt_demografi[["Birth.rate"]] > 40 & dt_demografi[["Internet.users"]] < 2,
  ]
)
## Country.Name Country.Code Birth.rate Internet.users Income.Group
## 12 Burundi BDI 44.151 1.3 Low income
## 128 Niger NER 49.661 1.7 Low income
## 157 Somalia SOM 43.891 1.5 Low income
# Income.Group holds categorical labels; keep the rows labelled
# "High income" and show the first 10 of them
head(dt_demografi[dt_demografi[["Income.Group"]] == "High income", ], 10)
## Country.Name Country.Code Birth.rate Internet.users Income.Group
## 1 Aruba ABW 10.244 78.90000 High income
## 5 United Arab Emirates ARE 11.044 88.00000 High income
## 6 Argentina ARG 17.716 59.90000 High income
## 8 Antigua and Barbuda ATG 16.447 63.40000 High income
## 9 Australia AUS 13.200 83.00000 High income
## 10 Austria AUT 9.400 80.61880 High income
## 13 Belgium BEL 11.200 82.17020 High income
## 18 Bahrain BHR 15.040 90.00004 High income
## 19 Bahamas, The BHS 15.339 72.00000 High income
## 23 Bermuda BMU 10.400 95.30000 High income
# Look up the record for Argentina by its country name
head(dt_demografi[dt_demografi[["Country.Name"]] == "Argentina", ])
## Country.Name Country.Code Birth.rate Internet.users Income.Group
## 6 Argentina ARG 17.716 59.9 High income
#load ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# Scatter plot of birth rate (Birth.rate) against internet usage
# (Internet.users). qplot() is deprecated since ggplot2 3.4.0, so the plots
# are built with ggplot() + geom_point(). Constant (non-mapped) settings such
# as size and colour are passed straight to geom_point() instead of being
# wrapped in I().
ggplot(dt_demografi, aes(x = Internet.users, y = Birth.rate)) +
  geom_point(size = 4)

# Same plot with every point drawn in blue
ggplot(dt_demografi, aes(x = Internet.users, y = Birth.rate)) +
  geom_point(size = 4, colour = "blue")

# Colour the points by the country's income group (Income.Group); because
# colour is mapped inside aes(), a legend is generated automatically
ggplot(
  dt_demografi,
  aes(x = Internet.users, y = Birth.rate, colour = Income.Group)
) +
  geom_point(size = 3)
# The second scatter plot has to be categorised by country region.
# The extra data is supplied as three parallel R vectors (same order, one
# entry per country): country name, country code and region.
# NOTE: every string literal must stay on a single line; a line break inside
# the quotes becomes part of the value (e.g. "Middle \nEast" would show up as
# a separate region category in the plot legend).
Countries_2012_Dataset <- c(
  "Aruba", "Afghanistan", "Angola", "Albania", "United Arab Emirates",
  "Argentina", "Armenia", "Antigua and Barbuda", "Australia", "Austria",
  "Azerbaijan", "Burundi", "Belgium", "Benin", "Burkina Faso",
  "Bangladesh", "Bulgaria", "Bahrain", "Bahamas, The",
  "Bosnia and Herzegovina",
  "Belarus", "Belize", "Bermuda", "Bolivia", "Brazil",
  "Barbados", "Brunei Darussalam", "Bhutan", "Botswana",
  "Central African Republic",
  "Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire",
  "Cameroon", "Congo, Rep.", "Colombia", "Comoros", "Cabo Verde",
  "Costa Rica", "Cuba", "Cayman Islands", "Cyprus", "Czech Republic",
  "Germany", "Djibouti", "Denmark", "Dominican Republic", "Algeria",
  "Ecuador", "Egypt, Arab Rep.", "Eritrea", "Spain", "Estonia",
  "Ethiopia", "Finland", "Fiji", "France", "Micronesia, Fed. Sts.",
  "Gabon", "United Kingdom", "Georgia", "Ghana", "Guinea",
  "Gambia, The", "Guinea-Bissau", "Equatorial Guinea", "Greece", "Grenada",
  "Greenland", "Guatemala", "Guam", "Guyana", "Hong Kong SAR, China",
  "Honduras", "Croatia", "Haiti", "Hungary", "Indonesia",
  "India", "Ireland", "Iran, Islamic Rep.", "Iraq", "Iceland",
  "Israel", "Italy", "Jamaica", "Jordan", "Japan",
  "Kazakhstan", "Kenya", "Kyrgyz Republic", "Cambodia", "Kiribati",
  "Korea, Rep.", "Kuwait", "Lao PDR", "Lebanon", "Liberia",
  "Libya", "St. Lucia", "Liechtenstein", "Sri Lanka", "Lesotho",
  "Lithuania", "Luxembourg", "Latvia", "Macao SAR, China", "Morocco",
  "Moldova", "Madagascar", "Maldives", "Mexico", "Macedonia, FYR",
  "Mali", "Malta", "Myanmar", "Montenegro", "Mongolia",
  "Mozambique", "Mauritania", "Mauritius", "Malawi", "Malaysia",
  "Namibia", "New Caledonia", "Niger", "Nigeria", "Nicaragua",
  "Netherlands", "Norway", "Nepal", "New Zealand", "Oman",
  "Pakistan", "Panama", "Peru", "Philippines", "Papua New Guinea",
  "Poland", "Puerto Rico", "Portugal", "Paraguay", "French Polynesia",
  "Qatar", "Romania", "Russian Federation", "Rwanda", "Saudi Arabia",
  "Sudan", "Senegal", "Singapore", "Solomon Islands", "Sierra Leone",
  "El Salvador", "Somalia", "Serbia", "South Sudan", "Sao Tome and Principe",
  "Suriname", "Slovak Republic", "Slovenia", "Sweden", "Swaziland",
  "Seychelles", "Syrian Arab Republic", "Chad", "Togo", "Thailand",
  "Tajikistan", "Turkmenistan", "Timor-Leste", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Tanzania", "Uganda", "Ukraine",
  "Uruguay", "United States", "Uzbekistan", "St. Vincent and the Grenadines",
  "Venezuela, RB",
  "Virgin Islands (U.S.)", "Vietnam", "Vanuatu", "West Bank and Gaza", "Samoa",
  "Yemen, Rep.", "South Africa", "Congo, Dem. Rep.", "Zambia", "Zimbabwe"
)
Codes_2012_Dataset <- c(
  "ABW", "AFG", "AGO", "ALB", "ARE", "ARG", "ARM", "ATG", "AUS", "AUT",
  "AZE", "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH",
  "BLR", "BLZ", "BMU", "BOL", "BRA", "BRB", "BRN", "BTN", "BWA", "CAF",
  "CAN", "CHE", "CHL", "CHN", "CIV", "CMR", "COG", "COL", "COM", "CPV",
  "CRI", "CUB", "CYM", "CYP", "CZE", "DEU", "DJI", "DNK", "DOM", "DZA",
  "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", "FSM",
  "GAB", "GBR", "GEO", "GHA", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD",
  "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN",
  "IND", "IRL", "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN",
  "KAZ", "KEN", "KGZ", "KHM", "KIR", "KOR", "KWT", "LAO", "LBN", "LBR",
  "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", "LVA", "MAC", "MAR",
  "MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", "MMR", "MNE", "MNG",
  "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", "NIC",
  "NLD", "NOR", "NPL", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PNG",
  "POL", "PRI", "PRT", "PRY", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU",
  "SDN", "SEN", "SGP", "SLB", "SLE", "SLV", "SOM", "SRB", "SSD", "STP",
  "SUR", "SVK", "SVN", "SWE", "SWZ", "SYC", "SYR", "TCD", "TGO", "THA",
  "TJK", "TKM", "TLS", "TON", "TTO", "TUN", "TUR", "TZA", "UGA", "UKR",
  "URY", "USA", "UZB", "VCT", "VEN", "VIR", "VNM", "VUT", "PSE", "WSM",
  "YEM", "ZAF", "COD", "ZMB", "ZWE"
)
Regions_2012_Dataset <- c(
  "The Americas", "Asia", "Africa", "Europe", "Middle East",
  "The Americas", "Asia", "The Americas", "Oceania", "Europe",
  "Asia", "Africa", "Europe", "Africa", "Africa",
  "Asia", "Europe", "Middle East", "The Americas", "Europe",
  "Europe", "The Americas", "The Americas", "The Americas", "The Americas",
  "The Americas", "Asia", "Asia", "Africa", "Africa",
  "The Americas", "Europe", "The Americas", "Asia", "Africa",
  "Africa", "Africa", "The Americas", "Africa", "Africa",
  "The Americas", "The Americas", "The Americas", "Europe", "Europe",
  "Europe", "Africa", "Europe", "The Americas", "Africa",
  "The Americas", "Africa", "Africa", "Europe", "Europe",
  "Africa", "Europe", "Oceania", "Europe", "Oceania",
  "Africa", "Europe", "Asia", "Africa", "Africa",
  "Africa", "Africa", "Africa", "Europe", "The Americas",
  "The Americas", "The Americas", "Oceania", "The Americas", "Asia",
  "The Americas", "Europe", "The Americas", "Europe", "Asia",
  "Asia", "Europe", "Middle East", "Middle East", "Europe",
  "Middle East", "Europe", "The Americas", "Middle East", "Asia",
  "Asia", "Africa", "Asia", "Asia", "Oceania",
  "Asia", "Middle East", "Asia", "Middle East", "Africa",
  "Africa", "The Americas", "Europe", "Asia", "Africa",
  "Europe", "Europe", "Europe", "Asia", "Africa",
  "Europe", "Africa", "Asia", "The Americas", "Europe",
  "Africa", "Europe", "Asia", "Europe", "Asia",
  "Africa", "Africa", "Africa", "Africa", "Asia",
  "Africa", "Oceania", "Africa", "Africa", "The Americas",
  "Europe", "Europe", "Asia", "Oceania", "Middle East",
  "Asia", "The Americas", "The Americas", "Asia", "Oceania",
  "Europe", "The Americas", "Europe", "The Americas", "Oceania",
  "Middle East", "Europe", "Europe", "Africa", "Middle East",
  "Africa", "Africa", "Asia", "Oceania", "Africa",
  "The Americas", "Africa", "Europe", "Africa", "Africa",
  "The Americas", "Europe", "Europe", "Europe", "Africa",
  "Africa", "Middle East", "Africa", "Africa", "Asia",
  "Asia", "Asia", "Asia", "Oceania", "The Americas",
  "Africa", "Europe", "Africa", "Africa", "Europe",
  "The Americas", "The Americas", "Asia", "The Americas", "The Americas",
  "The Americas", "Asia", "Oceania", "Middle East", "Oceania",
  "Middle East", "Africa", "Africa", "Africa", "Africa"
)
# Combine the three parallel vectors into one data frame; the columns are
# named after the vectors themselves
dt_frame <- data.frame(
  Countries_2012_Dataset,
  Codes_2012_Dataset,
  Regions_2012_Dataset
)
# Preview the first 10 rows
head(dt_frame, 10)
## Countries_2012_Dataset Codes_2012_Dataset Regions_2012_Dataset
## 1 Aruba ABW The Americas
## 2 Afghanistan AFG Asia
## 3 Angola AGO Africa
## 4 Albania ALB Europe
## 5 United Arab Emirates ARE Middle East
## 6 Argentina ARG The Americas
## 7 Armenia ARM Asia
## 8 Antigua and Barbuda ATG The Americas
## 9 Australia AUS Oceania
## 10 Austria AUT Europe
# Give the data frame columns shorter names
names(dt_frame) <- c("Country", "Codes", "Region")
# Preview the first 10 rows with the new column names
head(dt_frame, 10)
## Country Codes Region
## 1 Aruba ABW The Americas
## 2 Afghanistan AFG Asia
## 3 Angola AGO Africa
## 4 Albania ALB Europe
## 5 United Arab Emirates ARE Middle East
## 6 Argentina ARG The Americas
## 7 Armenia ARM Asia
## 8 Antigua and Barbuda ATG The Americas
## 9 Australia AUS Oceania
## 10 Austria AUT Europe
# Show a per-column summary of dt_frame
summary(dt_frame)
## Country Codes Region
## Length:195 Length:195 Length:195
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Join dt_demografi with dt_frame, matching Country.Code (left) against
# Codes (right)
merge_data <- merge(
  x = dt_demografi,
  y = dt_frame,
  by.x = "Country.Code",
  by.y = "Codes"
)
# Both inputs carry a country-name column; drop Country.Name and keep Country
merge_data[["Country.Name"]] <- NULL
# Preview the first 10 rows of the merged data
head(merge_data, 10)
## Country.Code Birth.rate Internet.users Income.Group
## 1 ABW 10.244 78.9000 High income
## 2 AFG 35.253 5.9000 Low income
## 3 AGO 45.985 19.1000 Upper middle income
## 4 ALB 12.877 57.2000 Upper middle income
## 5 ARE 11.044 88.0000 High income
## 6 ARG 17.716 59.9000 High income
## 7 ARM 13.308 41.9000 Lower middle income
## 8 ATG 16.447 63.4000 High income
## 9 AUS 13.200 83.0000 High income
## 10 AUT 9.400 80.6188 High income
## Country Region
## 1 Aruba The Americas
## 2 Afghanistan Asia
## 3 Angola Africa
## 4 Albania Europe
## 5 United Arab Emirates Middle East
## 6 Argentina The Americas
## 7 Armenia Asia
## 8 Antigua and Barbuda The Americas
## 9 Australia Oceania
## 10 Austria Europe
# Inspect the structure of the merged data frame
str(merge_data)
## 'data.frame': 195 obs. of 6 variables:
## $ Country.Code : chr "ABW" "AFG" "AGO" "ALB" ...
## $ Birth.rate : num 10.2 35.3 46 12.9 11 ...
## $ Internet.users: num 78.9 5.9 19.1 57.2 88 ...
## $ Income.Group : chr "High income" "Low income" "Upper middle income" "Upper middle income" ...
## $ Country : chr "Aruba" "Afghanistan" "Angola" "Albania" ...
## $ Region : chr "The Americas" "Asia" "Africa" "Europe" ...
# Scatter plot of Birth.rate against Internet.users, coloured by Region.
# qplot() is deprecated since ggplot2 3.4.0, so the plots are built with
# ggplot() + geom_point(). Constant settings (size, shape, alpha) are passed
# straight to geom_point() instead of being wrapped in I().
ggplot(merge_data, aes(x = Internet.users, y = Birth.rate, colour = Region)) +
  geom_point(size = 3)

# Draw the points as filled squares (shape 15)
ggplot(merge_data, aes(x = Internet.users, y = Birth.rate, colour = Region)) +
  geom_point(size = 3, shape = 15)

# Make the points semi-transparent so overlapping countries stay visible.
# alpha must be a constant in [0, 1]; the previous `alpha=10` was outside that
# range and was treated as a mapped aesthetic (adding a legend entry) rather
# than as a transparency setting.
ggplot(merge_data, aes(x = Internet.users, y = Birth.rate, colour = Region)) +
  geom_point(size = 3, shape = 19, alpha = 0.6)