# Load the demographic dataset from the CSV file into a data frame.
data_demografi <- read.csv(file = "Demographic-Data.csv")
# Preview the first six rows (six is head()'s default).
head(data_demografi, n = 6L)
##            Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1                Aruba         ABW          10.244              78.9
## 2          Afghanistan         AFG          35.253               5.9
## 3               Angola         AGO          45.985              19.1
## 4              Albania         ALB          12.877              57.2
## 5 United Arab Emirates         ARE          11.044              88.0
## 6            Argentina         ARG          17.716              59.9
##       Pendapatan.Grup
## 1         High income
## 2          Low income
## 3 Upper middle income
## 4 Upper middle income
## 5         High income
## 6         High income
# Print a per-column summary of the loaded data frame.
summary(data_demografi)
##  Nama.Negara        Kode.Negara        Angka.Kelahiran Pengguna.Internet
##  Length:195         Length:195         Min.   : 7.90   Min.   : 0.90    
##  Class :character   Class :character   1st Qu.:12.12   1st Qu.:14.52    
##  Mode  :character   Mode  :character   Median :19.68   Median :41.00    
##                                        Mean   :21.47   Mean   :42.08    
##                                        3rd Qu.:29.76   3rd Qu.:66.22    
##                                        Max.   :49.66   Max.   :96.55    
##  Pendapatan.Grup   
##  Length:195        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Rows whose birth rate (Angka.Kelahiran) is above 2.
# filter_br is a logical mask with one entry per row of data_demografi.
filter_br <- data_demografi[["Angka.Kelahiran"]] > 2
head(data_demografi[filter_br, ], n = 6L)
##            Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1                Aruba         ABW          10.244              78.9
## 2          Afghanistan         AFG          35.253               5.9
## 3               Angola         AGO          45.985              19.1
## 4              Albania         ALB          12.877              57.2
## 5 United Arab Emirates         ARE          11.044              88.0
## 6            Argentina         ARG          17.716              59.9
##       Pendapatan.Grup
## 1         High income
## 2          Low income
## 3 Upper middle income
## 4 Upper middle income
## 5         High income
## 6         High income
# Rows whose birth rate is greater than 40 (first six matches).
head(data_demografi[data_demografi[["Angka.Kelahiran"]] > 40, ], n = 6L)
##      Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 3         Angola         AGO          45.985              19.1
## 12       Burundi         BDI          44.151               1.3
## 15  Burkina Faso         BFA          40.551               9.1
## 66   Gambia, The         GMB          42.525              14.0
## 116         Mali         MLI          44.138               3.5
## 128        Niger         NER          49.661               1.7
##         Pendapatan.Grup
## 3   Upper middle income
## 12           Low income
## 15           Low income
## 66           Low income
## 116          Low income
## 128          Low income
# A comparison such as Angka.Kelahiran > 40 yields a logical (TRUE/FALSE)
# vector with one entry per row; `&` combines two such vectors element-wise.
# Rows with birth rate > 40 AND internet users < 2.
head(
  data_demografi[
    with(data_demografi, Angka.Kelahiran > 40 & Pengguna.Internet < 2),
  ],
  n = 6L
)
##     Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet Pendapatan.Grup
## 12      Burundi         BDI          44.151               1.3      Low income
## 128       Niger         NER          49.661               1.7      Low income
## 157     Somalia         SOM          43.891               1.5      Low income
# Rows in the "High income" group; the Pendapatan.Grup column holds
# categorical labels, compared here as plain strings.
head(data_demografi[data_demografi[["Pendapatan.Grup"]] == "High income", ], n = 6L)
##             Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 1                 Aruba         ABW          10.244           78.9000
## 5  United Arab Emirates         ARE          11.044           88.0000
## 6             Argentina         ARG          17.716           59.9000
## 8   Antigua and Barbuda         ATG          16.447           63.4000
## 9             Australia         AUS          13.200           83.0000
## 10              Austria         AUT           9.400           80.6188
##    Pendapatan.Grup
## 1      High income
## 5      High income
## 6      High income
## 8      High income
## 9      High income
## 10     High income
# Look up the record(s) for Indonesia by country name.
head(data_demografi[data_demografi[["Nama.Negara"]] == "Indonesia", ], n = 6L)
##    Nama.Negara Kode.Negara Angka.Kelahiran Pengguna.Internet
## 80   Indonesia         IDN          20.297             14.94
##        Pendapatan.Grup
## 80 Lower middle income
# Plotting
library(ggplot2)
# Scatter plot of birth rate against internet users, every point at size 4.
# qplot() is deprecated (ggplot2 3.4.0), so the same plot is built with
# ggplot() + geom_point(); the constant size goes outside aes().
ggplot(data_demografi, aes(x = Pengguna.Internet, y = Angka.Kelahiran)) +
  geom_point(size = 4)

# Same scatter plot with a fixed size and a fixed green colour.
# Values given to geom_point() outside aes() are constants, not mapped
# aesthetics, so no legend is drawn for them (the role I() played in qplot()).
# qplot() is deprecated (ggplot2 3.4.0), hence ggplot() + geom_point().
ggplot(data_demografi, aes(x = Pengguna.Internet, y = Angka.Kelahiran)) +
  geom_point(size = 4, colour = "green")

# Scatter plot of internet users vs. birth rate, coloured by income group
# (Pendapatan.Grup is mapped inside aes(), so it gets a legend).
# qplot() is deprecated (ggplot2 3.4.0), hence ggplot() + geom_point();
# the fixed point size is set outside aes().
ggplot(
  data_demografi,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Pendapatan.Grup)
) +
  geom_point(size = 3)

#Scatter Plot antara Pengguna.Internet & Angka.Kelahiran dan dikategorikan berdasarkan Wilayah Negara
#Data tambahan Tersedia dalam vektor R

# Country names, one per row of the 2012 dataset, in the same order as
# Kode_2012_Dataset and Wilayah_2012_Dataset.
# Fix: "St. Lucia" was split across two source lines inside its quotes, which
# embedded a newline in the value; it is now a single-line string.
Negara_2012_Dataset <- c(
  "Aruba", "Afghanistan", "Angola", "Albania", "United Arab Emirates",
  "Argentina", "Armenia", "Antigua and Barbuda", "Australia", "Austria",
  "Azerbaijan", "Burundi", "Belgium", "Benin", "Burkina Faso",
  "Bangladesh", "Bulgaria", "Bahrain", "Bahamas, The", "Bosnia and Herzegovina",
  "Belarus", "Belize", "Bermuda", "Bolivia", "Brazil",
  "Barbados", "Brunei Darussalam", "Bhutan", "Botswana", "Central African Republic",
  "Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire",
  "Cameroon", "Congo, Rep.", "Colombia", "Comoros", "Cabo Verde",
  "Costa Rica", "Cuba", "Cayman Islands", "Cyprus", "Czech Republic",
  "Germany", "Djibouti", "Denmark", "Dominican Republic", "Algeria",
  "Ecuador", "Egypt, Arab Rep.", "Eritrea", "Spain", "Estonia",
  "Ethiopia", "Finland", "Fiji", "France", "Micronesia, Fed. Sts.",
  "Gabon", "United Kingdom", "Georgia", "Ghana", "Guinea",
  "Gambia, The", "Guinea-Bissau", "Equatorial Guinea", "Greece", "Grenada",
  "Greenland", "Guatemala", "Guam", "Guyana", "Hong Kong SAR, China",
  "Honduras", "Croatia", "Haiti", "Hungary", "Indonesia",
  "India", "Ireland", "Iran, Islamic Rep.", "Iraq", "Iceland",
  "Israel", "Italy", "Jamaica", "Jordan", "Japan",
  "Kazakhstan", "Kenya", "Kyrgyz Republic", "Cambodia", "Kiribati",
  "Korea, Rep.", "Kuwait", "Lao PDR", "Lebanon", "Liberia",
  "Libya", "St. Lucia", "Liechtenstein", "Sri Lanka", "Lesotho",
  "Lithuania", "Luxembourg", "Latvia", "Macao SAR, China", "Morocco",
  "Moldova", "Madagascar", "Maldives", "Mexico", "Macedonia, FYR",
  "Mali", "Malta", "Myanmar", "Montenegro", "Mongolia",
  "Mozambique", "Mauritania", "Mauritius", "Malawi", "Malaysia",
  "Namibia", "New Caledonia", "Niger", "Nigeria", "Nicaragua",
  "Netherlands", "Norway", "Nepal", "New Zealand", "Oman",
  "Pakistan", "Panama", "Peru", "Philippines", "Papua New Guinea",
  "Poland", "Puerto Rico", "Portugal", "Paraguay", "French Polynesia",
  "Qatar", "Romania", "Russian Federation", "Rwanda", "Saudi Arabia",
  "Sudan", "Senegal", "Singapore", "Solomon Islands", "Sierra Leone",
  "El Salvador", "Somalia", "Serbia", "South Sudan", "Sao Tome and Principe",
  "Suriname", "Slovak Republic", "Slovenia", "Sweden", "Swaziland",
  "Seychelles", "Syrian Arab Republic", "Chad", "Togo", "Thailand",
  "Tajikistan", "Turkmenistan", "Timor-Leste", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Tanzania", "Uganda", "Ukraine",
  "Uruguay", "United States", "Uzbekistan", "St. Vincent and the Grenadines", "Venezuela, RB",
  "Virgin Islands (U.S.)", "Vietnam", "Vanuatu", "West Bank and Gaza", "Samoa",
  "Yemen, Rep.", "South Africa", "Congo, Dem. Rep.", "Zambia", "Zimbabwe"
)
# Three-letter country codes, in the same order as Negara_2012_Dataset
# (ten codes per line).
Kode_2012_Dataset <- c(
  "ABW", "AFG", "AGO", "ALB", "ARE", "ARG", "ARM", "ATG", "AUS", "AUT",
  "AZE", "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH",
  "BLR", "BLZ", "BMU", "BOL", "BRA", "BRB", "BRN", "BTN", "BWA", "CAF",
  "CAN", "CHE", "CHL", "CHN", "CIV", "CMR", "COG", "COL", "COM", "CPV",
  "CRI", "CUB", "CYM", "CYP", "CZE", "DEU", "DJI", "DNK", "DOM", "DZA",
  "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", "FSM",
  "GAB", "GBR", "GEO", "GHA", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD",
  "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN",
  "IND", "IRL", "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN",
  "KAZ", "KEN", "KGZ", "KHM", "KIR", "KOR", "KWT", "LAO", "LBN", "LBR",
  "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", "LVA", "MAC", "MAR",
  "MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", "MMR", "MNE", "MNG",
  "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", "NIC",
  "NLD", "NOR", "NPL", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PNG",
  "POL", "PRI", "PRT", "PRY", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU",
  "SDN", "SEN", "SGP", "SLB", "SLE", "SLV", "SOM", "SRB", "SSD", "STP",
  "SUR", "SVK", "SVN", "SWE", "SWZ", "SYC", "SYR", "TCD", "TGO", "THA",
  "TJK", "TKM", "TLS", "TON", "TTO", "TUN", "TUR", "TZA", "UGA", "UKR",
  "URY", "USA", "UZB", "VCT", "VEN", "VIR", "VNM", "VUT", "PSE", "WSM",
  "YEM", "ZAF", "COD", "ZMB", "ZWE"
)
# Region label for each country, in the same order as Negara_2012_Dataset and
# Kode_2012_Dataset (five labels per line).
# Fix: one "Middle East" entry was split across two source lines inside its
# quotes, which embedded a newline and produced a distinct region value;
# it is now a single-line string.
Wilayah_2012_Dataset <- c(
  "The Americas", "Asia", "Africa", "Europe", "Middle East",
  "The Americas", "Asia", "The Americas", "Oceania", "Europe",
  "Asia", "Africa", "Europe", "Africa", "Africa",
  "Asia", "Europe", "Middle East", "The Americas", "Europe",
  "Europe", "The Americas", "The Americas", "The Americas", "The Americas",
  "The Americas", "Asia", "Asia", "Africa", "Africa",
  "The Americas", "Europe", "The Americas", "Asia", "Africa",
  "Africa", "Africa", "The Americas", "Africa", "Africa",
  "The Americas", "The Americas", "The Americas", "Europe", "Europe",
  "Europe", "Africa", "Europe", "The Americas", "Africa",
  "The Americas", "Africa", "Africa", "Europe", "Europe",
  "Africa", "Europe", "Oceania", "Europe", "Oceania",
  "Africa", "Europe", "Asia", "Africa", "Africa",
  "Africa", "Africa", "Africa", "Europe", "The Americas",
  "The Americas", "The Americas", "Oceania", "The Americas", "Asia",
  "The Americas", "Europe", "The Americas", "Europe", "Asia",
  "Asia", "Europe", "Middle East", "Middle East", "Europe",
  "Middle East", "Europe", "The Americas", "Middle East", "Asia",
  "Asia", "Africa", "Asia", "Asia", "Oceania",
  "Asia", "Middle East", "Asia", "Middle East", "Africa",
  "Africa", "The Americas", "Europe", "Asia", "Africa",
  "Europe", "Europe", "Europe", "Asia", "Africa",
  "Europe", "Africa", "Asia", "The Americas", "Europe",
  "Africa", "Europe", "Asia", "Europe", "Asia",
  "Africa", "Africa", "Africa", "Africa", "Asia",
  "Africa", "Oceania", "Africa", "Africa", "The Americas",
  "Europe", "Europe", "Asia", "Oceania", "Middle East",
  "Asia", "The Americas", "The Americas", "Asia", "Oceania",
  "Europe", "The Americas", "Europe", "The Americas", "Oceania",
  "Middle East", "Europe", "Europe", "Africa", "Middle East",
  "Africa", "Africa", "Asia", "Oceania", "Africa",
  "The Americas", "Africa", "Europe", "Africa", "Africa",
  "The Americas", "Europe", "Europe", "Europe", "Africa",
  "Africa", "Middle East", "Africa", "Africa", "Asia",
  "Asia", "Asia", "Asia", "Oceania", "The Americas",
  "Africa", "Europe", "Africa", "Africa", "Europe",
  "The Americas", "The Americas", "Asia", "The Americas", "The Americas",
  "The Americas", "Asia", "Oceania", "Middle East", "Oceania",
  "Middle East", "Africa", "Africa", "Africa", "Africa"
)


# ---- Build a lookup data frame from the three parallel vectors ----
# Column names are spelled out; they equal the vector names, which is what
# data.frame() derives when the arguments are passed unnamed.
myDF <- data.frame(
  Negara_2012_Dataset = Negara_2012_Dataset,
  Kode_2012_Dataset = Kode_2012_Dataset,
  Wilayah_2012_Dataset = Wilayah_2012_Dataset
)
head(myDF, n = 6L)
##    Negara_2012_Dataset Kode_2012_Dataset Wilayah_2012_Dataset
## 1                Aruba               ABW         The Americas
## 2          Afghanistan               AFG                 Asia
## 3               Angola               AGO               Africa
## 4              Albania               ALB               Europe
## 5 United Arab Emirates               ARE          Middle East
## 6            Argentina               ARG         The Americas
# Give myDF's three columns shorter names, then preview the result.
names(myDF) <- c("Country", "Kode", "Region")
head(myDF, n = 6L)
##                Country Kode       Region
## 1                Aruba  ABW The Americas
## 2          Afghanistan  AFG         Asia
## 3               Angola  AGO       Africa
## 4              Albania  ALB       Europe
## 5 United Arab Emirates  ARE  Middle East
## 6            Argentina  ARG The Americas
# Print a per-column summary of the lookup data frame.
summary(myDF)
##    Country              Kode              Region         
##  Length:195         Length:195         Length:195        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Join the demographic data with the region lookup, matching
# data_demografi$Kode.Negara against myDF$Kode.
merge_data <- merge(
  x = data_demografi,
  y = myDF,
  by.x = "Kode.Negara",
  by.y = "Kode"
)
# The joined frame carries the country name twice (Nama.Negara and Country);
# drop Nama.Negara and keep Country.
merge_data[["Nama.Negara"]] <- NULL
head(merge_data, n = 6L)
##   Kode.Negara Angka.Kelahiran Pengguna.Internet     Pendapatan.Grup
## 1         ABW          10.244              78.9         High income
## 2         AFG          35.253               5.9          Low income
## 3         AGO          45.985              19.1 Upper middle income
## 4         ALB          12.877              57.2 Upper middle income
## 5         ARE          11.044              88.0         High income
## 6         ARG          17.716              59.9         High income
##                Country       Region
## 1                Aruba The Americas
## 2          Afghanistan         Asia
## 3               Angola       Africa
## 4              Albania       Europe
## 5 United Arab Emirates  Middle East
## 6            Argentina The Americas
# Show the structure (column names and types) of the merged data frame.
str(merge_data)
## 'data.frame':    195 obs. of  6 variables:
##  $ Kode.Negara      : chr  "ABW" "AFG" "AGO" "ALB" ...
##  $ Angka.Kelahiran  : num  10.2 35.3 46 12.9 11 ...
##  $ Pengguna.Internet: num  78.9 5.9 19.1 57.2 88 ...
##  $ Pendapatan.Grup  : chr  "High income" "Low income" "Upper middle income" "Upper middle income" ...
##  $ Country          : chr  "Aruba" "Afghanistan" "Angola" "Albania" ...
##  $ Region           : chr  "The Americas" "Asia" "Africa" "Europe" ...
# Scatter plot of internet users vs. birth rate, coloured by region.
# qplot() is deprecated (ggplot2 3.4.0), hence ggplot() + geom_point();
# Region is mapped inside aes() (legend), the fixed size is set outside it.
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3)

# Customising the scatter plot above.
# 1. Shape: numeric point-shape codes run from 0 to 25 (see the ggplot2
#    aesthetic specifications); code 15 is used here as a constant.
# qplot() is deprecated (ggplot2 3.4.0), hence ggplot() + geom_point().
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3, shape = 15)

# 2. Transparency
# alpha must lie in [0, 1] and, to apply uniformly, be set as a constant
# outside aes(). The previous `alpha=10` was mapped as an aesthetic (it was not
# wrapped in I()), which adds a meaningless legend instead of making points
# translucent. 0.6 is a moderate transparency; adjust to taste.
# qplot() is deprecated (ggplot2 3.4.0), hence ggplot() + geom_point().
ggplot(
  merge_data,
  aes(x = Pengguna.Internet, y = Angka.Kelahiran, colour = Region)
) +
  geom_point(size = 3, shape = 19, alpha = 0.6)