Skenario

Analisa Data Demografi

# Load the demographic data set from its CSV file and preview the first rows.
dt_demografi <- read.csv(file = "P2-Demographic-Data.csv")
head(x = dt_demografi, n = 10L)
##            Country.Name Country.Code Birth.rate Internet.users
## 1                 Aruba          ABW     10.244        78.9000
## 2           Afghanistan          AFG     35.253         5.9000
## 3                Angola          AGO     45.985        19.1000
## 4               Albania          ALB     12.877        57.2000
## 5  United Arab Emirates          ARE     11.044        88.0000
## 6             Argentina          ARG     17.716        59.9000
## 7               Armenia          ARM     13.308        41.9000
## 8   Antigua and Barbuda          ATG     16.447        63.4000
## 9             Australia          AUS     13.200        83.0000
## 10              Austria          AUT      9.400        80.6188
##           Income.Group
## 1          High income
## 2           Low income
## 3  Upper middle income
## 4  Upper middle income
## 5          High income
## 6          High income
## 7  Lower middle income
## 8          High income
## 9          High income
## 10         High income
# Per-column summary statistics of the demographic data.
summary(object = dt_demografi)
##  Country.Name       Country.Code         Birth.rate    Internet.users 
##  Length:195         Length:195         Min.   : 7.90   Min.   : 0.90  
##  Class :character   Class :character   1st Qu.:12.12   1st Qu.:14.52  
##  Mode  :character   Mode  :character   Median :19.68   Median :41.00  
##                                        Mean   :21.47   Mean   :42.08  
##                                        3rd Qu.:29.76   3rd Qu.:66.22  
##                                        Max.   :49.66   Max.   :96.55  
##  Income.Group      
##  Length:195        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Keep the rows whose birth rate exceeds 2 and preview the first ten.
# NOTE(review): the summary output reports a minimum Birth.rate of 7.90, so
# this mask is TRUE for every row -- confirm the threshold is intended.
filter_br <- dt_demografi[["Birth.rate"]] > 2
head(x = dt_demografi[filter_br, ], n = 10L)
##            Country.Name Country.Code Birth.rate Internet.users
## 1                 Aruba          ABW     10.244        78.9000
## 2           Afghanistan          AFG     35.253         5.9000
## 3                Angola          AGO     45.985        19.1000
## 4               Albania          ALB     12.877        57.2000
## 5  United Arab Emirates          ARE     11.044        88.0000
## 6             Argentina          ARG     17.716        59.9000
## 7               Armenia          ARM     13.308        41.9000
## 8   Antigua and Barbuda          ATG     16.447        63.4000
## 9             Australia          AUS     13.200        83.0000
## 10              Austria          AUT      9.400        80.6188
##           Income.Group
## 1          High income
## 2           Low income
## 3  Upper middle income
## 4  Upper middle income
## 5          High income
## 6          High income
## 7  Lower middle income
## 8          High income
## 9          High income
## 10         High income
# Countries with a birth rate above 40 (first ten matches).
head(x = dt_demografi[dt_demografi[["Birth.rate"]] > 40, ], n = 10L)
##     Country.Name Country.Code Birth.rate Internet.users        Income.Group
## 3         Angola          AGO     45.985           19.1 Upper middle income
## 12       Burundi          BDI     44.151            1.3          Low income
## 15  Burkina Faso          BFA     40.551            9.1          Low income
## 66   Gambia, The          GMB     42.525           14.0          Low income
## 116         Mali          MLI     44.138            3.5          Low income
## 128        Niger          NER     49.661            1.7          Low income
## 129      Nigeria          NGA     40.045           38.0 Lower middle income
## 157      Somalia          SOM     43.891            1.5          Low income
## 168         Chad          TCD     45.745            2.3          Low income
## 179       Uganda          UGA     43.474           16.2          Low income
# Here dt_demografi$Birth.rate > 40 is a logical vector (TRUE where the condition holds); indexing the rows with it keeps only the matching rows.
# Countries with a birth rate above 40 and an Internet.users value below 2.
head(
  x = dt_demografi[
    dt_demografi[["Birth.rate"]] > 40 & dt_demografi[["Internet.users"]] < 2,
  ]
)
##     Country.Name Country.Code Birth.rate Internet.users Income.Group
## 12       Burundi          BDI     44.151            1.3   Low income
## 128        Niger          NER     49.661            1.7   Low income
## 157      Somalia          SOM     43.891            1.5   Low income
# Rows in the "High income" group; Income.Group holds categorical labels.
head(
  x = dt_demografi[dt_demografi[["Income.Group"]] == "High income", ],
  n = 10L
)
##            Country.Name Country.Code Birth.rate Internet.users Income.Group
## 1                 Aruba          ABW     10.244       78.90000  High income
## 5  United Arab Emirates          ARE     11.044       88.00000  High income
## 6             Argentina          ARG     17.716       59.90000  High income
## 8   Antigua and Barbuda          ATG     16.447       63.40000  High income
## 9             Australia          AUS     13.200       83.00000  High income
## 10              Austria          AUT      9.400       80.61880  High income
## 13              Belgium          BEL     11.200       82.17020  High income
## 18              Bahrain          BHR     15.040       90.00004  High income
## 19         Bahamas, The          BHS     15.339       72.00000  High income
## 23              Bermuda          BMU     10.400       95.30000  High income
# Look up the record for Argentina.
head(x = dt_demografi[dt_demografi[["Country.Name"]] == "Argentina", ])
##   Country.Name Country.Code Birth.rate Internet.users Income.Group
## 6    Argentina          ARG     17.716           59.9  High income

Plotting Data Demografi

#load ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# Scatter plot of birth rate (Birth.rate) against internet usage
# (Internet.users). Built with ggplot() + geom_point() because qplot() is
# deprecated as of ggplot2 3.4.0; the resulting plot is the same.
ggplot(data = dt_demografi, mapping = aes(x = Internet.users, y = Birth.rate)) +
  geom_point(size = 4)

# Same scatter plot with every point drawn in blue. Colour and size are fixed
# parameters of the layer (not mapped aesthetics), so no legend is produced.
# Uses ggplot() + geom_point() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(data = dt_demografi, mapping = aes(x = Internet.users, y = Birth.rate)) +
  geom_point(size = 4, colour = "blue")

# Scatter plot with points coloured by the country's income group
# (Income.Group). Colour is a mapped aesthetic here, so a legend is drawn.
# Uses ggplot() + geom_point() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(
  data = dt_demografi,
  mapping = aes(x = Internet.users, y = Birth.rate, colour = Income.Group)
) +
  geom_point(size = 3)

# This time the scatter plot has to be categorised by the country's region.
# The additional data is supplied as plain R vectors.

# Country names, position-aligned with Codes_2012_Dataset and
# Regions_2012_Dataset. Every name is written on a single line: a string
# literal that wraps across source lines would embed a newline in the value
# (previously "St. Lucia" was affected).
Countries_2012_Dataset <- c(
  "Aruba", "Afghanistan", "Angola", "Albania", "United Arab Emirates",
  "Argentina", "Armenia", "Antigua and Barbuda", "Australia", "Austria",
  "Azerbaijan", "Burundi", "Belgium", "Benin", "Burkina Faso",
  "Bangladesh", "Bulgaria", "Bahrain", "Bahamas, The", "Bosnia and Herzegovina",
  "Belarus", "Belize", "Bermuda", "Bolivia", "Brazil",
  "Barbados", "Brunei Darussalam", "Bhutan", "Botswana",
  "Central African Republic",
  "Canada", "Switzerland", "Chile", "China", "Cote d'Ivoire",
  "Cameroon", "Congo, Rep.", "Colombia", "Comoros", "Cabo Verde",
  "Costa Rica", "Cuba", "Cayman Islands", "Cyprus", "Czech Republic",
  "Germany", "Djibouti", "Denmark", "Dominican Republic", "Algeria",
  "Ecuador", "Egypt, Arab Rep.", "Eritrea", "Spain", "Estonia",
  "Ethiopia", "Finland", "Fiji", "France", "Micronesia, Fed. Sts.",
  "Gabon", "United Kingdom", "Georgia", "Ghana", "Guinea",
  "Gambia, The", "Guinea-Bissau", "Equatorial Guinea", "Greece", "Grenada",
  "Greenland", "Guatemala", "Guam", "Guyana", "Hong Kong SAR, China",
  "Honduras", "Croatia", "Haiti", "Hungary", "Indonesia",
  "India", "Ireland", "Iran, Islamic Rep.", "Iraq", "Iceland",
  "Israel", "Italy", "Jamaica", "Jordan", "Japan",
  "Kazakhstan", "Kenya", "Kyrgyz Republic", "Cambodia", "Kiribati",
  "Korea, Rep.", "Kuwait", "Lao PDR", "Lebanon", "Liberia",
  "Libya", "St. Lucia", "Liechtenstein", "Sri Lanka", "Lesotho",
  "Lithuania", "Luxembourg", "Latvia", "Macao SAR, China", "Morocco",
  "Moldova", "Madagascar", "Maldives", "Mexico", "Macedonia, FYR",
  "Mali", "Malta", "Myanmar", "Montenegro", "Mongolia",
  "Mozambique", "Mauritania", "Mauritius", "Malawi", "Malaysia",
  "Namibia", "New Caledonia", "Niger", "Nigeria", "Nicaragua",
  "Netherlands", "Norway", "Nepal", "New Zealand", "Oman",
  "Pakistan", "Panama", "Peru", "Philippines", "Papua New Guinea",
  "Poland", "Puerto Rico", "Portugal", "Paraguay", "French Polynesia",
  "Qatar", "Romania", "Russian Federation", "Rwanda", "Saudi Arabia",
  "Sudan", "Senegal", "Singapore", "Solomon Islands", "Sierra Leone",
  "El Salvador", "Somalia", "Serbia", "South Sudan", "Sao Tome and Principe",
  "Suriname", "Slovak Republic", "Slovenia", "Sweden", "Swaziland",
  "Seychelles", "Syrian Arab Republic", "Chad", "Togo", "Thailand",
  "Tajikistan", "Turkmenistan", "Timor-Leste", "Tonga", "Trinidad and Tobago",
  "Tunisia", "Turkey", "Tanzania", "Uganda", "Ukraine",
  "Uruguay", "United States", "Uzbekistan", "St. Vincent and the Grenadines",
  "Venezuela, RB",
  "Virgin Islands (U.S.)", "Vietnam", "Vanuatu", "West Bank and Gaza", "Samoa",
  "Yemen, Rep.", "South Africa", "Congo, Dem. Rep.", "Zambia", "Zimbabwe"
)
# Three-letter country codes, position-aligned with Countries_2012_Dataset
# and Regions_2012_Dataset (ten codes per line).
Codes_2012_Dataset <- c(
  "ABW", "AFG", "AGO", "ALB", "ARE", "ARG", "ARM", "ATG", "AUS", "AUT",
  "AZE", "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH",
  "BLR", "BLZ", "BMU", "BOL", "BRA", "BRB", "BRN", "BTN", "BWA", "CAF",
  "CAN", "CHE", "CHL", "CHN", "CIV", "CMR", "COG", "COL", "COM", "CPV",
  "CRI", "CUB", "CYM", "CYP", "CZE", "DEU", "DJI", "DNK", "DOM", "DZA",
  "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", "FSM",
  "GAB", "GBR", "GEO", "GHA", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD",
  "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN",
  "IND", "IRL", "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN",
  "KAZ", "KEN", "KGZ", "KHM", "KIR", "KOR", "KWT", "LAO", "LBN", "LBR",
  "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", "LVA", "MAC", "MAR",
  "MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", "MMR", "MNE", "MNG",
  "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", "NIC",
  "NLD", "NOR", "NPL", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PNG",
  "POL", "PRI", "PRT", "PRY", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU",
  "SDN", "SEN", "SGP", "SLB", "SLE", "SLV", "SOM", "SRB", "SSD", "STP",
  "SUR", "SVK", "SVN", "SWE", "SWZ", "SYC", "SYR", "TCD", "TGO", "THA",
  "TJK", "TKM", "TLS", "TON", "TTO", "TUN", "TUR", "TZA", "UGA", "UKR",
  "URY", "USA", "UZB", "VCT", "VEN", "VIR", "VNM", "VUT", "PSE", "WSM",
  "YEM", "ZAF", "COD", "ZMB", "ZWE"
)
# Region label for each country, position-aligned with Countries_2012_Dataset
# and Codes_2012_Dataset (five labels per line). Every label is written on a
# single line: a string literal that wraps across source lines would embed a
# newline and turn one "Middle East" entry into a separate, spurious category.
Regions_2012_Dataset <- c(
  "The Americas", "Asia", "Africa", "Europe", "Middle East",
  "The Americas", "Asia", "The Americas", "Oceania", "Europe",
  "Asia", "Africa", "Europe", "Africa", "Africa",
  "Asia", "Europe", "Middle East", "The Americas", "Europe",
  "Europe", "The Americas", "The Americas", "The Americas", "The Americas",
  "The Americas", "Asia", "Asia", "Africa", "Africa",
  "The Americas", "Europe", "The Americas", "Asia", "Africa",
  "Africa", "Africa", "The Americas", "Africa", "Africa",
  "The Americas", "The Americas", "The Americas", "Europe", "Europe",
  "Europe", "Africa", "Europe", "The Americas", "Africa",
  "The Americas", "Africa", "Africa", "Europe", "Europe",
  "Africa", "Europe", "Oceania", "Europe", "Oceania",
  "Africa", "Europe", "Asia", "Africa", "Africa",
  "Africa", "Africa", "Africa", "Europe", "The Americas",
  "The Americas", "The Americas", "Oceania", "The Americas", "Asia",
  "The Americas", "Europe", "The Americas", "Europe", "Asia",
  "Asia", "Europe", "Middle East", "Middle East", "Europe",
  "Middle East", "Europe", "The Americas", "Middle East", "Asia",
  "Asia", "Africa", "Asia", "Asia", "Oceania",
  "Asia", "Middle East", "Asia", "Middle East", "Africa",
  "Africa", "The Americas", "Europe", "Asia", "Africa",
  "Europe", "Europe", "Europe", "Asia", "Africa",
  "Europe", "Africa", "Asia", "The Americas", "Europe",
  "Africa", "Europe", "Asia", "Europe", "Asia",
  "Africa", "Africa", "Africa", "Africa", "Asia",
  "Africa", "Oceania", "Africa", "Africa", "The Americas",
  "Europe", "Europe", "Asia", "Oceania", "Middle East",
  "Asia", "The Americas", "The Americas", "Asia", "Oceania",
  "Europe", "The Americas", "Europe", "The Americas", "Oceania",
  "Middle East", "Europe", "Europe", "Africa", "Middle East",
  "Africa", "Africa", "Asia", "Oceania", "Africa",
  "The Americas", "Africa", "Europe", "Africa", "Africa",
  "The Americas", "Europe", "Europe", "Europe", "Africa",
  "Africa", "Middle East", "Africa", "Africa", "Asia",
  "Asia", "Asia", "Asia", "Oceania", "The Americas",
  "Africa", "Europe", "Africa", "Africa", "Europe",
  "The Americas", "The Americas", "Asia", "The Americas", "The Americas",
  "The Americas", "Asia", "Oceania", "Middle East", "Oceania",
  "Middle East", "Africa", "Africa", "Africa", "Africa"
)
# Assemble the country, code and region vectors into one data frame; the
# columns take the names of the source vectors.
dt_frame <- data.frame(
  Countries_2012_Dataset = Countries_2012_Dataset,
  Codes_2012_Dataset = Codes_2012_Dataset,
  Regions_2012_Dataset = Regions_2012_Dataset
)
head(x = dt_frame, n = 10L)
##    Countries_2012_Dataset Codes_2012_Dataset Regions_2012_Dataset
## 1                   Aruba                ABW         The Americas
## 2             Afghanistan                AFG                 Asia
## 3                  Angola                AGO               Africa
## 4                 Albania                ALB               Europe
## 5    United Arab Emirates                ARE          Middle East
## 6               Argentina                ARG         The Americas
## 7                 Armenia                ARM                 Asia
## 8     Antigua and Barbuda                ATG         The Americas
## 9               Australia                AUS              Oceania
## 10                Austria                AUT               Europe
# Give the data frame columns shorter names.
names(dt_frame) <- c("Country", "Codes", "Region")
head(x = dt_frame, n = 10L)
##                 Country Codes       Region
## 1                 Aruba   ABW The Americas
## 2           Afghanistan   AFG         Asia
## 3                Angola   AGO       Africa
## 4               Albania   ALB       Europe
## 5  United Arab Emirates   ARE  Middle East
## 6             Argentina   ARG The Americas
## 7               Armenia   ARM         Asia
## 8   Antigua and Barbuda   ATG The Americas
## 9             Australia   AUS      Oceania
## 10              Austria   AUT       Europe
# Per-column summary of the region lookup table.
summary(object = dt_frame)
##    Country             Codes              Region         
##  Length:195         Length:195         Length:195        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Join the demographic data (dt_demografi) with the region lookup (dt_frame),
# matching Country.Code against Codes.
merge_data <- merge(
  x = dt_demografi,
  y = dt_frame,
  by.x = "Country.Code",
  by.y = "Codes"
)
# Both inputs carry a country-name column; drop Country.Name and keep Country.
merge_data[["Country.Name"]] <- NULL
head(x = merge_data, n = 10L)
##    Country.Code Birth.rate Internet.users        Income.Group
## 1           ABW     10.244        78.9000         High income
## 2           AFG     35.253         5.9000          Low income
## 3           AGO     45.985        19.1000 Upper middle income
## 4           ALB     12.877        57.2000 Upper middle income
## 5           ARE     11.044        88.0000         High income
## 6           ARG     17.716        59.9000         High income
## 7           ARM     13.308        41.9000 Lower middle income
## 8           ATG     16.447        63.4000         High income
## 9           AUS     13.200        83.0000         High income
## 10          AUT      9.400        80.6188         High income
##                 Country       Region
## 1                 Aruba The Americas
## 2           Afghanistan         Asia
## 3                Angola       Africa
## 4               Albania       Europe
## 5  United Arab Emirates  Middle East
## 6             Argentina The Americas
## 7               Armenia         Asia
## 8   Antigua and Barbuda The Americas
## 9             Australia      Oceania
## 10              Austria       Europe
# Inspect the structure (column names and types) of the merged data.
str(object = merge_data)
## 'data.frame':    195 obs. of  6 variables:
##  $ Country.Code  : chr  "ABW" "AFG" "AGO" "ALB" ...
##  $ Birth.rate    : num  10.2 35.3 46 12.9 11 ...
##  $ Internet.users: num  78.9 5.9 19.1 57.2 88 ...
##  $ Income.Group  : chr  "High income" "Low income" "Upper middle income" "Upper middle income" ...
##  $ Country       : chr  "Aruba" "Afghanistan" "Angola" "Albania" ...
##  $ Region        : chr  "The Americas" "Asia" "Africa" "Europe" ...
# Scatter plot of Birth.rate against Internet.users, coloured by the
# country's region. Uses ggplot() + geom_point() because qplot() is
# deprecated as of ggplot2 3.4.0.
ggplot(
  data = merge_data,
  mapping = aes(x = Internet.users, y = Birth.rate, colour = Region)
) +
  geom_point(size = 3)

# Same plot with the points drawn as filled squares (shape code 15).
# Uses ggplot() + geom_point() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(
  data = merge_data,
  mapping = aes(x = Internet.users, y = Birth.rate, colour = Region)
) +
  geom_point(size = 3, shape = 15)

# Same plot with semi-transparent filled circles (shape code 19), so that
# overlapping points remain visible.
# alpha is an opacity in [0, 1] and is set as a fixed layer parameter. The
# earlier `alpha=10` was outside that range and, not being wrapped in I(),
# was treated by qplot() as a mapped aesthetic, which adds an "alpha" legend
# instead of setting the opacity.
# Uses ggplot() + geom_point() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(
  data = merge_data,
  mapping = aes(x = Internet.users, y = Birth.rate, colour = Region)
) +
  geom_point(size = 3, shape = 19, alpha = 0.6)