Membaca data dari file berupa JSON dengan R.
Kemampuan R dalam mengolah data tentunya sudah tidak diragukan lagi. Banyak sekali sumber data yang dapat diintegrasikan dengan R, baik berupa file data maupun database.
R juga bisa membaca file berupa pdf dan gambar sebagai input data, tentunya dengan memanfaatkan berbagai package yang tersedia. Salah satu format data yang mampu dibaca oleh R yang akan dibahas pada tulisan ini adalah file JSON.
JSON adalah singkatan dari JavaScript Object Notation, yaitu sebuah format penyimpanan data yang didasarkan pada JavaScript. File JSON mirip seperti data pada file teks biasa, hanya saja mengikuti aturan dan format yang sesuai dengan sintaksis JavaScript.
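Pada JSON, setiap nilai harus berupa salah satu dari tipe data berikut: string, number, object, array, boolean, atau null. Contoh untuk masing-masing tipe, secara berurutan: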
{"name": "John"}
{"age": 38}
{
"employee": {"name": "John", "age": 30, "city": "New York"}
}
{
"employees": [ "John", "Anna", "Peter" ]
}
{"male": true}
{"midname": null}
Bagaimana dengan data berupa tanggal? JSON tidak memiliki tipe data khusus untuk tanggal, sehingga tanggal disimpan sebagai teks. Pengecualiannya adalah tanggal yang seluruhnya berupa angka dan ditulis tanpa tanda kutip ("), misalnya 20190711, yang akan dibaca sebagai numerik.
Kita dapat dengan mudah membaca/import data dari file JSON dengan menggunakan package jsonlite.
library(jsonlite)
# Column-oriented JSON: a single object whose keys each hold an array of values.
json1 <- '{
"ID": [1,2,3,4,5],
"Name": ["Rick","Dan","Michelle","Ryan","Gary"],
"Salary": [623.3,515.2,611,729,843.25],
"StartDate": ["1/1/2012","9/23/2013","11/15/2014","5/11/2014","3/27/2015"],
"Dept": ["IT","Operations","IT","HR","Finance"]
}'
# Parse the JSON text (the result is a named list, one element per key),
# then inspect its structure.
x1 <- fromJSON(txt = json1)
str(x1)
List of 5
$ ID : int [1:5] 1 2 3 4 5
$ Name : chr [1:5] "Rick" "Dan" "Michelle" "Ryan" ...
$ Salary : num [1:5] 623 515 611 729 843
$ StartDate: chr [1:5] "1/1/2012" "9/23/2013" "11/15/2014" "5/11/2014" ...
$ Dept : chr [1:5] "IT" "Operations" "IT" "HR" ...
Jadikan data.frame!
# Turn the parsed list into a data frame, keeping text columns as character.
x1 <- as.data.frame(x = x1, stringsAsFactors = FALSE)
str(x1)
'data.frame': 5 obs. of 5 variables:
$ ID : int 1 2 3 4 5
$ Name : chr "Rick" "Dan" "Michelle" "Ryan" ...
$ Salary : num 623 515 611 729 843
$ StartDate: chr "1/1/2012" "9/23/2013" "11/15/2014" "5/11/2014" ...
$ Dept : chr "IT" "Operations" "IT" "HR" ...
# Row-oriented JSON without field names: an array of arrays, one inner array
# per record.
json2 <- '[
[1, "Rick", 623.3, "1/1/2012", "IT"],
[2, "Dan", 515.2, "9/23/2013", "Operations"],
[3, "Michelle", 611, "11/15/2014", "IT"],
[4, "Ryan", 729, "5/11/2014", "HR"],
[5, "Gary", 843.25, "3/27/2015", "Finance"]
]'
# Parse the JSON text and inspect the structure of the result.
x2 <- fromJSON(txt = json2)
str(x2)
chr [1:5, 1:5] "1" "2" "3" "4" "5" "Rick" "Dan" "Michelle" "Ryan" "Gary" ...
# Convert the parsed matrix to a data frame. Use the reserved word FALSE
# rather than F, because F is an ordinary variable that can be reassigned.
x2 <- as.data.frame(x2, stringsAsFactors = FALSE)
str(x2)
'data.frame': 5 obs. of 5 variables:
$ V1: chr "1" "2" "3" "4" ...
$ V2: chr "Rick" "Dan" "Michelle" "Ryan" ...
$ V3: chr "623.3" "515.2" "611" "729" ...
$ V4: chr "1/1/2012" "9/23/2013" "11/15/2014" "5/11/2014" ...
$ V5: chr "IT" "Operations" "IT" "HR" ...
# Record-oriented JSON: an array of objects, one object per record, each
# carrying the same set of keys.
json3 <- '[
{
"ID":1, "Name":"Rick", "Salary":623.3, "StartDate":"1/1/2012", "Dept":"IT"
},
{
"ID":2, "Name":"Dan", "Salary":515.2, "StartDate":"9/23/2013", "Dept":"Operations"
},
{
"ID":3, "Name":"Michelle", "Salary":611, "StartDate":"11/15/2014", "Dept":"IT"
},
{
"ID":4, "Name":"Ryan", "Salary":729, "StartDate":"5/11/2014", "Dept":"HR"
},
{
"ID":5, "Name":"Gary", "Salary":843.25, "StartDate":"3/27/2015", "Dept":"Finance"
}
]'
# Parse the JSON text and inspect the structure of the result.
x3 <- fromJSON(txt = json3)
str(x3)
'data.frame': 5 obs. of 5 variables:
$ ID : int 1 2 3 4 5
$ Name : chr "Rick" "Dan" "Michelle" "Ryan" ...
$ Salary : num 623 515 611 729 843
$ StartDate: chr "1/1/2012" "9/23/2013" "11/15/2014" "5/11/2014" ...
$ Dept : chr "IT" "Operations" "IT" "HR" ...
# Read JSON data from a website: pemilu2019.kpu.go.id
dps2019 <- fromJSON("https://pemilu2019.kpu.go.id/static/json/hhcw/ppwp.json")
# View(dps2019)
# Show the first entries of the "table" element; [[ ]] matches the element
# name exactly, whereas $ allows partial name matching on lists.
head(dps2019[["table"]])
$`1`
$`1`$`21`
[1] 399066
$`1`$`22`
[1] 2359989
$`1`$persen
[1] 98.64242
$`-99`
$`-99`$`21`
[1] 577637
$`-99`$`22`
[1] 223575
$`-99`$persen
[1] 98.64652
$`6728`
$`6728`$`21`
[1] 3917069
$`6728`$`22`
[1] 3577472
$`6728`$persen
[1] 99.86643
$`12920`
$`12920`$`21`
[1] 407638
$`12920`$`22`
[1] 2485265
$`12920`$persen
[1] 100
$`14086`
$`14086`$`21`
[1] 1246888
$`14086`$`22`
[1] 1973298
$`14086`$persen
[1] 100
$`15885`
$`15885`$`21`
[1] 858738
$`15885`$`22`
[1] 1200255
$`15885`$persen
[1] 100
# Stack every named entry of the "table" element into a single data frame
# (one row per entry), replacing the raw parsed list, then inspect it.
dps2019 <- do.call(what = rbind.data.frame, args = dps2019$table)
str(dps2019)
'data.frame': 35 obs. of 3 variables:
$ X21 : int 399066 577637 3917069 407638 1246888 858738 1915685 582845 2845798 495500 ...
$ X22 : int 2359989 223575 3577472 2485265 1973298 1200255 2830633 585598 1951645 288097 ...
$ persen: num 98.6 98.6 99.9 100 100 ...
head(dps2019)
# Save the data frame as pretty-printed JSON in the working directory.
# `dataframe` (singular) is the jsonlite argument that selects how a data
# frame is laid out; "rows" writes one JSON object per row.
write_json(dps2019, "dps2019.json", pretty = TRUE, dataframe = "rows")
# Load JSON from a local .json file in the working directory, asking jsonlite
# to simplify arrays of records into a data frame, then inspect the result.
dps2018 <- fromJSON(txt = "dps2018.json", simplifyDataFrame = TRUE)
str(dps2018)
'data.frame': 10 obs. of 20 variables:
$ namaPropinsi : chr "JAWA BARAT" "JAWA BARAT" "JAWA BARAT" "JAWA BARAT" ...
$ namaKabKota : chr "BOGOR" "BOGOR" "BOGOR" "BOGOR" ...
$ namaKecamatan : chr "DRAMAGA" "DRAMAGA" "DRAMAGA" "DRAMAGA" ...
$ namaKelurahan : chr "BABAKAN" "CIHERANG" "CIKARAWANG" "DRAMAGA" ...
$ jmlTps : int 14 26 15 23 14 22 11 12 12 12
$ jmlPemilihLaki : int 2546 5333 3350 4522 3371 4972 2570 3293 3070 2933
$ jmlPemilihPerempuan : int 2582 5195 3171 4328 3202 4590 2514 3135 2826 2745
$ jmlPemilihKosong : int 0 0 0 0 0 0 0 0 0 0
$ totalPemilih : int 5128 10528 6521 8850 6573 9562 5084 6428 5896 5678
$ jmlPemilihPemulaLaki : int 5 16 105 38 7 9 6 18 8 3
$ jmlPemilihPemulaPerempuan: int 1 11 90 53 10 5 6 22 13 4
$ totalPemilihPemula : int 6 27 195 91 17 14 12 40 21 7
$ persenPemilihPemula : num 0.12 0.26 2.99 1.03 0.26 0.15 0.24 0.62 0.36 0.12
$ jmlDifabel1 : int 0 2 0 3 0 0 1 2 3 3
$ jmlDifabel2 : int 0 0 0 0 0 0 0 1 4 0
$ jmlDifabel3 : int 1 0 0 6 0 0 0 0 2 1
$ jmlDifabel4 : int 0 0 0 1 0 0 1 0 5 0
$ jmlDifabel5 : int 0 2 0 0 0 1 5 0 4 1
$ totalDifabel : int 1 4 0 10 0 1 7 3 18 5
$ persenDifabel : num 0.02 0.04 0 0.11 0 0.01 0.14 0.05 0.31 0.09
head(dps2018)
library(dplyr)
# Number of distinct 'Kelurahan' in Dramaga: keep one row per name, then count.
distinct(dps2018, namaKelurahan) %>%
  count()
# The 5 'Kelurahan' with the most TPS: keep the two relevant columns, sort by
# TPS count in descending order, and take the first five rows.
dps2018 %>%
  select(namaKelurahan, jmlTps) %>%
  arrange(desc(jmlTps)) %>%
  head(5)
NA
library(ggplot2)
# Horizontal bar chart of the number of TPS per Kelurahan, with the bars
# ordered by TPS count and each bar labelled with its value.
dps2018 %>%
  ggplot(aes(x = reorder(namaKelurahan, jmlTps), y = jmlTps)) +
  # geom_col() uses the y values in the data as bar heights.
  geom_col() +
  geom_text(aes(label = jmlTps)) +
  labs(
    title = "Number of TPS by Kelurahan",
    y = "Number of TPS",
    x = "Kelurahan"
  ) +
  # Flip the axes so the Kelurahan names are read horizontally.
  coord_flip() +
  theme_minimal()