The Indonesia Database for Policy and Economic Research (INDO-DAPOER) contains relevant economic and social indicators at the province- and district-level, which span across four main categories: fiscal, economic, social and demographic, as well as infrastructure. This report contains exploration, such as:
library(readxl)
datadapoer <- read_excel("D:/S2 Statistika/Ekplorasi dan Visualisasi/datadapoer.xlsx")
head(datadapoer)
## # A tibble: 6 × 14
## Provinces Na…¹ Provi…² Serie…³ Serie…⁴ 2011 …⁵ 2012 …⁶ 2013 …⁷ 2014 …⁸ 2015 …⁹
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Bali, Prop. ID.BA Agricu… FC.XPD… 326780… 149132… .. .. ..
## 2 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 3 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 4 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 5 Bali, Prop. ID.BA Birth … SH.STA… 96.457 96.9896 98.494… 98.354… 99.809…
## 6 Bali, Prop. ID.BA BPK Au… BPK.AU… 2 2 5 1 1
## # … with 5 more variables: `2016 [YR2016]` <chr>, `2017 [YR2017]` <chr>,
## # `2018 [YR2018]` <chr>, `2019 [YR2019]` <chr>, `2020 [YR2020]` <chr>, and
## # abbreviated variable names ¹`Provinces Name`, ²`Provinces Code`,
## # ³`Series Name`, ⁴`Series Code`, ⁵`2011 [YR2011]`, ⁶`2012 [YR2012]`,
## # ⁷`2013 [YR2013]`, ⁸`2014 [YR2014]`, ⁹`2015 [YR2015]`
library(tidyverse)
names(datadapoer) <- names(datadapoer) %>% make.names()
head(datadapoer)
## # A tibble: 6 × 14
## Provinces.Name Provi…¹ Serie…² Serie…³ X2011…⁴ X2012…⁵ X2013…⁶ X2014…⁷ X2015…⁸
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Bali, Prop. ID.BA Agricu… FC.XPD… 326780… 149132… .. .. ..
## 2 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 3 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 4 Bali, Prop. ID.BA Averag… SE.NEX… .. .. .. .. ..
## 5 Bali, Prop. ID.BA Birth … SH.STA… 96.457 96.9896 98.494… 98.354… 99.809…
## 6 Bali, Prop. ID.BA BPK Au… BPK.AU… 2 2 5 1 1
## # … with 5 more variables: X2016..YR2016. <chr>, X2017..YR2017. <chr>,
## # X2018..YR2018. <chr>, X2019..YR2019. <chr>, X2020..YR2020. <chr>, and
## # abbreviated variable names ¹Provinces.Code, ²Series.Name, ³Series.Code,
## # ⁴X2011..YR2011., ⁵X2012..YR2012., ⁶X2013..YR2013., ⁷X2014..YR2014.,
## # ⁸X2015..YR2015.
gos <- datadapoer %>% filter(Series.Name=="Goods and services expenditure (in IDR)") %>%
select(c(Provinces.Name,5:14))
colnames(gos) <- c('Provinces', 2011:2020)
library(data.table)
gos <- melt(setDT(gos), id.vars = c("Provinces"), variable.name = "Year")
gos$value <- as.numeric(gos$value)/(10^12)
gos2 <- gos
head(gos)
## Provinces Year value
## 1: Bali, Prop. 2011 0.5093046
## 2: Banten, Prop. 2011 0.9726174
## 3: Bengkulu, Prop. 2011 0.2375907
## 4: DI Yogyakarta, Prop. 2011 0.3743235
## 5: DKI Jakarta, Prop. 2011 8.3418312
## 6: Gorontalo, Prop. 2011 0.2254455
elec <- datadapoer %>% filter(Series.Name=="Household Access to Electricity: Total (in % of total household)") %>%
select(c(Provinces.Name,5:14))
colnames(elec) <- c('Provinces', 2011:2020)
library(data.table)
elec <- melt(setDT(elec), id.vars = c("Provinces"), variable.name = "Year")
elec$value <- as.numeric(elec$value)
head(elec)
## Provinces Year value
## 1: Bali, Prop. 2011 98.85030
## 2: Banten, Prop. 2011 99.27480
## 3: Bengkulu, Prop. 2011 87.69487
## 4: DI Yogyakarta, Prop. 2011 99.54836
## 5: DKI Jakarta, Prop. 2011 99.94611
## 6: Gorontalo, Prop. 2011 80.87733
house <- datadapoer %>% filter(Series.Name=="Household per capita expenditure (in IDR)") %>%
select(c(Provinces.Name,5:14))
colnames(house) <- c('Provinces', 2011:2020)
library(data.table)
house <- melt(setDT(house), id.vars = c("Provinces"), variable.name = "Year")
house$value <- as.numeric(house$value)
head(house)
## Provinces Year value
## 1: Bali, Prop. 2011 760449
## 2: Banten, Prop. 2011 694545
## 3: Bengkulu, Prop. 2011 563066
## 4: DI Yogyakarta, Prop. 2011 649901
## 5: DKI Jakarta, Prop. 2011 1223955
## 6: Gorontalo, Prop. 2011 480449
tourism <- datadapoer %>% filter(Series.Name=="Tourism and culture function expenditure (in IDR)") %>%
select(c(Provinces.Name,5:14))
colnames(tourism) <- c('Provinces', 2011:2020)
library(data.table)
tourism <- melt(setDT(tourism), id.vars = c("Provinces"), variable.name = "Year")
tourism$value <- as.numeric(tourism$value)
target <- c("Bali, Prop.","DI Yogyakarta, Prop.", 'Jawa Timur, Prop.', "Nusa Tenggara Timur, Prop.")
tourism <- tourism %>% filter(Provinces%in% target) %>% filter(Year==2020)
head(tourism)
## Provinces Year value
## 1: Bali, Prop. 2020 58659967293
## 2: DI Yogyakarta, Prop. 2020 313198463331
## 3: Jawa Timur, Prop. 2020 90073235455
## 4: Nusa Tenggara Timur, Prop. 2020 25083710468
library(ggplot2)
library(hrbrthemes)
target <- c(2019,2020)
gos %>% filter(Year %in% target) %>%
ggplot( aes(x=value, fill=Year)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
theme_ipsum() +
ggtitle("Goods and services expenditure \n(in trillion IDR)") + ylab("GOS") + theme(plot.title = element_text(color="black", size=12, face="bold.italic"))
library(viridis)
target <- c(2017:2020)
elec %>% filter(Year %in% target) %>%
ggplot( aes(x=Year, y=value, fill=Year)) +
geom_boxplot() +
scale_fill_viridis(discrete = TRUE, alpha=0.6, option="A") +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)) +
ggtitle("Household Access to Electricity: Total (in % of total household)") +
ylab("elec")
library(ggridges)
target <- c(2011,2020)
house %>% filter(Year %in% target) %>%
ggplot(aes(x = value, y = Year, fill=Year)) +
geom_density_ridges() +
theme_ridges() +
theme_ipsum() +
theme(legend.position = "none")+theme(
legend.position="none",
plot.title = element_text(size=11)) +
ggtitle("Comparison Household per capita expenditure (in IDR) 2011 & 2020")
library(RColorBrewer)
myPalette <- brewer.pal(4, "Set2")
pie(tourism$value, labels = c("Bali","DI Yogyakarta", 'Jawa Timur', "Nusa Tenggara Timur"), border="white", col=myPalette,
main="Tourism and culture function \nexpenditure (Share) 2020")
# library
elec <- elec %>% filter(Year == 2020)
house <- house %>% filter(Year == 2020)
scat <- cbind(elec$value, house$value)
colnames(scat) <- c("elec","house")
scat <- as.data.frame(scat)
library(ggplot2)
ggplot(scat, aes(x=house, y=elec)) +
geom_point(
color="orange",
fill="#69b3a2",
shape=21,
alpha=0.5,
size=6,
stroke = 2) +
theme(
legend.position="none",
plot.title = element_text(size=11)) +
ggtitle("Relationship between Household Access to Electricity(elec) \nand Household per capita expenditure (house)")
elec <- elec %>% filter(Year == 2020)
house <- house %>% filter(Year == 2020)
gos <- gos %>% filter(Year == 2020)
korelasi <- cbind(elec$value, house$value, gos$value)
colnames(korelasi) <- c("elec","house","gos")
korelasi <- as.data.frame(korelasi)
library(corrgram)
corrgram(korelasi, order=NULL, lower.panel=panel.shade, upper.panel=NULL, text.panel=panel.txt, main="Correlation Matrix")
library(PerformanceAnalytics)
chart.Correlation(korelasi, histogram = TRUE, pch= 19)
gos2 <- gos2 %>% filter(Year == 2020)
korelasi <- cbind(korelasi, gos2$Provinces)
tmp <- korelasi %>%
dplyr::mutate(
annotation = case_when(
house > 1800000 & elec > 99 ~ "yes",
house < 1000000 ~ "yes",
elec < 80 ~ "yes"
)
) %>% mutate(provinces = factor(gos2$Provinces, gos2$Provinces))
library(gridExtra)
library(ggrepel)
library(plotly)
tmp %>%
ggplot(aes(x=house, y=elec, size = gos,color=provinces )) +
geom_point(alpha=0.7) +
scale_size(range = c(1.4, 19), name="GOS") +
scale_color_viridis(discrete=TRUE, guide=FALSE) +
theme_ipsum() +
theme(legend.position="none") +
geom_text_repel(data=tmp %>% filter(annotation=="yes"), aes(label=provinces), size=4 )
gos3 <- datadapoer %>% filter(Series.Name=="Goods and services expenditure (in IDR)") %>% select(c(Provinces.Name,5:14))
colnames(gos3) <- c('Provinces', 2011:2020)
gos3 <- melt(setDT(gos3), id.vars = c("Provinces"), variable.name = "Year")
gos3$value <- as.numeric(gos3$value)/(10^9)
gos3 <- gos3 %>% filter(Provinces=="Bali, Prop.")
gos3$index <- 1:nrow(gos3) # create index variable
loessMod25 <- loess(value ~ index, data=gos3, span=0.25) # 25% smoothing span
loessMod50 <- loess(value ~ index, data=gos3, span=0.50) # 50% smoothing span
loessMod75 <- loess(value ~ index, data=gos3, span=0.75) # 75% smoothing span
smoothed25 <- predict(loessMod25)
smoothed50 <- predict(loessMod50)
smoothed75 <- predict(loessMod75)
plot(gos3$value, x=gos3$Year, type="l", main="Bali, Prop. Goods and services expenditure (in billion IDR)", xlab="Year", ylab="GOS")
lines(smoothed25, x=gos3$Year, col="red")
lines(smoothed50, x=gos3$Year, col="green")
lines(smoothed75, x=gos3$Year, col="blue")
legend('bottomright', legend=c('.25', '.50', '.75'),
col=c('red', 'green', 'blue'), pch=19, title='Smoothing Span')