Read Data

library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
library(sf)
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
library(reshape2)

# Load the raw workbook; readxl assigns placeholder names to the three
# unnamed header cells (see the message below).
data <- read_xlsx("D:\\Campss\\Season 4\\AED\\3\\Data Tugas 1 AED.xlsx")
## New names:
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
View(data)

# Drop rows 1-2 and row 37 of the imported sheet in one step (row 37 is the
# row that sits at position 35 once the first two are gone).
# NOTE(review): presumably header rows and a trailing total row - confirm
# against the workbook.
data1 <- data[-c(1, 2, 37), ]

# Re-write every column as text with decimal commas turned into dots.
data1 <- as.data.frame(
  lapply(data1, gsub, pattern = ",", replacement = ".", fixed = TRUE)
)

# Title-case the province names. lapply() returns a list, so Provinsi is a
# list column from here on (visible in the str() output further down).
data1$Provinsi <- lapply(data1$Provinsi, str_to_title)

# Title-casing lower-cases the two abbreviations; restore them by position.
data1[11, 1] <- "DKI Jakarta"
data1[14, 1] <- "DI Yogyakarta"
names(data1) <- c(
  "Provinsi",
  "Februari 2022", "Agustus 2022",
  "Februari 2023", "Agustus 2023"
)
str(data1)
## 'data.frame':    34 obs. of  5 variables:
##  $ Provinsi     :List of 34
##   ..$ : chr "Aceh"
##   ..$ : chr "Sumatera Utara"
##   ..$ : chr "Sumatera Barat"
##   ..$ : chr "Riau"
##   ..$ : chr "Jambi"
##   ..$ : chr "Sumatera Selatan"
##   ..$ : chr "Bengkulu"
##   ..$ : chr "Lampung"
##   ..$ : chr "Kep. Bangka Belitung"
##   ..$ : chr "Kep. Riau"
##   ..$ : chr "DKI Jakarta"
##   ..$ : chr "Jawa Barat"
##   ..$ : chr "Jawa Tengah"
##   ..$ : chr "DI Yogyakarta"
##   ..$ : chr "Jawa Timur"
##   ..$ : chr "Banten"
##   ..$ : chr "Bali"
##   ..$ : chr "Nusa Tenggara Barat"
##   ..$ : chr "Nusa Tenggara Timur"
##   ..$ : chr "Kalimantan Barat"
##   ..$ : chr "Kalimantan Tengah"
##   ..$ : chr "Kalimantan Selatan"
##   ..$ : chr "Kalimantan Timur"
##   ..$ : chr "Kalimantan Utara"
##   ..$ : chr "Sulawesi Utara"
##   ..$ : chr "Sulawesi Tengah"
##   ..$ : chr "Sulawesi Selatan"
##   ..$ : chr "Sulawesi Tenggara"
##   ..$ : chr "Gorontalo"
##   ..$ : chr "Sulawesi Barat"
##   ..$ : chr "Maluku"
##   ..$ : chr "Maluku Utara"
##   ..$ : chr "Papua Barat"
##   ..$ : chr "Papua"
##  $ Februari 2022: chr  "5.97" "5.47" "6.17" "4.40" ...
##  $ Agustus 2022 : chr  "6.17" "6.16" "6.28" "4.37" ...
##  $ Februari 2023: chr  "5.75" "5.24" "5.9" "4.25" ...
##  $ Agustus 2023 : chr  "6.03" "5.89" "5.94" "4.23" ...
# Turn the four period columns from text into numbers, in place.
data1[["Februari 2022"]] <- as.numeric(data1[["Februari 2022"]])
data1[["Februari 2023"]] <- as.numeric(data1[["Februari 2023"]])
data1[["Agustus 2022"]] <- as.numeric(data1[["Agustus 2022"]])
data1[["Agustus 2023"]] <- as.numeric(data1[["Agustus 2023"]])

# Stand-alone numeric vectors, one per period (the columns are already
# numeric at this point, so no further coercion is needed).
feb22 <- data1[["Februari 2022"]]
feb23 <- data1[["Februari 2023"]]
agt22 <- data1[["Agustus 2022"]]
agt23 <- data1[["Agustus 2023"]]

# Per-year pairs of the two survey rounds.
yr2022 <- data.frame(feb22 = feb22, agt22 = agt22)
yr2023 <- data.frame(feb23 = feb23, agt23 = agt23)

# Flatten the list column of province names into a factor.
provinsi <- as.factor(unlist(data1[["Provinsi"]]))
data2 <- data.frame(
  provinsi = provinsi,
  feb22 = feb22, agt22 = agt22,
  feb23 = feb23, agt23 = agt23
)

# National yearly figure: mean of the two round means.
mean22 <- mean(c(mean(feb22), mean(agt22)))
mean23 <- mean(c(mean(feb23), mean(agt23)))

# Province-level yearly figure: mean of the two rounds in each row.
data1[["Tahun 2022"]] <- rowMeans(data2[, c("feb22", "agt22")])
data1[["Tahun 2023"]] <- rowMeans(data2[, c("feb23", "agt23")])

# Wide table with the factor province column plus monthly and yearly values.
data3 <- data.frame(
  Provinsi = provinsi,
  `Februari 2022` = feb22,
  `Agustus 2022` = agt22,
  `Februari 2023` = feb23,
  `Agustus 2023` = agt23,
  `Tahun 2022` = data1[["Tahun 2022"]],
  `Tahun 2023` = data1[["Tahun 2023"]],
  check.names = FALSE
)

# Island group for each of the 34 rows, in the row order of data1.
Pulau <- rep(
  c("Sumatera", "Jawa", "Nusa Tenggara", "Kalimantan", "Sulawesi", "Maluku",
    "Papua"),
  times = c(10, 6, 3, 5, 6, 2, 2)
)

# data.frame() sanitises the column names here (e.g. "Tahun.2023").
data.pulau <- data.frame(data1, Pulau)

# One data frame per island group that is kept for the five-island view.
sumatera <- data.pulau[data.pulau$Pulau == "Sumatera", ]
jawa <- data.pulau[data.pulau$Pulau == "Jawa", ]
kalimantan <- data.pulau[data.pulau$Pulau == "Kalimantan", ]
sulawesi <- data.pulau[data.pulau$Pulau == "Sulawesi", ]
papua <- data.pulau[data.pulau$Pulau == "Papua", ]

# Stack the five groups ("Nusa Tenggara" and "Maluku" are left out) and put
# the readable column names back.
datpulau5 <- rbind(sumatera, jawa, kalimantan, sulawesi, papua)
names(datpulau5) <- c(
  "Provinsi",
  "Februari 2022", "Agustus 2022", "Februari 2023", "Agustus 2023",
  "Tahun 2022", "Tahun 2023",
  "Pulau"
)

Statistik Deskriptif

# Build a one-column table of descriptive statistics for a numeric vector.
#
# Args:
#   a: values coercible with as.numeric().
#
# Returns:
#   A data frame with the rows Max, Min, Mod, Mean, Qrt.1, Med, Qrt.3, Var,
#   Stdev and Range (in that order) and a single column named
#   "Statistik Deskriptif". All values except Mod are rounded to 2 decimals.
statdesc <- function(a) {
  x <- as.numeric(a)
  Max <- round(max(x), 2)
  Min <- round(min(x), 2)

  # table() orders its names, so with tied frequencies the first entry is the
  # smallest mode. Keeping a single value stops rbind-style recycling from
  # widening the result to one column per tied mode.
  frek <- table(x)
  Mod <- as.numeric(names(frek)[frek == max(frek)])[1]

  stats <- c(
    Max = Max,
    Min = Min,
    Mod = Mod,
    Mean = round(mean(x), 2),
    # quantile() returns a named value ("25%"); drop it to keep the row name.
    Qrt.1 = round(unname(quantile(x, 0.25)), 2),
    Med = round(median(x), 2),
    Qrt.3 = round(unname(quantile(x, 0.75)), 2),
    Var = round(var(x), 2),
    # Taken from the data, not from the rounded variance, so a small spread
    # is not flattened to 0.
    Stdev = round(sd(x), 2),
    # Difference of the reported (rounded) extremes, so the table adds up.
    Range = round(Max - Min, 2)
  )

  dt <- data.frame(stats, row.names = names(stats))
  colnames(dt) <- "Statistik Deskriptif"
  dt
}

# Descriptive statistics for a numeric vector, laid out as a single row.
#
# Args:
#   a: values coercible with as.numeric().
#   b: label used as the row name of the result.
#
# Returns:
#   A 1 x 9 matrix with columns Max, Min, Mean, Qrt.1, Med, Qrt.3, Var, Stdev
#   and Range, every value rounded to 3 decimals, and row name `b`.
statdesc2 <- function(a, b) {
  x <- as.numeric(a)
  Max <- round(max(x), 3)
  Min <- round(min(x), 3)

  stats <- c(
    Max = Max,
    Min = Min,
    Mean = round(mean(x), 3),
    # quantile() returns a named value ("25%"); drop it to keep the label.
    Qrt.1 = round(unname(quantile(x, 0.25)), 3),
    Med = round(median(x), 3),
    Qrt.3 = round(unname(quantile(x, 0.75)), 3),
    Var = round(var(x), 3),
    # Taken from the data, not from the rounded variance, so a small spread
    # is not flattened to 0.
    Stdev = round(sd(x), 3),
    # Difference of the reported (rounded) extremes, so the row adds up.
    Range = round(Max - Min, 3)
  )

  dt <- data.frame(stats, row.names = names(stats))
  colnames(dt) <- b
  t(dt)
}

# Descriptive statistics for the August 2023 values; the outer parentheses
# force the result to print.
(statdesc2(agt23, "Agustus 2023"))
##               Max  Min  Mean Qrt.1  Med Qrt.3   Var Stdev Range
## Agustus 2023 7.52 2.27 4.614 3.487 4.32 5.762 2.014 1.419  5.25
# Base-R summary of the same vector, for comparison with the row above.
summary(agt23)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.270   3.487   4.320   4.614   5.763   7.520

Redefine New Data

Data per Bulan

# One period label per observation. The length comes from the matching value
# vector rather than a hard-coded 34, so labels and values stay aligned if
# the number of rows ever changes.
feb22name <- rep("Februari 2022", length(feb22))
feb23name <- rep("Februari 2023", length(feb23))
agt22name <- rep("Agustus 2022", length(agt22))
agt23name <- rep("Agustus 2023", length(agt23))

# Long version of data1 (argument name spelled out in full: id.vars).
data1melt <- melt(
  data = data1,
  id.vars = "Provinsi",
  variable.name = "Bulan",
  value.name = "Nilai TPT"
)

# Stack labels and values in the same period order.
Bulan <- as.factor(c(feb22name, agt22name, feb23name, agt23name))
Angka <- as.numeric(c(feb22, agt22, feb23, agt23))

newdat <- data.frame(Bulan, Angka)

# Period factor with its levels ordered by the median value of each period.
reord <- reorder(newdat$Bulan, newdat$Angka, FUN = median)

# Quick boxplot of the value distribution per period.
ggplot() +
  geom_boxplot(data = newdat, aes(x = Bulan, y = Angka)) +
  labs(x = NULL, y = "Tingkat Pengangguran Terbuka Indonesia")

Data per Tahun

# One year label per province row. The length comes from data1 rather than a
# hard-coded 34, so labels and values stay aligned if the rows change.
tahun22 <- rep("Tahun 2022", nrow(data1))
tahun23 <- rep("Tahun 2023", nrow(data1))

# Year labels: both years stacked, and each year on its own.
Tahun <- as.factor(c(tahun22, tahun23))
Tahun22 <- as.factor(tahun22)
Tahun23 <- as.factor(tahun23)

# Yearly values in the same order as the labels; as.numeric() drops any names.
Nilai <- as.numeric(c(data1$`Tahun 2022`, data1$`Tahun 2023`))
Nilai22 <- as.numeric(data1$`Tahun 2022`)
Nilai23 <- as.numeric(data1$`Tahun 2023`)

newdatyr <- data.frame(Tahun, Nilai)
newdatyr22 <- data.frame(Tahun22, Nilai22)
newdatyr23 <- data.frame(Tahun23, Nilai23)

# Year factor with its levels ordered by the median value of each year.
reord2 <- reorder(newdatyr$Tahun, newdatyr$Nilai, FUN = median)

Visualisasi Data

Boxplot

# Boxplot per period, ordered by median and filled by period.
# The outline colour belongs on the geom: ggplot() itself ignores extra
# arguments, so passing color there had no effect.
ggplot(data = newdat, aes(x = reord, y = Angka, fill = Bulan)) +
  geom_boxplot(color = "black")

# Boxplot per period with the raw points jittered on top; the x axis runs
# through the median-ordered levels in reverse.
ggplot(
  data = newdat,
  mapping = aes(x = reord, y = Angka, color = Bulan)
) +
  geom_boxplot(width = 0.5) +
  geom_jitter(alpha = 0.5) +
  scale_x_discrete(limits = rev(levels(reord))) +
  labs(x = NULL, y = "Tingkat Pengangguran Terbuka Indonesia") +
  theme(
    legend.position = "top",
    plot.margin = unit(c(1, 1, 1, 3), "cm")
  )

# Median-ordered factors: periods, all island groups, and the five-island
# subset used in the plot below.
reord <- reorder(newdat[["Bulan"]], newdat[["Angka"]], FUN = median)
orderp <- reorder(
  data.pulau[["Pulau"]],
  data.pulau[["Tahun.2023"]],
  FUN = median
)

neworder <- reorder(
  datpulau5[["Pulau"]],
  datpulau5[["Tahun 2023"]],
  FUN = median
)

# 2023 yearly values per island group, boxes ordered by median.
ggplot(
  data = datpulau5,
  mapping = aes(x = neworder, y = `Tahun 2023`, color = Pulau)
) +
  geom_boxplot(width = 0.5) +
  geom_jitter(alpha = 0.5) +
  scale_x_discrete() +
  labs(x = NULL, y = "TPT Indonesia Tahun 2023") +
  theme(
    legend.position = "top",
    plot.margin = unit(c(1, 1, 1, 3), "cm")
  )

# Yearly values: one box per year with jittered points, x axis in reversed
# median order.
ggplot(
  data = newdatyr,
  mapping = aes(x = reord2, y = Nilai, color = Tahun)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  labs(x = NULL, y = "Tingkat Pengangguran Terbuka Indonesia") +
  theme() +
  scale_x_discrete(limits = rev(levels(reord2)))

Histogram

# One histogram panel per period, stacked in a single column.
ggplot(data = newdat, mapping = aes(x = Angka)) +
  geom_histogram(mapping = aes(fill = Bulan)) +
  facet_wrap(~Bulan, ncol = 1) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Angka TPT per Bulan\n",
    x = "TPT Indonesia",
    y = "Frekuensi"
  ) +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(plyr)
library(dplyr)

# Mean value per period and per year, used for the reference lines.
rataan <- ddply(newdat, "Bulan", summarise, rata2 = mean(Angka))
rataan2 <- ddply(newdatyr, "Tahun", summarise, rata2 = mean(Nilai))

# Outline histograms per period, a dashed line at each period mean, and the
# rounded mean printed next to its line.
ggplot(newdat, aes(x = Angka, color = Bulan)) +
  geom_histogram(fill = "white") +
  geom_vline(
    data = rataan,
    mapping = aes(xintercept = rata2, color = Bulan),
    linetype = "dashed"
  ) +
  labs(y = "Frekuensi", x = "Angka TPT Indonesia") +
  theme(legend.position = "top") +
  geom_text(
    data = rataan,
    mapping = aes(
      x = rata2 - 0.1,
      y = 11.7,
      label = as.character(round(rata2, 1))
    ),
    vjust = -1,
    size = 2.2
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Violin Plot

# Violin per year with a narrow boxplot inside, jittered points, a dashed
# line at each yearly mean and a marker on the median; axes are flipped.
ggplot(newdatyr, aes(reord2, Nilai)) +
  geom_violin(aes(col = Tahun), fill = "white", width = 0.7) +
  geom_boxplot(width = .1, fill = "#4B5768", outlier.colour = NA) +
  geom_jitter(alpha = 0.5, aes(color = Tahun)) +
  # alpha is an opacity in [0, 1]; the previous value of 2 was out of range,
  # so fully opaque lines are requested explicitly.
  geom_hline(
    data = rataan2,
    aes(yintercept = rata2, color = Tahun),
    linetype = "dashed",
    alpha = 1
  ) +
  labs(x = NULL, y = "Tingkat Pengangguran Terbuka") +
  theme(legend.position = "top") +
  stat_summary(
    fun = median, geom = "point",
    fill = "blue", shape = 21, size = 2.5
  ) +
  scale_x_discrete(limits = rev(levels(reord2))) +
  coord_flip()

Density Plot

# Density

# Overlaid density curves, one per period. The constant fill labels become
# legend keys and are mapped to colours by scale_fill_manual().
ggplot(data3) +
  geom_density(
    aes(x = `Februari 2022`, fill = "Februari 2022"),
    color = "coral", alpha = 0.4
  ) +
  geom_density(
    aes(x = `Februari 2023`, fill = "Februari 2023"),
    color = "#e9ecef", alpha = 0.4
  ) +
  geom_density(
    aes(x = `Agustus 2022`, fill = "Agustus 2022"),
    color = "#e9ecef", alpha = 0.4
  ) +
  geom_density(
    aes(x = `Agustus 2023`, fill = "Agustus 2023"),
    color = "cyan", alpha = 0.4
  ) +
  scale_fill_manual(
    name = NULL,
    values = c(
      "Februari 2022" = "#e41a1c",
      "Februari 2023" = "#4daf4a",
      "Agustus 2022" = "#377eb8",
      "Agustus 2023" = "#984ea3"
    )
  ) +
  xlim(1, 10.2) +
  labs(
    title = "Density Plot Sebaran Data Tingkat Pengangguran Terbuka Indonesia",
    x = "Tingkat Pengangguran Terbuka"
  )

# Overlaid density curves of the two yearly averages.
ggplot(data3) +
  geom_density(
    aes(x = `Tahun 2022`, fill = "Tahun 2022"),
    color = "#e9ecef", alpha = 0.4
  ) +
  geom_density(
    aes(x = `Tahun 2023`, fill = "Tahun 2023"),
    color = "#e9ecef", alpha = 0.4
  ) +
  xlim(1, 10.2) +
  labs(
    title = "Density Plot Sebaran Data Tingkat Pengangguran Terbuka Indonesia",
    x = "TPT",
    y = "Jumlah"
  )

Histogram (gif)

library(gganimate)
## Warning: package 'gganimate' was built under R version 4.3.2
library(gifski)
## Warning: package 'gifski' was built under R version 4.3.2
library(av)
## Warning: package 'av' was built under R version 4.3.2
# Animated histogram that steps through the periods in Bulan.
# TRUE/FALSE are spelled out (T and F are ordinary, reassignable variables)
# and the timing arguments are named so their meaning is visible.
animasi <- ggplot(newdat, aes(Angka)) +
  geom_histogram(col = "white", fill = "blue") +
  transition_states(
    Bulan,
    transition_length = 5,
    state_length = 0.5,
    wrap = FALSE
  ) +
  view_follow(fixed_x = TRUE)
animasi
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Bar Chart

# One bar per province showing the 2023 yearly average.
ggplot(
  data = data3,
  mapping = aes(x = Provinsi, y = `Tahun 2023`)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Tingkat Pengangguran Terbuka di Indonesia Tahun 2023",
    caption = "Sumber: Badan Pusat Statistik",
    x = "Provinsi",
    y = "TPT"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Side-by-side bars per province for the 2022 and 2023 yearly averages.
# position_dodge() only separates bars that live in the same layer, so the
# two year columns are first stacked into long form and drawn by one
# geom_bar(); with one layer per year the bars were painted over each other.
ggplot(
  data = melt(
    data3[, c("Provinsi", "Tahun 2022", "Tahun 2023")],
    id.vars = "Provinsi",
    variable.name = "Tahun",
    value.name = "TPT"
  ),
  aes(x = Provinsi, y = TPT, fill = Tahun)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Tingkat Pengangguran Terbuka di Indonesia Tahun 2023",
    caption = "Sumber: Badan Pusat Statistik",
    y = "TPT",
    x = "Provinsi"
  ) +
  scale_fill_manual(
    values = c("Tahun 2022" = "#f44369", "Tahun 2023" = "#3e3b92"),
    name = "Tahun"
  )

# Horizontal bars per province for August 2022 and August 2023.
# NOTE(review): each layer holds a single bar per province, so
# position_dodge() has nothing to offset and the two series are drawn on top
# of each other; the differing alpha values suggest the overlay is intended -
# confirm.
ggplot(data = data3, mapping = aes(x = Provinsi)) +
  geom_bar(
    mapping = aes(y = `Agustus 2022`, fill = "Agustus 2022"),
    stat = "identity",
    position = position_dodge(width = 0.8),
    alpha = 0.9
  ) +
  geom_bar(
    mapping = aes(y = `Agustus 2023`, fill = "Agustus 2023"),
    stat = "identity",
    position = position_dodge(width = 0.8),
    alpha = 0.8
  ) +
  coord_flip() +
  scale_fill_manual(
    name = "Periode Waktu",
    values = c("Agustus 2022" = "#1ECE70", "Agustus 2023" = "#3e3b92")
  ) +
  labs(
    title = "Tingkat Pengangguran Terbuka Indonesia",
    subtitle = "Per Bulan Agustus",
    caption = "Sumber: Badan Pusat Statistik",
    x = "Provinsi",
    y = "TPT"
  ) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 1),
    axis.text.y = element_text(size = 6.5, angle = 0, hjust = 1)
  )

# Long-form copies of data3: yearly averages (columns 1, 6, 7) and the four
# survey rounds (columns 1 to 5).
data3_melted <- melt(
  data3[, c(1, 6, 7)],
  id.vars = "Provinsi",
  variable.name = "Tahun",
  value.name = "TPT"
)
data4 <- melt(
  data3[, 1:5],
  id.vars = "Provinsi",
  variable.name = "Bulan",
  value.name = "TPT"
)

# Dodged bars per province, one bar per year.
ggplot(
  data = data3_melted,
  mapping = aes(x = Provinsi, y = TPT, fill = Tahun)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Tingkat Pengangguran Terbuka di Indonesia Tahun 2023",
    caption = "Sumber: Badan Pusat Statistik",
    x = "Provinsi",
    y = "TPT"
  ) +
  theme(axis.text.x = element_text(size = 8, angle = 45, hjust = 1))

# Long form of the four survey rounds (columns 2 to 5) keyed by island group
# (column 8).
datpul_melt <- melt(
  datpulau5[, c(2:5, 8)],
  id.vars = "Pulau",
  variable.name = "Bulan",
  value.name = "TPT"
)

# Dodged bars per island group, one bar position per survey round.
# NOTE(review): datpul_melt still has one row per province, so several bars
# share each Pulau/Bulan slot and are drawn over one another - confirm
# whether an aggregate per island group was intended.
ggplot(
  data = datpul_melt,
  mapping = aes(x = Pulau, y = TPT, fill = Bulan)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Tingkat Pengangguran Terbuka di Indonesia Tahun 2022 - 2023",
    caption = "Sumber: Badan Pusat Statistik",
    x = "Pulau",
    y = "TPT"
  ) +
  theme(axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5))

# Horizontal bar chart: TPT on the x axis, provinces on the y axis, one
# dodged bar per year.
ggplot(
  data = data3_melted,
  mapping = aes(x = TPT, y = Provinsi, fill = Tahun)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Perbandingan Tingkat Pengangguran Terbuka di Indonesia Tahun 2022 dan 2023",
    caption = "Sumber: Badan Pusat Statistik",
    y = "Provinsi",
    x = "TPT"
  ) +
  theme(
    axis.text.x = element_text(size = 8, angle = 0, hjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1, size = 6)
  )

# Open the long-form yearly table in the data viewer.
View(data3_melted)

# Horizontal bar chart: TPT on the x axis, provinces on the y axis, one
# dodged bar per survey round.
ggplot(
  data = data4,
  mapping = aes(x = TPT, y = Provinsi, fill = Bulan)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  labs(
    title = "Tingkat Pengangguran Terbuka",
    subtitle = "Perbandingan Tingkat Pengangguran Terbuka di Indonesia Tahun 2022 dan 2023",
    caption = "Sumber: Badan Pusat Statistik",
    y = "Provinsi",
    x = "TPT"
  ) +
  theme(
    axis.text.x = element_text(size = 8, angle = 0, hjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1, size = 6)
  )

Peta Konsentrasi

# Base map: fetch the province-level map from the `indonesia` package.
library(indonesia)
prov <- id_map("indonesia", "provinsi")
# Rename the two columns so the name column matches the join key in data1.
colnames(prov) <- c("Provinsi", "geometry")

View(prov)
# Attach the data1 columns to the map by province name.
# NOTE(review): merge() keeps only names found in both tables by default -
# confirm the spellings in prov match those in data1.
prov_merge <- merge(prov, data1, by = "Provinsi") 
## old-style crs object detected; please recreate object with a recent sf::st_crs()
# Plain outline of the provinces, without any data.
ggplot(prov) +
  geom_sf()
## old-style crs object detected; please recreate object with a recent sf::st_crs()
## old-style crs object detected; please recreate object with a recent sf::st_crs()
## old-style crs object detected; please recreate object with a recent sf::st_crs()
# Continuous fill: provinces coloured by the February 2022 value on the
# viridis "F" palette.
library(viridis)
## Warning: package 'viridis' was built under R version 4.3.2
## Loading required package: viridisLite
ggplot(data = prov_merge) +
  geom_sf(mapping = aes(fill = `Februari 2022`)) +
  scale_fill_viridis_c(option = "F")

# Working copy of the merged map with the four period columns forced to
# numeric (via character, so factor columns would convert by label).
prov_mod <- prov_merge
prov_mod$`Februari 2022` <- as.numeric(as.character(prov_mod$`Februari 2022`))
prov_mod$`Februari 2023` <- as.numeric(as.character(prov_mod$`Februari 2023`))
prov_mod$`Agustus 2022` <- as.numeric(as.character(prov_mod$`Agustus 2022`))
# Each column is converted from itself; this line previously read from
# `Februari 2023`, which overwrote the August 2023 values.
prov_mod$`Agustus 2023` <- as.numeric(as.character(prov_mod$`Agustus 2023`))

# Choropleths of the yearly averages and of August 2023, each with its own
# low/high fill colours.
# NOTE(review): `high` is given two colours in every scale below, while
# scale_fill_gradient() documents a single colour - confirm the rendered
# gradient is the intended one.

# Yearly average 2022, cream to red/dark blue.
ggplot(data = prov_mod) +
  geom_sf(mapping = aes(fill = `Tahun 2022`)) +
  scale_fill_gradient(low = "#FFEDD2", high = c("#DC443F", "darkblue")) +
  labs(title = "Tahun 2022")

# Yearly average 2023, same colours.
ggplot(data = prov_mod) +
  geom_sf(mapping = aes(fill = `Tahun 2023`)) +
  scale_fill_gradient(low = "#FFEDD2", high = c("#DC443F", "darkblue")) +
  labs(title = "Tahun 2023")

# Alternative colour schemes.
ggplot(data = prov_mod) +
  geom_sf(mapping = aes(fill = `Agustus 2023`)) +
  scale_fill_gradient(low = "#474ed7", high = c("#fff1bf", "#ec458d"))

ggplot(data = prov_mod) +
  geom_sf(mapping = aes(fill = `Tahun 2023`)) +
  scale_fill_gradient(low = "#fff1bf", high = c("blue", "#ec458d")) +
  labs(title = "Tahun 2023")

ggplot(data = prov_mod) +
  geom_sf(mapping = aes(fill = `Tahun 2022`)) +
  scale_fill_gradient(low = "#fff1bf", high = c("#474ed7", "#ec458d")) +
  labs(title = "Tahun 2022")