library(readxl)
# Read the "Sheet2" worksheet of the course workbook into `df`, then
# auto-print it for a quick look at its contents.
df <- read_excel(
  path = "D:/Cooliah/Semester 4/Analisis Eksplorasi Data/Pertemuan 3/Data.xlsx",
  sheet = "Sheet2"
)
df
## # A tibble: 136 × 2
##    TPT   Waktu        
##    <chr> <chr>        
##  1 5.97  Februari 2022
##  2 5.47  Februari 2022
##  3 6.17  Februari 2022
##  4 4.40  Februari 2022
##  5 4.70  Februari 2022
##  6 4.74  Februari 2022
##  7 3.39  Februari 2022
##  8 4.31  Februari 2022
##  9 4.18  Februari 2022
## 10 8.02  Februari 2022
## # ℹ 126 more rows
# TPT is read in as text; coerce it to numeric so it can be plotted.
df[["TPT"]] <- as.numeric(df[["TPT"]])
library(maps)
library(mapdata)
library(ggplot2)
library(ggrepel)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Violin plot of TPT for each period in `Waktu`, overlaid with a narrow
# box plot and a point marking the median.
ggplot(df, aes(x = as.factor(Waktu), y = TPT)) +
  geom_violin(aes(col = Waktu), fill = NA, alpha = 0.2, draw_quantiles = 0.5) +
  # Outliers are hidden so the box plot does not clutter the violin outline.
  geom_boxplot(width = .1, fill = "orange", outlier.colour = NA) +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(
    fun = median,
    geom = "point",
    fill = "blue",
    shape = 21,
    size = 2.5
  ) +
  ggtitle("Violin Plot : Tingkat Pengangguran Terbuka di Indonesia 
          pada Tahun 2022-2023") +
  ylab("Tingkat Pengangguran Terbuka") +
  xlab("Waktu") +
  theme_minimal() +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))

library(readxl)
# Read the workbook again without naming a sheet (read_excel then uses the
# first sheet) and auto-print the result.
data <- read_excel(
  path = "D:/Cooliah/Semester 4/Analisis Eksplorasi Data/Pertemuan 3/Data.xlsx"
)
data
## # A tibble: 34 × 6
##    Provinsi  Pulau `Februari 2022` `Agustus 2022` `Februari 2023` `Agustus 2023`
##    <chr>     <chr> <chr>           <chr>          <chr>           <chr>         
##  1 ACEH      Suma… 5.97            6.17           5.75            6.03          
##  2 SUMATERA… Suma… 5.47            6.16           5.24            5.89          
##  3 SUMATERA… Suma… 6.17            6.28           5.9             5.94          
##  4 RIAU      Suma… 4.40            4.37           4.25            4.23          
##  5 JAMBI     Suma… 4.70            4.59           4.5             4.53          
##  6 SUMATERA… Suma… 4.74            4.63           4.53            4.11          
##  7 BENGKULU  Suma… 3.39            3.59           3.21            3.42          
##  8 LAMPUNG   Suma… 4.31            4.52           4.18            4.23          
##  9 KEP. BAN… Suma… 4.18            4.77           3.89            4.56          
## 10 KEP. RIAU Suma… 8.02            8.23           7.61            6.8           
## # ℹ 24 more rows
# Inspect column types before any conversion.
str(object = data)
## tibble [34 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi     : chr [1:34] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
##  $ Pulau        : chr [1:34] "Sumatra" "Sumatra" "Sumatra" "Sumatra" ...
##  $ Februari 2022: chr [1:34] "5.97" "5.47" "6.17" "4.40" ...
##  $ Agustus 2022 : chr [1:34] "6.17" "6.16" "6.28" "4.37" ...
##  $ Februari 2023: chr [1:34] "5.75" "5.24" "5.9" "4.25" ...
##  $ Agustus 2023 : chr [1:34] "6.03" "5.89" "5.94" "4.23" ...
# The four period columns arrive as text; convert each one to double in a
# single pass, then re-inspect the structure to confirm the new types.
data <- data %>%
  mutate(
    across(
      c(`Februari 2022`, `Agustus 2022`, `Februari 2023`, `Agustus 2023`),
      as.double
    )
  )
str(data)
## tibble [34 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi     : chr [1:34] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
##  $ Pulau        : chr [1:34] "Sumatra" "Sumatra" "Sumatra" "Sumatra" ...
##  $ Februari 2022: num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
##  $ Agustus 2022 : num [1:34] 6.17 6.16 6.28 4.37 4.59 4.63 3.59 4.52 4.77 8.23 ...
##  $ Februari 2023: num [1:34] 5.75 5.24 5.9 4.25 4.5 4.53 3.21 4.18 3.89 7.61 ...
##  $ Agustus 2023 : num [1:34] 6.03 5.89 5.94 4.23 4.53 4.11 3.42 4.23 4.56 6.8 ...
# Per-column summary; the explicit base:: prefix guards against masking.
base::summary(object = data)
##    Provinsi            Pulau           Februari 2022    Agustus 2022  
##  Length:34          Length:34          Min.   :3.110   Min.   :2.340  
##  Class :character   Class :character   1st Qu.:3.985   1st Qu.:4.000  
##  Mode  :character   Mode  :character   Median :4.775   Median :4.685  
##                                        Mean   :5.123   Mean   :4.966  
##                                        3rd Qu.:5.923   3rd Qu.:6.048  
##                                        Max.   :8.530   Max.   :8.310  
##  Februari 2023    Agustus 2023  
##  Min.   :3.040   Min.   :2.270  
##  1st Qu.:3.730   1st Qu.:3.487  
##  Median :4.415   Median :4.320  
##  Mean   :4.806   Mean   :4.614  
##  3rd Qu.:5.695   3rd Qu.:5.763  
##  Max.   :7.970   Max.   :7.520
# Mean of the `Agustus 2023` column within each `Pulau` group, ignoring
# missing values.
rata_per_pulau <- summarise(
  group_by(data, Pulau),
  mean_agustus_2023 = mean(`Agustus 2023`, na.rm = TRUE)
)

rata_per_pulau
## # A tibble: 7 × 2
##   Pulau                   mean_agustus_2023
##   <chr>                               <dbl>
## 1 Jawa                                 5.86
## 2 Kalimantan                           4.56
## 3 Kepulauan Maluku                     5.31
## 4 Kepulauan Nusa Tenggara              2.88
## 5 Papua                                4.03
## 6 Sulawesi                             3.64
## 7 Sumatra                              4.97
# Each island group's share of the summed group means, in percent.
rata_per_pulau$percentage <- with(
  rata_per_pulau,
  mean_agustus_2023 / sum(mean_agustus_2023) * 100
)

# Pie chart: a single stacked bar bent into a circle with polar coordinates,
# with each slice labelled by its rounded percentage.
ggplot(rata_per_pulau, aes(x = "", y = mean_agustus_2023, fill = Pulau)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "Komposisi TPT Per Pulau di Indonesia pada Agustus 2023",
    fill = "Pulau"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_void()

library(treemap)
# Two-level treemap: provinces nested inside their island group, with tile
# area taken from the `Agustus 2023` column.
treemap(
  dtf = data,
  index = c("Pulau", "Provinsi"),
  vSize = "Agustus 2023",
  title = "Treemap: Komposisi TPT Per Provinsi pada Agustus 2023",
  fontsize.title = 18,
  fontsize.labels = 8,
  fontcolor.labels = "white"
)

library(ggplot2)
library(dplyr)

# Lollipop chart of the ten provinces with the highest `Agustus 2023` value.
data %>%
  # Select by value: slice_head() only kept the first ten rows in file order,
  # which did not match the chart title. with_ties = FALSE caps it at ten.
  slice_max(`Agustus 2023`, n = 10, with_ties = FALSE) %>%
  ggplot(aes(y = reorder(Provinsi, `Agustus 2023`), x = `Agustus 2023`)) +
  # Stem from zero to the value; `linewidth` replaces `size` for lines
  # (deprecated for lines since ggplot2 3.4.0).
  geom_segment(
    aes(xend = 0, yend = Provinsi),
    color = "steelblue",
    linewidth = 2
  ) +
  geom_point(color = "lightgreen", size = 5) +
  labs(
    title = "Top 10 Provinsi dengan TPT Tertinggi Pada Agustus 2023",
    x = "Jumlah",
    y = "Provinsi"
  )

library(ggplot2)
library(dplyr)

# Lollipop chart of the ten provinces with the lowest `Agustus 2023` value.
data %>%
  # Select by value: slice_tail() only kept the last ten rows in file order,
  # which did not match the chart title. with_ties = FALSE caps it at ten.
  slice_min(`Agustus 2023`, n = 10, with_ties = FALSE) %>%
  ggplot(aes(y = reorder(Provinsi, `Agustus 2023`), x = `Agustus 2023`)) +
  # Stem from zero to the value; `linewidth` replaces `size` for lines
  # (deprecated for lines since ggplot2 3.4.0).
  geom_segment(
    aes(xend = 0, yend = Provinsi),
    color = "steelblue",
    linewidth = 2
  ) +
  geom_point(color = "lightgreen", size = 5) +
  labs(
    title = "10 Provinsi dengan TPT Terendah Pada Agustus 2023",
    x = "Jumlah",
    y = "Provinsi"
  )