Teknik Sampling dan Survei

Assignment Week 11

Check Sheet

library(DT)
library(dplyr)

# Set seed untuk hasil acak yang konsisten
set.seed(42)

# Vektor pilihan
request_types <- c("Document Verification", "ID Card Issuance", "Business Registration", "Tax Payment Assistance", "Permit Application")
request_statuses <- c("Fulfilled", "Not Fulfilled")

# Buat dataset
check_sheet <- data.frame(
  Request_ID = paste0("REQ", sprintf("%03d", 1:100)),
  Citizen_Name = paste0("Citizen_", 1:100),
  Request_Type = sample(request_types, 100, replace = TRUE),
  Request_Status = sample(request_statuses, 100, replace = TRUE),
  stringsAsFactors = FALSE
)

# Menambahkan kolom Problem dengan variasi berdasarkan jenis permintaan
check_sheet$Problem <- case_when(
  check_sheet$Request_Status == "Not Fulfilled" & check_sheet$Request_Type == "Document Verification" ~ "Issue: Documents missing or incomplete",
  check_sheet$Request_Status == "Not Fulfilled" & check_sheet$Request_Type == "ID Card Issuance" ~ "Issue: Waiting for biometric verification",
  check_sheet$Request_Status == "Not Fulfilled" & check_sheet$Request_Type == "Business Registration" ~ "Issue: Delay in approval",
  check_sheet$Request_Status == "Not Fulfilled" & check_sheet$Request_Type == "Tax Payment Assistance" ~ "Issue: Payment system error",
  check_sheet$Request_Status == "Not Fulfilled" & check_sheet$Request_Type == "Permit Application" ~ "Issue: Permit not yet approved",
  
  check_sheet$Request_Status == "Fulfilled" & check_sheet$Request_Type == "Document Verification" ~ "No Issue: Document verified successfully",
  check_sheet$Request_Status == "Fulfilled" & check_sheet$Request_Type == "ID Card Issuance" ~ "No Issue: ID Card issued",
  check_sheet$Request_Status == "Fulfilled" & check_sheet$Request_Type == "Business Registration" ~ "No Issue: Business registration completed",
  check_sheet$Request_Status == "Fulfilled" & check_sheet$Request_Type == "Tax Payment Assistance" ~ "No Issue: Tax payment processed",
  check_sheet$Request_Status == "Fulfilled" & check_sheet$Request_Type == "Permit Application" ~ "No Issue: Permit issued",
  
  TRUE ~ "Other Issue"
)

# Tampilkan tabel interaktif dengan kolom Problem yang bervariasi
datatable(
  check_sheet,
  options = list(
    pageLength = 10,
    scrollCollapse = TRUE,
    autoWidth = TRUE
  ),
  rownames = FALSE,
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: left; 
             font-size: 18px; font-weight: bold;',
    'Check Sheet: Citizen Requests for Government Services with Detailed Problem Status'
  ),
  class = 'stripe hover compact'
)
library(dplyr)
library(DT)

# Count frequency of each delay reason (Problem)
reason_summary <- check_sheet %>%
  count(Problem, sort = TRUE) %>%
  rename(Frequency = n)

# Display summary table
datatable(
  reason_summary,
  options = list(
    scrollCollapse = TRUE,
    searching = FALSE,   # Remove search box
    paging = FALSE       # Remove pagination
  ),
  rownames = FALSE,
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: left; 
             font-size: 18px; font-weight: bold;',
    'Check Sheet: Summary of Delay Reasons'
  ),
  class = 'stripe hover compact'
)
library(plotly)

# Count frequency of each delay reason (Problem) from the previous dataset
reason_summary <- check_sheet %>%
  count(Problem, sort = TRUE) %>%
  rename(Frequency = n)

# Interactive bar chart using plotly
plot_ly(reason_summary,
        x = ~Frequency,
        y = ~reorder(Problem, Frequency),  # Reorder the problems based on frequency
        type = 'bar',
        orientation = 'h',
        marker = list(
          color = ~Frequency,
          colorscale = 'Viridis',  # Can also try: 'Bluered', 'Cividis', 'YlOrRd'
          showscale = TRUE
        )
) %>% 
  layout(
    title = list(text = "Check Sheet: Frequency of Delay Reasons", font = list(size = 18)),
    xaxis = list(title = "Frequency"),
    yaxis = list(title = "Reason"),
    margin = list(l = 120)
  )

Fishbone Diagram

library(DiagrammeR)
library(rsvg)

# Modify the graph to reflect the delay reasons from the check_sheet data
graph <- grViz("  
digraph fishbone {
  graph [layout = dot, rankdir = LR]

  # Default node styles
  node [fontname=Helvetica, fontsize=25, style=filled]

  # Central problem
  Problem [label='Delayed Requests for Government Services', shape=ellipse, fillcolor=lightcoral, width=5.0, height=1.2]

  # Category nodes (shared style)
  node [shape=diamond, width=2.5, height=1.0, fillcolor='#FFD700']
  A1 [label='Documents']
  A2 [label='Verification']
  A3 [label='Payment']
  A4 [label='Approval']
  A5 [label='Technical Issues']

  # Reset node style for sub-categories
  node [shape=ellipse, width=2.5, height=0.6, fillcolor='#90EE90']
  A1a [label='Incomplete or missing documents']
  A1b [label='Incorrect document details']

  A2a [label='Pending biometric verification']
  A2b [label='Unclear verification instructions']

  A3a [label='Payment system error']
  A3b [label='System overload or downtime']

  A4a [label='Delay in approval process']
  A4b [label='Waiting for further inspection']

  A5a [label='Network issues']
  A5b [label='Software malfunction']

  # Relationships
  A1 -> Problem
  A2 -> Problem
  A3 -> Problem
  A4 -> Problem
  A5 -> Problem

  A1a -> A1
  A1b -> A1

  A2a -> A2
  A2b -> A2

  A3a -> A3
  A3b -> A3

  A4a -> A4
  A4b -> A4

  A5a -> A5
  A5b -> A5
}
")

# Membuat folder untuk menyimpan output gambar jika belum ada
output_dir <- "images/layanan"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive =TRUE)
}

graph

Flowchart Process

library(DiagrammeR)

# Flowchart vertikal dengan layout ke bawah
graph <- grViz("
digraph flowchart {
  graph [layout = dot, rankdir = TB]

  # Definisi node (simbol)
  node [fontname=Helvetica, fontsize=16, style=filled, width=2.5]

  # Start node
  Start [label='Start', shape=ellipse, fillcolor=lightblue]

  # Proses jenis layanan
  JenisLayanan [label='Pilih Jenis Layanan', shape=diamond, fillcolor='#FFD700']
  
  # Pilihan jenis layanan
  DocumentVerification [label='Cek Dokumen', shape=box, fillcolor='#90EE90']
  IDCardIssuance [label='Penerbitan KTP', shape=box, fillcolor='#90EE90']
  BusinessRegistration [label='Registrasi Usaha', shape=box, fillcolor='#90EE90']
  TaxPayment [label='Bantuan Pembayaran Pajak', shape=box, fillcolor='#90EE90']
  PermitApplication [label='Aplikasi Izin Usaha', shape=box, fillcolor='#90EE90']

  # Proses status layanan
  StatusSelesai [label='Layanan Selesai', shape=ellipse, fillcolor='lightgreen']
  StatusBelumSelesai [label='Layanan Belum Selesai', shape=ellipse, fillcolor='lightcoral']

  # Hubungan antar node
  Start -> JenisLayanan
  JenisLayanan -> DocumentVerification
  JenisLayanan -> IDCardIssuance
  JenisLayanan -> BusinessRegistration
  JenisLayanan -> TaxPayment
  JenisLayanan -> PermitApplication

  DocumentVerification -> StatusSelesai [label='Selesai']
  DocumentVerification -> StatusBelumSelesai [label='Belum Selesai']

  IDCardIssuance -> StatusSelesai [label='Selesai']
  IDCardIssuance -> StatusBelumSelesai [label='Belum Selesai']

  BusinessRegistration -> StatusSelesai [label='Selesai']
  BusinessRegistration -> StatusBelumSelesai [label='Belum Selesai']

  TaxPayment -> StatusSelesai [label='Selesai']
  TaxPayment -> StatusBelumSelesai [label='Belum Selesai']

  PermitApplication -> StatusSelesai [label='Selesai']
  PermitApplication -> StatusBelumSelesai [label='Belum Selesai']
}
")

# Output Flowchart
graph

Histogram

# Load libraries
library(ggplot2)
library(dplyr)

# Summarize frequency by category
hist_data <- check_sheet %>%
  count(Problem, sort = TRUE) %>%
  rename(Reason = Problem, Frequency = n)

# Sort dari yang kecil ke besar
sorted_data <- hist_data %>%
  arrange(Frequency)

# Bagi menjadi dua, lalu gabungkan ulang dengan yang paling besar di tengah
n <- nrow(sorted_data)
half <- floor(n / 2)

# Urutan: kecil → besar (tengah) ← kecil
if (n %% 2 == 0) {
  new_order <- c(sorted_data$Reason[1:half], rev(sorted_data$Reason[(half + 1):n]))
} else {
  new_order <- c(sorted_data$Reason[1:half], sorted_data$Reason[n], rev(sorted_data$Reason[(half + 1):(n - 1)]))
}

# Tambahkan urutan baru
hist_data$Reason <- factor(hist_data$Reason, levels = new_order)
hist_data <- hist_data[order(hist_data$Reason), ]
hist_data$Index <- seq_along(hist_data$Reason)

# Buat spline untuk density-like curve
spline_data <- as.data.frame(spline(x = hist_data$Index, y = hist_data$Frequency, n = 200))

# Plot dengan ggplot2
ggplot(hist_data, aes(x = Index, y = Frequency)) +
  geom_col(fill = "lightblue", color = "black", width = 0.9) +
  geom_line(data = spline_data, aes(x = x, y = y), color = "black", size = 1.2) +
  scale_x_continuous(breaks = hist_data$Index, labels = hist_data$Reason) +
  labs(
    title = "Histogram of Delay Reasons with Centered Peak",
    x = "Delay Reason",
    y = "Frequency / Density"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

Pareto Chart

# Load necessary libraries
library(dplyr)
library(plotly)
library(RColorBrewer)

# Summarize the number of delays by reason (using the 'Problem' column)
pareto_data <- check_sheet %>%
  count(Problem, sort = TRUE) %>%
  mutate(
    cum_freq = cumsum(n) / sum(n) * 100  # cumulative percentage
  )

# Create different colors for each delay reason
colors <- brewer.pal(n = length(pareto_data$Problem), name = "Set3")

# Create Plotly Pareto Chart
fig <- plot_ly()

# Add Bar Chart (Count) - with different colors
fig <- fig %>% add_bars(
  x = ~reorder(pareto_data$Problem, -pareto_data$n),
  y = ~pareto_data$n,
  name = 'Number of Delays',
  marker = list(color = colors),
  yaxis = "y1"
)

# Add Cumulative Line
fig <- fig %>% add_lines(
  x = ~reorder(pareto_data$Problem, -pareto_data$n),
  y = ~pareto_data$cum_freq,
  name = 'Cumulative (%)',
  yaxis = "y2",
  line = list(color = 'red', dash = 'dash')
)

# Add Cut-off Line at 80%
fig <- fig %>% add_lines(
  x = ~reorder(pareto_data$Problem, -pareto_data$n),
  y = rep(80, length(pareto_data$Problem)),
  name = 'Cut-off 80%',
  yaxis = "y2",
  line = list(color = 'green', dash = 'dot')
)

# Adjust layout
fig <- fig %>% layout(
  title = "Pareto Chart - Delay Reasons for Citizen Requests",
  xaxis = list(
    title = "Delay Reasons",
    tickangle = -45   # tilt 45 degrees
  ),
  yaxis = list(title = "Number of Delays"),
  yaxis2 = list(
    title = "Cumulative (%)",
    overlaying = "y",
    side = "right",
    range = c(0, 100)
  ),
  legend = list(x = 0.8, y = 0.75),
  shapes = list(
    list(
      type = "line",
      x0 = -0.5,
      x1 = length(pareto_data$Problem) - 0.5,
      y0 = 80,
      y1 = 80,
      yref = "y2",
      line = list(color = "green", width = 2, dash = "dot")
    )
  )
)

# Show the chart
fig

Scatter Diagram

# Load the plotly package
library(plotly)

# Modifikasi dataset berdasarkan data yang kamu berikan
set.seed(42)  # For reproducibility

# Jumlah permintaan berdasarkan jenis layanan
data <- data.frame(
  Request_Type = c("Document Verification", "ID Card Issuance", "Business Registration", 
                   "Tax Payment Assistance", "Permit Application"),
  Number_of_Requests = c(30, 45, 50, 40, 60),  # Jumlah permintaan per jenis layanan
  Number_of_Fulfilled = c(20, 35, 40, 30, 50),  # Jumlah layanan yang selesai
  Number_of_Not_Fulfilled = c(10, 10, 10, 10, 10)  # Jumlah layanan yang belum selesai
)

# Hitung koefisien korelasi antara jumlah permintaan dan status layanan (terpenuhi atau belum terpenuhi)
correlation_value <- cor(data$Number_of_Requests, data$Number_of_Fulfilled)

# Buat scatter plot menggunakan Plotly dengan garis regresi linear
fig <- plot_ly(data, 
               x = ~Number_of_Requests, 
               y = ~Number_of_Fulfilled, 
               type = 'scatter', 
               mode = 'markers',
               marker = list(color = 'blue', size = 10)) %>%
  add_lines(x = data$Number_of_Requests, 
            y = predict(lm(Number_of_Fulfilled ~ Number_of_Requests, data = data)), 
            line = list(color = 'red', dash = 'solid', width = 2)) %>%
  layout(title = paste("Scatter Plot: Relationship Between Number of Requests and Number of Fulfilled Requests\nCorrelation: ", round(correlation_value, 2)),
         xaxis = list(title = "Number of Requests"),
         yaxis = list(title = "Number of Fulfilled Requests"))

# Show the plot
fig