library(readr)
## Warning: package 'readr' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the semicolon-delimited SUSENAS extract into `df`.
df <- read_delim(
  file = "C:/Users/USER/Downloads/2023 Maret JABAR - SUSENAS KOR INDIVIDU PART1 (3).csv",
  delim = ";"
)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 84688 Columns: 183
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (71): R703_A, R703_B, R703_C, R703_D, R703_X, R807_A, R807_B, R807_C, R8...
## dbl (99): ...1, URUT, PSU, SSU, WI1, WI2, R101, R102, R105, R401, R403, R404...
## lgl (13): R810_F, R811_F, R1107_F, R1109_D, R1110_A, R1110_C, R1110_F, R1202...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the tibble preview of the loaded data.
print(df)
## # A tibble: 84,688 × 183
##     ...1   URUT   PSU    SSU   WI1    WI2  R101  R102  R105  R401  R403  R404
##    <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     0 500001 12448 123442 12435 123427    32     7     2     1     1     4
##  2     1 500001 12448 123442 12435 123427    32     7     2     2     3     2
##  3     2 500001 12448 123442 12435 123427    32     7     2     3     6     1
##  4     3 500001 12448 123442 12435 123427    32     7     2     4     6     1
##  5     4 500002 31373 311039 31360 311024    32    72     1     1     1     3
##  6     5 500002 31373 311039 31360 311024    32    72     1     2     3     1
##  7     6 500003 12092 119908 12079 119893    32     6     2     1     1     2
##  8     7 500003 12092 119908 12079 119893    32     6     2     2     2     2
##  9     8 500003 12092 119908 12079 119893    32     6     2     3     3     1
## 10     9 500004 31135 308689 31122 308674    32    72     1     1     1     2
## # ℹ 84,678 more rows
## # ℹ 171 more variables: R405 <dbl>, R407 <dbl>, R408 <dbl>, R409 <dbl>,
## #   R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>, R501 <dbl>, R502 <dbl>,
## #   R503 <dbl>, R504 <dbl>, R506 <dbl>, R507 <dbl>, R508 <dbl>, R509 <dbl>,
## #   R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl>, R606 <dbl>,
## #   R607 <dbl>, R608 <dbl>, R609 <dbl>, R610 <dbl>, R611 <dbl>, R612 <dbl>,
## #   R613 <dbl>, R614 <dbl>, R615 <dbl>, R616 <dbl>, R617 <dbl>, R618 <dbl>, …
# Keep only the Kabupaten Bogor and Kota Bogor rows, identified by R102.
# R102 is read as a numeric column, so it is matched against numeric codes
# instead of relying on implicit number-to-string coercion inside %in%.
df_bogor <- df[df$R102 %in% c(1, 71), ]
# Keep the region code (R102) together with R905 through R916.
df_bogor_selected <- df_bogor[, c("R102", paste0("R", 905:916))]
# Preview the first rows of the selected columns.
head(df_bogor_selected)
## # A tibble: 6 × 13
##    R102  R905  R906  R907  R908  R909  R910  R911  R912  R913  R914  R915  R916
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1     5     0     0     0     0     0     0     0     0     0     0     0
## 2     1     5     0     0     0     0     0     0     0     0     0     0     0
## 3     1     5     0     0     0     0     0     0     0     0     0     0     0
## 4     1     5     0     0     0     0     0     0     0     0     0     0     0
## 5     1     5     0     0     0     0     0     0     0     0     0     0     0
## 6     1     5     0     0     0     0     0     0     0     0     0     0     0
# Convert R905 and R916 to factors in one step.
df_bogor_selected[c("R905", "R916")] <- lapply(
  df_bogor_selected[c("R905", "R916")],
  as.factor
)

library(dplyr)

# Cross-tabulate R905 (rows) against R916 (columns) and show the counts.
table_korban_lapor <- table(
  df_bogor_selected[["R905"]],
  df_bogor_selected[["R916"]]
)
print(table_korban_lapor)
##    
##        0    1    5
##   1   39    6   20
##   5 7335    0    0
# Chi-square test of independence on the R905 x R916 cross-tabulation.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect; the table contains zero cells, so an exact test may be more
# appropriate here -- confirm with the analyst.
chisq.test(table_korban_lapor)
## Warning in chisq.test(table_korban_lapor): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table_korban_lapor
## X-squared = 2944.3, df = 2, p-value < 2.2e-16
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
# Dodged bar chart of the cross-tabulated counts: one group per R905 value,
# one bar per R916 value.
ggplot(
  data = as.data.frame(table_korban_lapor),
  mapping = aes(x = Var1, y = Freq, fill = Var2)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(
    x = "Pernah Menjadi Korban (R905)",
    y = "Jumlah Orang",
    fill = "Melapor ke Polisi (R916)"
  )

# Express each cell as a percentage of its row (R905) total.
prop_lapor <- 100 * prop.table(table_korban_lapor, margin = 1)
print(prop_lapor)
##    
##              0          1          5
##   1  60.000000   9.230769  30.769231
##   5 100.000000   0.000000   0.000000
# Reshape the row percentages into one row per R905 code.
# as.data.frame() on a table yields long format (Var1, Var2, Freq), so naming
# only two columns left the percentages in an unnamed third column and put the
# R916 codes under "Lapor". Build the wide layout explicitly instead.
# NOTE(review): assumes R916 code "1" means reported and "5" means not
# reported (with "0" as not applicable) -- confirm against the codebook.
stopifnot(all(c("1", "5") %in% colnames(prop_lapor)))
df_lapor <- data.frame(
  `Tidak Lapor` = as.numeric(prop_lapor[, "5"]),
  Lapor = as.numeric(prop_lapor[, "1"]),
  JenisKejahatan = rownames(prop_lapor),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
print(df_lapor)

library(ggplot2)

# Bar chart with categories ordered by their reporting percentage. `Lapor` is
# numeric now, so reorder() no longer warns about a non-numeric argument.
ggplot(df_lapor, aes(x = reorder(JenisKejahatan, Lapor), y = Lapor, fill = JenisKejahatan)) +
  geom_bar(stat = "identity") +
  labs(title = "Jenis Kejahatan yang Paling Sering Dilaporkan",
       x = "Jenis Kejahatan",
       y = "Persentase Pelaporan (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = "none")

# Same chart with categories kept in their order of appearance.
ggplot(df_lapor, aes(x = factor(JenisKejahatan, levels = unique(JenisKejahatan)), y = Lapor, fill = JenisKejahatan)) +
  geom_bar(stat = "identity") +
  labs(title = "Jenis Kejahatan yang Paling Sering Dilaporkan",
       x = "Jenis Kejahatan",
       y = "Persentase Pelaporan (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = "none")

# Attach descriptive labels to the category codes.
# NOTE(review): the codes come from R905, which the earlier axis label
# describes as victim status; these crime-type labels may not apply to that
# variable -- confirm against the codebook.
df_lapor$JenisKejahatan <- factor(df_lapor$JenisKejahatan,
                                  levels = c(1, 2, 3, 4, 5, 6, 7),
                                  labels = c("Pencurian", "Perampokan", "Penipuan", "Kekerasan", "Narkoba", "Korupsi", "Lainnya"))

# Horizontal version of the chart. labs() names aesthetics, not screen
# positions, so with coord_flip() the category label still belongs to `x` and
# the percentage label to `y`.
ggplot(df_lapor, aes(x = reorder(JenisKejahatan, Lapor), y = Lapor, fill = JenisKejahatan)) +
  geom_bar(stat = "identity") +
  labs(title = "Jenis Kejahatan yang Paling Sering Dilaporkan",
       x = "Jenis Kejahatan",
       y = "Persentase Pelaporan (%)") +
  coord_flip() +  # Flip the chart so the bars run horizontally
  guides(fill = "none")

# Treat the region code (R102) and R905 as categorical, then count every
# region x R905 combination.
df_bogor_selected[c("R102", "R905")] <- lapply(
  df_bogor_selected[c("R102", "R905")],
  as.factor
)
table_kejahatan_wilayah <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R905"]]
)
print(table_kejahatan_wilayah)
##     
##         1    5
##   1    28 4554
##   71   37 2781
# Chi-square test of independence on the region x R905 table. The continuity
# correction is the default for this test; it is spelled out here.
hasil_chi_wilayah <- chisq.test(table_kejahatan_wilayah, correct = TRUE)
print(hasil_chi_wilayah)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_kejahatan_wilayah
## X-squared = 9.0837, df = 1, p-value = 0.002579
# Count R905 values per region (rows: R102, columns: R905).
table_kejahatan_wilayah <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R905"]]
)

# Show the counts.
print(table_kejahatan_wilayah)
##     
##         1    5
##   1    28 4554
##   71   37 2781
# Grouped bar chart of the region x R905 counts: one group per R905 value,
# one bar per region row.
# `legend.text` is written out in full; the abbreviated `legend =` only worked
# through partial argument matching.
barplot(table_kejahatan_wilayah, beside = TRUE,
        col = c("blue", "red"),
        legend.text = c("Kabupaten", "Kota"),
        main = "Perbandingan Jumlah Kejahatan di Kota dan Kabupaten Bogor",
        xlab = "Pernah menjadi korban kejahatan",
        ylab = "Jumlah kejadian")

# Frequency table of R906 by region (rows: R102, columns: R906).
# TODO: replace R906 with the actual crime-type variable if it differs.
table_jenis_kejahatan <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R906"]]
)

# Show the counts.
print(table_jenis_kejahatan)
##     
##         0    1    2
##   1  4555   26    1
##   71 2786   30    2
# Stacked bar chart of the region x R906 counts: one bar per R906 value,
# stacked by region row.
# `legend.text` is written out in full; the abbreviated `legend =` only worked
# through partial argument matching.
barplot(table_jenis_kejahatan, beside = FALSE,
        col = c("blue", "red"),
        legend.text = c("Kabupaten", "Kota"),
        main = "Perbandingan Jenis Kejahatan di Kota dan Kabupaten Bogor",
        xlab = "Jenis Kejahatan",
        ylab = "Jumlah Kasus",
        las = 2) # Rotate the axis labels so they are easier to read

# Chi-square test of independence between region (R102) and R906.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect; the table has cells with counts of 1 and 2, so an exact test
# may be more appropriate -- confirm with the analyst.
chi_jenis_kejahatan <- chisq.test(table_jenis_kejahatan)
## Warning in chisq.test(table_jenis_kejahatan): Chi-squared approximation may be
## incorrect
print(chi_jenis_kejahatan)
## 
##  Pearson's Chi-squared test
## 
## data:  table_jenis_kejahatan
## X-squared = 6.7909, df = 2, p-value = 0.03353
# Frequency table of R906 by region (rows: R102, columns: R906).
# TODO: replace R906 with the actual crime-type variable if it differs.
table_jenis_kejahatan <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R906"]]
)

# Show the counts.
print(table_jenis_kejahatan)
##     
##         0    1    2
##   1  4555   26    1
##   71 2786   30    2
# Spearman rank correlation between R906 and R910.
# Make sure both columns are numeric before ranking them.
df_bogor_selected$R906 <- as.numeric(df_bogor_selected$R906)
df_bogor_selected$R910 <- as.numeric(df_bogor_selected$R910)
# Both columns take only a handful of distinct values, so the ranks are heavily
# tied and an exact p-value cannot be computed. Request the asymptotic p-value
# explicitly instead of triggering the "Cannot compute exact p-value with
# ties" warning; the reported estimate and p-value are unchanged.
cor_test <- cor.test(df_bogor_selected$R906, df_bogor_selected$R910,
                     method = "spearman", exact = FALSE)
print(cor_test)
## 
##  Spearman's rank correlation rho
## 
## data:  df_bogor_selected$R906 and df_bogor_selected$R910
## S = 4.4366e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3430931
library(ggplot2)
# For each of R905..R916, count the rows whose value equals 1 (missing values
# are ignored).
kejahatan_freq <- colSums(
  df_bogor_selected[, paste0("R", 905:916)] == 1,
  na.rm = TRUE
)
# Put the counts into a data frame for plotting.
df_kejahatan <- data.frame(
  Jenis_Kejahatan = names(kejahatan_freq),
  Frekuensi = kejahatan_freq
)
# Bar chart of the counts, largest first.
ggplot(
  data = df_kejahatan,
  mapping = aes(x = reorder(Jenis_Kejahatan, -Frekuensi), y = Frekuensi)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Distribusi Jenis Kejahatan di Bogor",
    x = "Jenis Kejahatan",
    y = "Frekuensi"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Make sure R916 is a factor.
df_bogor_selected$R916 <- as.factor(df_bogor_selected$R916)

# Cross-tabulate each of R905..R915 against R916.
# lapply() always returns a named list of tables. sapply() returned a list only
# because the tables differ in shape, and would collapse them into a matrix if
# every column had the same number of distinct values.
table_kejahatan_laporan <- lapply(
  df_bogor_selected[, paste0("R", 905:915)],
  function(x) table(x, df_bogor_selected$R916)
)

# Show the resulting tables.
table_kejahatan_laporan
## $R905
##    
## x      0    1    5
##   1   39    6   20
##   5 7335    0    0
## 
## $R906
##    
## x      0    1    5
##   0 7338    0    3
##   1   34    6   16
##   2    2    0    1
## 
## $R907
##    
## x      0    1    5
##   0 7374    0    5
##   1    0    6   14
##   2    0    0    1
## 
## $R908
##    
## x      0    1    5
##   0 7373    2   17
##   1    0    2    3
##   2    1    0    0
##   5    0    2    0
## 
## $R909
##    
## x      0    1    5
##   0 7374    3   17
##   1    0    2    3
##   5    0    1    0
## 
## $R910
##    
## x      0    1    5
##   0 7374    2   17
##   1    0    2    3
##   5    0    2    0
## 
## $R911
##    
## x      0    1    5
##   0 7374    3   17
##   1    0    2    3
##   5    0    1    0
## 
## $R912
##    
## x      0    1    5
##   0 7374    2   17
##   1    0    2    3
##   5    0    2    0
## 
## $R913
##    
## x      0    1    5
##   0 7374    3   17
##   1    0    2    3
##   5    0    1    0
## 
## $R914
##    
## x      0    1    5
##   0 7370    2   12
##   1    4    2    7
##   2    0    0    1
##   5    0    2    0
## 
## $R915
##    
## x      0    1    5
##   0 7374    3   12
##   1    0    2    7
##   2    0    0    1
##   5    0    1    0
# Fisher's exact test of each of R905..R915 against R916.
fisher_results <- lapply(
  df_bogor_selected[, paste0("R", 905:915)],
  function(x) fisher.test(table(x, df_bogor_selected$R916))
)

# Collect the p-value of every test. vapply() guarantees a named numeric
# vector, whereas sapply() changes its return type with the input.
# NOTE(review): eleven tests are run without a multiple-comparison adjustment
# -- confirm whether one is wanted.
fisher_pvalues <- vapply(fisher_results, function(x) x$p.value, numeric(1))
print(fisher_pvalues)
##         R905         R906         R907         R908         R909         R910 
## 1.061246e-56 1.194693e-47 1.088728e-57 1.194522e-19 1.456266e-16 4.407170e-20 
##         R911         R912         R913         R914         R915 
## 1.456266e-16 4.407170e-20 1.456266e-16 4.482178e-29 4.290499e-29