library(readr)
## Warning: package 'readr' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the semicolon-delimited SUSENAS extract (per the file name: March 2023,
# West Java, KOR individual records, part 1) into a tibble.
path_susenas <- "C:/Users/USER/Downloads/2023 Maret JABAR - SUSENAS KOR INDIVIDU PART1 (3).csv"
df <- read_delim(file = path_susenas, delim = ";")
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 84688 Columns: 183
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (71): R703_A, R703_B, R703_C, R703_D, R703_X, R807_A, R807_B, R807_C, R8...
## dbl (99): ...1, URUT, PSU, SSU, WI1, WI2, R101, R102, R105, R401, R403, R404...
## lgl (13): R810_F, R811_F, R1107_F, R1109_D, R1110_A, R1110_C, R1110_F, R1202...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to inspect the first rows and the parsed column types
df
## # A tibble: 84,688 × 183
## ...1 URUT PSU SSU WI1 WI2 R101 R102 R105 R401 R403 R404
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 500001 12448 123442 12435 123427 32 7 2 1 1 4
## 2 1 500001 12448 123442 12435 123427 32 7 2 2 3 2
## 3 2 500001 12448 123442 12435 123427 32 7 2 3 6 1
## 4 3 500001 12448 123442 12435 123427 32 7 2 4 6 1
## 5 4 500002 31373 311039 31360 311024 32 72 1 1 1 3
## 6 5 500002 31373 311039 31360 311024 32 72 1 2 3 1
## 7 6 500003 12092 119908 12079 119893 32 6 2 1 1 2
## 8 7 500003 12092 119908 12079 119893 32 6 2 2 2 2
## 9 8 500003 12092 119908 12079 119893 32 6 2 3 3 1
## 10 9 500004 31135 308689 31122 308674 32 72 1 1 1 2
## # ℹ 84,678 more rows
## # ℹ 171 more variables: R405 <dbl>, R407 <dbl>, R408 <dbl>, R409 <dbl>,
## # R406A <dbl>, R406B <dbl>, R406C <dbl>, R410 <dbl>, R501 <dbl>, R502 <dbl>,
## # R503 <dbl>, R504 <dbl>, R506 <dbl>, R507 <dbl>, R508 <dbl>, R509 <dbl>,
## # R601 <dbl>, R602 <dbl>, R603 <dbl>, R604 <dbl>, R605 <dbl>, R606 <dbl>,
## # R607 <dbl>, R608 <dbl>, R609 <dbl>, R610 <dbl>, R611 <dbl>, R612 <dbl>,
## # R613 <dbl>, R614 <dbl>, R615 <dbl>, R616 <dbl>, R617 <dbl>, R618 <dbl>, …
# Keep only respondents from Kabupaten Bogor and Kota Bogor, identified by the
# R102 region code. R102 is parsed as a number, so it is matched against
# numeric codes rather than relying on implicit number-to-string coercion.
kode_bogor <- c(1, 71)
df_bogor <- df[df$R102 %in% kode_bogor, ]
# Select the region code (R102) together with R905 through R916
kolom_terpilih <- c(
  "R102", "R905", "R906", "R907", "R908", "R909",
  "R910", "R911", "R912", "R913", "R914", "R915", "R916"
)
df_bogor_selected <- df_bogor[, kolom_terpilih]
# Show the first rows of the selected data
head(df_bogor_selected)
## # A tibble: 6 × 13
## R102 R905 R906 R907 R908 R909 R910 R911 R912 R913 R914 R915 R916
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 5 0 0 0 0 0 0 0 0 0 0 0
## 2 1 5 0 0 0 0 0 0 0 0 0 0 0
## 3 1 5 0 0 0 0 0 0 0 0 0 0 0
## 4 1 5 0 0 0 0 0 0 0 0 0 0 0
## 5 1 5 0 0 0 0 0 0 0 0 0 0 0
## 6 1 5 0 0 0 0 0 0 0 0 0 0 0
# Convert R905 and R916 to factors so they are tabulated as categories
df_bogor_selected[["R905"]] <- as.factor(df_bogor_selected[["R905"]])
df_bogor_selected[["R916"]] <- as.factor(df_bogor_selected[["R916"]])
# Cross-tabulate R905 (rows) against R916 (columns)
library(dplyr)
table_korban_lapor <- table(
  df_bogor_selected[["R905"]],
  df_bogor_selected[["R916"]]
)
print(table_korban_lapor)
##
## 0 1 5
## 1 39 6 20
## 5 7335 0 0
# Chi-square test of independence between R905 and R916.
# NOTE(review): R warns that the chi-squared approximation may be incorrect for
# this table; the Fisher exact tests later in this script cover the same pair
# of variables.
print(chisq.test(table_korban_lapor))
## Warning in chisq.test(table_korban_lapor): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: table_korban_lapor
## X-squared = 2944.3, df = 2, p-value < 2.2e-16
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
# Grouped bar chart of the R905 x R916 counts. as.data.frame() turns the table
# into long format with columns Var1 (R905), Var2 (R916) and Freq.
ggplot(
  data = as.data.frame(table_korban_lapor),
  mapping = aes(x = Var1, y = Freq, fill = Var2)
) +
  geom_col(position = "dodge") +
  labs(
    x = "Pernah Menjadi Korban (R905)",
    y = "Jumlah Orang",
    fill = "Melapor ke Polisi (R916)"
  ) +
  theme_minimal()

# Row percentages: share of each R916 code within every R905 category
prop_lapor <- 100 * prop.table(table_korban_lapor, margin = 1)
print(prop_lapor)
##
## 0 1 5
## 1 60.000000 9.230769 30.769231
## 5 100.000000 0.000000 0.000000
# Reshape the row percentages into one row per R905 category.
# as.data.frame() on a table returns long format (Var1, Var2, Freq), so naming
# only two columns left a third column called NA and stored the R916 codes,
# not the percentages, in "Lapor". The wide layout is built explicitly instead.
# Percentages are shares of all respondents in the R905 row (margin = 1).
# NOTE(review): assumes R916 code 1 = reported to the police and code 5 = did
# not report; confirm against the SUSENAS codebook.
# The column name "JenisKejahatan" is kept for compatibility although it holds
# the R905 codes.
df_lapor <- data.frame(
  `Tidak Lapor` = as.numeric(prop_lapor[, "5"]),
  `Lapor` = as.numeric(prop_lapor[, "1"]),
  `JenisKejahatan` = rownames(prop_lapor),
  check.names = FALSE
)
print(df_lapor)
library(ggplot2)
# Reporting percentage per R905 category, ordered by value. "Lapor" is numeric
# here, so reorder() can compute its ordering without warnings and the former
# as.numeric() retry of this plot is no longer needed.
ggplot(df_lapor, aes(x = reorder(JenisKejahatan, Lapor), y = Lapor, fill = JenisKejahatan)) +
  geom_col() +
  labs(
    title = "Persentase Pelaporan ke Polisi menurut Status Korban",
    x = "Pernah Menjadi Korban (R905)",
    y = "Persentase Pelaporan (%)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = "none")

# Replace the R905 codes with readable labels. The earlier bar chart in this
# script labels R905 as "Pernah Menjadi Korban", so the codes are victim-status
# answers rather than crime types.
# NOTE(review): assumes code 1 = yes and code 5 = no; confirm against the
# SUSENAS codebook.
df_lapor$JenisKejahatan <- factor(df_lapor$JenisKejahatan,
  levels = c("1", "5"),
  labels = c("Pernah menjadi korban", "Tidak pernah menjadi korban")
)
# Horizontal version of the chart. labs() names the aesthetics before
# coord_flip() is applied, so x is the category and y is the percentage.
ggplot(df_lapor, aes(x = reorder(JenisKejahatan, Lapor), y = Lapor, fill = JenisKejahatan)) +
  geom_col() +
  labs(
    title = "Persentase Pelaporan ke Polisi menurut Status Korban",
    x = "Pernah Menjadi Korban (R905)",
    y = "Persentase Pelaporan (%)"
  ) +
  coord_flip() + # Flip the chart so the bars run horizontally
  guides(fill = "none")

# Treat the region code (R102) and R905 as categorical variables
df_bogor_selected[["R102"]] <- as.factor(df_bogor_selected[["R102"]])
df_bogor_selected[["R905"]] <- as.factor(df_bogor_selected[["R905"]])
# Cross-tabulate region (rows) against R905 (columns)
table_kejahatan_wilayah <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R905"]]
)
print(table_kejahatan_wilayah)
##
## 1 5
## 1 28 4554
## 71 37 2781
# Chi-square test of independence between region (R102) and R905. The table is
# 2 x 2, so the default continuity correction applies; it is spelled out here.
hasil_chi_wilayah <- chisq.test(table_kejahatan_wilayah, correct = TRUE)
print(hasil_chi_wilayah)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table_kejahatan_wilayah
## X-squared = 9.0837, df = 1, p-value = 0.002579
# Rebuild the region (R102) by R905 count table ahead of the bar plot
table_kejahatan_wilayah <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R905"]]
)
# Show the counts
print(table_kejahatan_wilayah)
##
## 1 5
## 1 28 4554
## 71 37 2781
# Grouped bar plot of the region-by-R905 counts. The table rows are the R102
# codes (1, 71); barplot() draws them as the bars inside each R905 group, so
# the colours and legend entries follow that row order.
# The legend is passed through the full argument name `legend.text` instead of
# relying on partial matching of `legend`.
barplot(table_kejahatan_wilayah,
  beside = TRUE,
  col = c("blue", "red"),
  legend.text = c("Kabupaten", "Kota"),
  main = "Perbandingan Jumlah Kejahatan di Kota dan Kabupaten Bogor",
  xlab = "Pernah menjadi korban kejahatan",
  ylab = "Jumlah kejadian"
)

# Frequency table of R906 by region (R102).
# The original note says to swap R906 for whichever variable holds the crime
# type.
table_jenis_kejahatan <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R906"]]
)
# Show the counts
print(table_jenis_kejahatan)
##
## 0 1 2
## 1 4555 26 1
## 71 2786 30 2
# Stacked bar plot of the region-by-R906 counts: one bar per R906 code, with
# the two R102 rows stacked inside it in the order of the colours and legend.
# The legend is passed through the full argument name `legend.text` instead of
# relying on partial matching of `legend`.
barplot(table_jenis_kejahatan,
  beside = FALSE,
  col = c("blue", "red"),
  legend.text = c("Kabupaten", "Kota"),
  main = "Perbandingan Jenis Kejahatan di Kota dan Kabupaten Bogor",
  xlab = "Jenis Kejahatan",
  ylab = "Jumlah Kasus",
  las = 2 # Rotate the axis labels so they are easier to read
)

# Chi-square test of independence between region (R102) and R906, stored so the
# result can be printed below.
# NOTE(review): the recorded warning shows R flagged the chi-squared
# approximation as possibly incorrect for this table, so treat the p-value
# with care.
chi_jenis_kejahatan <- chisq.test(table_jenis_kejahatan)
## Warning in chisq.test(table_jenis_kejahatan): Chi-squared approximation may be
## incorrect
print(chi_jenis_kejahatan)
##
## Pearson's Chi-squared test
##
## data: table_jenis_kejahatan
## X-squared = 6.7909, df = 2, p-value = 0.03353
# Rebuild the R906-by-region (R102) frequency table.
# The original note says to swap R906 for the variable that holds the crime
# type.
table_jenis_kejahatan <- table(
  df_bogor_selected[["R102"]],
  df_bogor_selected[["R906"]]
)
# Show the counts
print(table_jenis_kejahatan)
##
## 0 1 2
## 1 4555 26 1
## 71 2786 30 2
# Spearman rank correlation between R906 and R910.
# Both columns are coerced to numeric first so the ranks are computed on plain
# numbers.
df_bogor_selected$R906 <- as.numeric(df_bogor_selected$R906)
df_bogor_selected$R910 <- as.numeric(df_bogor_selected$R910)
# The data contain ties, for which cor.test() cannot compute an exact p-value.
# exact = FALSE requests the asymptotic p-value explicitly, which is what R
# fell back to anyway, without raising the ties warning.
cor_test <- cor.test(df_bogor_selected$R906, df_bogor_selected$R910,
  method = "spearman", exact = FALSE
)
print(cor_test)
##
## Spearman's rank correlation rho
##
## data: df_bogor_selected$R906 and df_bogor_selected$R910
## S = 4.4366e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3430931
library(ggplot2)
# Count, for each of R905 through R916, how many respondents have code 1
kolom_kejahatan <- c(
  "R905", "R906", "R907", "R908", "R909",
  "R910", "R911", "R912", "R913", "R914",
  "R915", "R916"
)
bernilai_satu <- df_bogor_selected[, kolom_kejahatan] == 1
kejahatan_freq <- colSums(bernilai_satu, na.rm = TRUE)
# Put the counts in a data frame for plotting
df_kejahatan <- data.frame(
  Jenis_Kejahatan = names(kejahatan_freq),
  Frekuensi = kejahatan_freq
)
# Bar chart of the counts, highest first
ggplot(
  data = df_kejahatan,
  mapping = aes(x = reorder(Jenis_Kejahatan, -Frekuensi), y = Frekuensi)
) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Distribusi Jenis Kejahatan di Bogor",
    x = "Jenis Kejahatan",
    y = "Frekuensi"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Make sure R916 (labelled in this script as reporting to the police) is a
# factor
df_bogor_selected$R916 <- as.factor(df_bogor_selected$R916)
# Cross-tabulate each of R905 through R915 against R916.
# lapply() always returns a named list of tables; sapply() returned a list here
# only because the tables differ in shape, and would collapse them into a
# matrix if they ever had the same dimensions.
table_kejahatan_laporan <- lapply(
  df_bogor_selected[, c(
    "R905", "R906", "R907", "R908", "R909",
    "R910", "R911", "R912", "R913", "R914",
    "R915"
  )],
  function(x) table(x, df_bogor_selected$R916)
)
# Print the list of tables
table_kejahatan_laporan
## $R905
##
## x 0 1 5
## 1 39 6 20
## 5 7335 0 0
##
## $R906
##
## x 0 1 5
## 0 7338 0 3
## 1 34 6 16
## 2 2 0 1
##
## $R907
##
## x 0 1 5
## 0 7374 0 5
## 1 0 6 14
## 2 0 0 1
##
## $R908
##
## x 0 1 5
## 0 7373 2 17
## 1 0 2 3
## 2 1 0 0
## 5 0 2 0
##
## $R909
##
## x 0 1 5
## 0 7374 3 17
## 1 0 2 3
## 5 0 1 0
##
## $R910
##
## x 0 1 5
## 0 7374 2 17
## 1 0 2 3
## 5 0 2 0
##
## $R911
##
## x 0 1 5
## 0 7374 3 17
## 1 0 2 3
## 5 0 1 0
##
## $R912
##
## x 0 1 5
## 0 7374 2 17
## 1 0 2 3
## 5 0 2 0
##
## $R913
##
## x 0 1 5
## 0 7374 3 17
## 1 0 2 3
## 5 0 1 0
##
## $R914
##
## x 0 1 5
## 0 7370 2 12
## 1 4 2 7
## 2 0 0 1
## 5 0 2 0
##
## $R915
##
## x 0 1 5
## 0 7374 3 12
## 1 0 2 7
## 2 0 0 1
## 5 0 1 0
# Fisher's exact test of each of R905 through R915 against R916
fisher_results <- lapply(
  df_bogor_selected[, c(
    "R905", "R906", "R907", "R908", "R909",
    "R910", "R911", "R912", "R913", "R914",
    "R915"
  )],
  function(x) fisher.test(table(x, df_bogor_selected$R916))
)
# Collect the p-value of every test. vapply() guarantees a named numeric vector
# with one value per column, whereas sapply()'s return type depends on its
# input.
fisher_pvalues <- vapply(fisher_results, function(x) x$p.value, numeric(1))
print(fisher_pvalues)
## R905 R906 R907 R908 R909 R910
## 1.061246e-56 1.194693e-47 1.088728e-57 1.194522e-19 1.456266e-16 4.407170e-20
## R911 R912 R913 R914 R915
## 1.456266e-16 4.407170e-20 1.456266e-16 4.482178e-29 4.290499e-29