Analisis Univariat
Histogram
library(ggplot2)
ggplot(land, aes(x = interest_rate)) +
# Histogram (pakai density, bukan frekuensi)
geom_histogram(
aes(y = after_stat(density)),
bins = 20,
na.rm = TRUE,
fill = "#8ECAE6",
color = "white",
alpha = 0.8
) +
# Density curve
geom_density(
na.rm = TRUE,
color = "#023047",
linewidth = 1.2
) +
# Garis median
geom_vline(
aes(xintercept = median(interest_rate, na.rm = TRUE)),
color = "#D62828",
linetype = "dashed",
linewidth = 1
) +
labs(
title = "Distribusi Interest Rate Proyek",
subtitle = "Histogram (Bar) + Density",
x = "Interest Rate (%)",
y = "Density",
caption = "Garis putus-putus merah menunjukkan nilai median"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(color = "grey40"),
plot.caption = element_text(size = 9, color = "grey40")
)

Density Plot
library(ggplot2)
ggplot(land, aes(x = interest_rate)) +
geom_density(
na.rm = TRUE,
fill = "#8ECAE6",
color = "#023047",
alpha = 0.6,
linewidth = 1
) +
geom_vline(
aes(xintercept = median(interest_rate, na.rm = TRUE)),
color = "#D62828",
linetype = "dashed",
linewidth = 1
) +
labs(
title = "Density Plot Interest Rate Proyek",
subtitle = "Distribusi Tidak Normal (Skewed & Bounded)",
x = "Interest Rate (%)",
y = "Density",
caption = "Garis putus-putus merah menunjukkan nilai median"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(color = "grey40"),
plot.caption = element_text(size = 9, color = "grey40")
)

# install.packages("plotly") # jika belum terpasang
library(plotly)
# Ambil data & buang NA
x <- land$interest_rate
x <- x[!is.na(x)]
# Hitung density & median
d <- density(x)
med <- median(x)
# Plotly density plot
p <- plot_ly() %>%
add_lines(
x = d$x,
y = d$y,
line = list(color = "#023047", width = 3),
fill = "tozeroy",
fillcolor = "rgba(142,202,230,0.5)",
name = "Density"
) %>%
add_segments(
x = med, xend = med,
y = 0, yend = max(d$y),
line = list(color = "#D62828", width = 2, dash = "dash"),
name = "Median"
) %>%
layout(
title = list(
text = "Density Plot Interest Rate Proyek<br><sup>Distribusi Tidak Normal (Skewed & Bounded)</sup>"
),
xaxis = list(title = "Interest Rate (%)"),
yaxis = list(title = "Density"),
showlegend = TRUE
)
p
Bar+Density Plot
library(plotly)
# Ambil data & buang NA
x <- land$interest_rate
x <- x[!is.na(x)]
# Hitung density & median
d <- density(x)
med <- median(x)
p <- plot_ly() %>%
add_histogram(
x = x,
nbinsx = 20,
histnorm = "probability density",
marker = list(
color = "rgba(142,202,230,0.7)",
line = list(color = "white", width = 1)
),
name = "Histogram"
) %>%
add_lines(
x = d$x,
y = d$y,
line = list(color = "#023047", width = 3),
name = "Density"
) %>%
add_segments(
x = med, xend = med,
y = 0, yend = max(d$y),
line = list(color = "#D62828", width = 2, dash = "dash"),
name = "Median"
) %>%
layout(
title = list(
text = "Distribusi Interest Rate Proyek<br><sup>Histogram (Bar) + Density</sup>"
),
xaxis = list(title = "Interest Rate (%)"),
yaxis = list(title = "Density"),
bargap = 0.05,
legend = list(
orientation = "h",
x = 0.5,
xanchor = "center",
y = -0.2,
yanchor = "top"
)
)
p
Boxplot
library(ggplot2)
library(ggrepel)
# Statistik utama
s <- with(land, data.frame(
label = c("Min", "Max", "Mean", "Median", "Mode"),
value = c(
min(interest_rate, na.rm = TRUE),
max(interest_rate, na.rm = TRUE),
mean(interest_rate, na.rm = TRUE),
median(interest_rate, na.rm = TRUE),
as.numeric(names(sort(table(interest_rate), TRUE)[1]))
)
))
ggplot(land, aes(x = "", y = interest_rate)) +
geom_boxplot(fill = "#8ECAE6", alpha = 0.8) +
# Titik statistik
geom_point(data = s, aes(x = 1, y = value), color = "#D62828", size = 3) +
# Label + panah otomatis (ANTI OVERLAP)
geom_text_repel(
data = s,
aes(x = 1, y = value, label = paste0(label, ": ", round(value, 2))),
nudge_x = 0.25,
direction = "y",
segment.color = "grey40",
segment.size = 0.4,
size = 3,
box.padding = 0.4,
point.padding = 0.3
) +
labs(
title = "Boxplot Interest Rate Proyek",
subtitle = "Statistik Utama Ditampilkan Tanpa Overlap",
y = "Interest Rate (%)",
x = NULL
) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)

# Offset Y untuk label (ANTI OVERLAP)
s$y_label <- s$value
s$y_label[s$label == "Mean"] <- s$value[s$label == "Mean"] + 0.25
s$y_label[s$label == "Median"] <- s$value[s$label == "Median"] - 0.25
s$y_label[s$label == "Mode"] <- s$value[s$label == "Mode"] + 0.25
s$y_label[s$label == "Min"] <- s$value[s$label == "Min"] - 0.25
# Geser posisi X (HANYA INI YANG BERUBAH)
x_box <- 1
x_arrow <- 1.30 # ujung panah
x_text <- 1.48 # >>> teks lebih ke kanan
fig <- plot_ly() %>%
# Boxplot + OUTLIER
add_boxplot(
x = rep(x_box, length(land$interest_rate)),
y = land$interest_rate,
fillcolor = "#8ECAE6",
line = list(color = "#2C3E50"),
opacity = 0.8,
width = 0.4,
boxpoints = "outliers", # <<< OUTLIER DITAMPILKAN
jitter = 0,
pointpos = 0,
marker = list(
color = "#FB8500", # warna outlier (beda dari statistik)
size = 7
),
showlegend = FALSE
) %>%
# Titik statistik (Mean, Median, dst)
add_markers(
x = rep(x_box, nrow(s)),
y = s$value,
marker = list(color = "#D62828", size = 12),
showlegend = FALSE
) %>%
# Teks statistik
add_text(
x = rep(x_text, nrow(s)),
y = s$y_label,
text = paste0(s$label, ": ", round(s$value, 2)),
textposition = "middle left",
showlegend = FALSE
) %>%
layout(
title = list(
text = "Boxplot Interest Rate Proyek<br>
<sup>Dengan Outlier dan Statistik Utama</sup>",
x = 0.05
),
xaxis = list(
range = c(0.6, 1.9),
showticklabels = FALSE,
showgrid = FALSE,
zeroline = FALSE
),
yaxis = list(
title = "Interest Rate (%)",
zeroline = FALSE
),
shapes = lapply(
1:nrow(s),
function(i) {
list(
type = "line",
x0 = x_box,
x1 = x_arrow,
y0 = s$value[i],
y1 = s$y_label[i],
line = list(color = "grey40", width = 1)
)
}
),
annotations = lapply(
1:nrow(s),
function(i) {
list(
x = x_arrow,
y = s$y_label[i],
ax = x_box,
ay = s$value[i],
xref = "x",
yref = "y",
axref = "x",
ayref = "y",
showarrow = TRUE,
arrowhead = 3,
arrowwidth = 1,
arrowcolor = "grey40",
text = ""
)
}
),
margin = list(
l = 0,
r = 0,
t = 90,
b = 10
)
)
fig
Violin Plot
library(ggplot2)
library(ggrepel)
# =====================================================
# Statistik utama
# =====================================================
s <- with(land, data.frame(
label = c("Min", "Max", "Mean", "Median", "Mode"),
value = c(
min(interest_rate, na.rm = TRUE),
max(interest_rate, na.rm = TRUE),
mean(interest_rate, na.rm = TRUE),
median(interest_rate, na.rm = TRUE),
as.numeric(names(sort(table(interest_rate), decreasing = TRUE)[1]))
)
))
# =====================================================
# Violin Plot murni + Statistik
# =====================================================
ggplot(land, aes(x = "", y = interest_rate)) +
# Violin plot (tanpa boxplot)
geom_violin(
fill = "#8ECAE6",
alpha = 0.8,
trim = FALSE
) +
# Titik statistik
geom_point(
data = s,
aes(x = 1, y = value),
color = "#D62828",
size = 3
) +
# Label statistik (anti overlap)
geom_text_repel(
data = s,
aes(
x = 1,
y = value,
label = paste0(label, ": ", round(value, 2))
),
nudge_x = 0.28,
direction = "y",
segment.color = "grey40",
segment.size = 0.4,
size = 3,
box.padding = 0.4,
point.padding = 0.3
) +
labs(
title = "Violin Plot Interest Rate Proyek",
subtitle = "Distribusi Interest Rate dengan Statistik Utama",
y = "Interest Rate (%)",
x = NULL
) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)

library(plotly)
# =====================================================
# 1. DATA
# =====================================================
y <- land$interest_rate
y <- y[!is.na(y)]
# =====================================================
# 2. DETEKSI OUTLIER (IQR METHOD)
# =====================================================
Q1 <- quantile(y, 0.25)
Q3 <- quantile(y, 0.75)
IQR <- Q3 - Q1
lower <- Q1 - 1.5 * IQR
upper <- Q3 + 1.5 * IQR
is_outlier <- y < lower | y > upper
# =====================================================
# 3. STATISTIK UTAMA
# =====================================================
s <- data.frame(
label = c("Max", "Mean", "Median", "Mode", "Min"),
value = c(
max(y),
mean(y),
median(y),
as.numeric(names(sort(table(y), decreasing = TRUE)[1])),
min(y)
)
)
# Offset Y (ANTI OVERLAP)
s$y_label <- s$value
s$y_label[s$label == "Mean"] <- s$value[s$label == "Mean"] + 0.25
s$y_label[s$label == "Median"] <- s$value[s$label == "Median"] - 0.25
s$y_label[s$label == "Mode"] <- s$value[s$label == "Mode"] + 0.25
s$y_label[s$label == "Min"] <- s$value[s$label == "Min"] - 0.25
# =====================================================
# 4. POSISI X
# =====================================================
x_violin <- 1
x_arrow <- 1.30
x_text <- 1.52
# =====================================================
# 5. VIOLIN PLOT
# =====================================================
fig <- plot_ly() %>%
# ---- VIOLIN (DENSITY)
add_trace(
type = "violin",
x = rep(x_violin, length(y)),
y = y,
fillcolor = "#8ECAE6",
line = list(color = "#2C3E50"),
opacity = 0.85,
width = 0.55,
points = FALSE,
showlegend = FALSE
) %>%
# ---- NON-OUTLIER (ABU-ABU PEKAT)
add_markers(
x = rep(x_violin, sum(!is_outlier)),
y = y[!is_outlier],
marker = list(
color = "rgba(90,90,90,0.9)",
size = 6
),
showlegend = FALSE
) %>%
# ---- OUTLIER (MERAH)
add_markers(
x = rep(x_violin, sum(is_outlier)),
y = y[is_outlier],
marker = list(
color = "#D62828",
size = 9
),
showlegend = FALSE
) %>%
# ---- TITIK STATISTIK
add_markers(
x = rep(x_violin, nrow(s)),
y = s$value,
marker = list(color = "#FB8500", size = 13),
showlegend = FALSE
) %>%
# ---- TEKS STATISTIK
add_text(
x = rep(x_text, nrow(s)),
y = s$y_label,
text = paste0(s$label, ": ", round(s$value, 2)),
textposition = "middle left",
showlegend = FALSE
) %>%
# =====================================================
# 6. LAYOUT + PANAH
# =====================================================
layout(
title = list(
text = "Violin Plot Interest Rate Proyek<br>
<sup>Outlier (IQR), Distribusi, dan Statistik Utama</sup>",
x = 0.05
),
xaxis = list(
range = c(0.6, 1.95),
showticklabels = FALSE,
showgrid = FALSE,
zeroline = FALSE
),
yaxis = list(
title = "Interest Rate (%)",
zeroline = FALSE
),
shapes = lapply(
1:nrow(s),
function(i) {
list(
type = "line",
x0 = x_violin,
x1 = x_arrow,
y0 = s$value[i],
y1 = s$y_label[i],
line = list(color = "grey40", width = 1)
)
}
),
annotations = lapply(
1:nrow(s),
function(i) {
list(
x = x_arrow,
y = s$y_label[i],
ax = x_violin,
ay = s$value[i],
xref = "x",
yref = "y",
axref = "x",
ayref = "y",
showarrow = TRUE,
arrowhead = 3,
arrowwidth = 1,
arrowcolor = "grey40",
text = ""
)
}
),
margin = list(
l = 0,
r = 0,
t = 90,
b = 10
)
)
fig
Boxplot+Violin Plot
library(ggplot2)
library(ggrepel)
# =====================================================
# Statistik utama
# =====================================================
s <- with(land, data.frame(
label = c("Min", "Max", "Mean", "Median", "Mode"),
value = c(
min(interest_rate, na.rm = TRUE),
max(interest_rate, na.rm = TRUE),
mean(interest_rate, na.rm = TRUE),
median(interest_rate, na.rm = TRUE),
as.numeric(names(sort(table(interest_rate), decreasing = TRUE)[1]))
)
))
# =====================================================
# Violin Plot + Statistik
# =====================================================
ggplot(land, aes(x = "", y = interest_rate)) +
# Violin plot
geom_violin(
fill = "#8ECAE6",
alpha = 0.8,
trim = FALSE
) +
# (Opsional tapi recommended) garis median di tengah violin
geom_boxplot(
width = 0.08,
fill = "white",
outlier.shape = NA,
alpha = 0.7
) +
# Titik statistik
geom_point(
data = s,
aes(x = 1, y = value),
color = "#D62828",
size = 3
) +
# Label statistik (anti overlap)
geom_text_repel(
data = s,
aes(
x = 1,
y = value,
label = paste0(label, ": ", round(value, 2))
),
nudge_x = 0.28,
direction = "y",
segment.color = "grey40",
segment.size = 0.4,
size = 3,
box.padding = 0.4,
point.padding = 0.3
) +
labs(
title = "Violin Plot Interest Rate Proyek",
subtitle = "Distribusi Data dengan Statistik Utama",
y = "Interest Rate (%)",
x = NULL
) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)

library(plotly)
# =====================================================
# 1. DATA (BERSIHKAN NA)
# =====================================================
# y = data numerik utama yang akan diplot
y <- land$interest_rate
y <- y[!is.na(y)]
# =====================================================
# 2. DETEKSI OUTLIER BERDASARKAN ATURAN IQR
# =====================================================
# CATATAN PENTING:
# - Outlier TIDAK sama dengan min / max
# - Outlier ditentukan oleh batas statistik (IQR),
# bukan urutan nilai
Q1 <- quantile(y, 0.25)
Q3 <- quantile(y, 0.75)
IQR <- Q3 - Q1
# Batas whisker boxplot
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
# Klasifikasi data
is_outlier_low <- y < lower_bound # outlier ekstrem RENDAH
is_outlier_high <- y > upper_bound # outlier ekstrem TINGGI
is_normal <- !(is_outlier_low | is_outlier_high)
# =====================================================
# 3. STATISTIK UTAMA (DESKRIPTIF)
# =====================================================
# Statistik ini DESKRIPTIF,
# BUKAN penentu outlier
s <- data.frame(
label = c("Max", "Mean", "Median", "Mode", "Min"),
value = c(
max(y),
mean(y),
median(y),
as.numeric(names(sort(table(y), decreasing = TRUE)[1])),
min(y)
)
)
# Offset Y agar teks tidak overlap
s$y_label <- s$value
s$y_label[s$label == "Mean"] <- s$value[s$label == "Mean"] + 0.25
s$y_label[s$label == "Median"] <- s$value[s$label == "Median"] - 0.25
s$y_label[s$label == "Mode"] <- s$value[s$label == "Mode"] + 0.25
s$y_label[s$label == "Min"] <- s$value[s$label == "Min"] - 0.25
# =====================================================
# 4. POSISI X (UNTUK OVERLAY & ANOTASI)
# =====================================================
x_main <- 1 # posisi violin & box
x_arrow <- 1.30 # ujung panah
x_text <- 1.55 # posisi teks (lebih ke kanan)
# =====================================================
# 5. PLOTLY: VIOLIN + BOX + DATA POINT
# =====================================================
fig <- plot_ly() %>%
# ---- VIOLIN (SEBARAN DATA)
add_trace(
type = "violin",
x = rep(x_main, length(y)),
y = y,
fillcolor = "#8ECAE6",
line = list(color = "#2C3E50"),
opacity = 0.45,
width = 0.6,
points = FALSE,
showlegend = FALSE
) %>%
# ---- BOXPLOT (RINGKASAN STATISTIK)
# Whisker berhenti di batas IQR (BUKAN min/max)
add_boxplot(
x = rep(x_main, length(y)),
y = y,
width = 0.22,
fillcolor = "rgba(255,255,255,0.9)",
line = list(color = "#2C3E50"),
boxpoints = FALSE,
showlegend = FALSE
) %>%
# ---- DATA NORMAL (ABU-ABU)
add_markers(
x = rep(x_main, sum(is_normal)),
y = y[is_normal],
marker = list(
color = "rgba(80,80,80,0.9)",
size = 6
),
name = "Normal (Dalam IQR)",
showlegend = TRUE
) %>%
# ---- OUTLIER EKSTREM RENDAH (BIRU)
add_markers(
x = rep(x_main, sum(is_outlier_low)),
y = y[is_outlier_low],
marker = list(
color = "#1F77B4",
size = 10,
line = list(color = "black", width = 0.6)
),
name = "Outlier Ekstrem Rendah",
showlegend = TRUE
) %>%
# ---- OUTLIER EKSTREM TINGGI (MERAH)
add_markers(
x = rep(x_main, sum(is_outlier_high)),
y = y[is_outlier_high],
marker = list(
color = "#D62828",
size = 10,
line = list(color = "black", width = 0.6)
),
name = "Outlier Ekstrem Tinggi",
showlegend = TRUE
) %>%
# ---- TITIK STATISTIK UTAMA
add_markers(
x = rep(x_main, nrow(s)),
y = s$value,
marker = list(color = "#FB8500", size = 13),
showlegend = FALSE
) %>%
# ---- TEKS STATISTIK
add_text(
x = rep(x_text, nrow(s)),
y = s$y_label,
text = paste0(s$label, ": ", round(s$value, 2)),
textposition = "middle left",
showlegend = FALSE
) %>%
# =====================================================
# 6. LAYOUT + PANAH PENJELAS
# =====================================================
layout(
title = list(
text = paste0(
"Box + Violin Overlay Interest Rate Proyek<br>",
"<sup>",
"Catatan: Outlier ditentukan oleh IQR, bukan oleh Min/Max",
"</sup>"
),
x = 0.05
),
xaxis = list(
range = c(0.6, 2.0),
showticklabels = FALSE,
showgrid = FALSE,
zeroline = FALSE
),
yaxis = list(
title = "Interest Rate (%)",
zeroline = FALSE
),
shapes = lapply(
1:nrow(s),
function(i) {
list(
type = "line",
x0 = x_main,
x1 = x_arrow,
y0 = s$value[i],
y1 = s$y_label[i],
line = list(color = "grey40", width = 1)
)
}
),
margin = list(l = 0, r = 0, t = 110, b = 10)
)
fig
Pie Chart
library(dplyr)
library(plotly)
# =====================================================
# 1. HITUNG FREKUENSI DAN PERSENTASE
# =====================================================
donut_data <- land %>%
count(project_type, name = "frekuensi") %>%
mutate(persen = round(frekuensi / sum(frekuensi) * 100, 1))
# =====================================================
# 2. BUAT DONUT CHART INTERAKTIF DENGAN POSISI TURUN
# =====================================================
fig <- plot_ly(
donut_data,
labels = ~project_type,
values = ~frekuensi,
type = 'pie',
hole = 0.4,
textposition = 'inside',
textinfo = 'label+percent',
hovertemplate = "<b>%{label}</b><br>Frekuensi: %{value}<br>Persentase: %{percent}<extra></extra>",
marker = list(line = list(color = '#FFFFFF', width = 2)),
domain = list(
x = c(0, 1), # full horizontal
y = c(0.05, 0.90) # turunkan chart sedikit (awal 0.0, akhir 1.0)
)
)
# =====================================================
# 3. LAYOUT
# =====================================================
fig <- fig %>%
layout(
title = list(
text = "Distribusi Project Type (Donut Chart)",
x = 0.5,
y = 0.95
),
showlegend = F
)
fig
Bar Chart
library(dplyr)
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# =====================================================
# 1. HITUNG FREKUENSI + PERSENTASE KUMULATIF
# =====================================================
pareto_project <- land %>%
count(project_type, name = "frekuensi") %>%
arrange(desc(frekuensi)) %>%
mutate(
cum_freq = cumsum(frekuensi),
cum_pct = cum_freq / sum(frekuensi)
)
# =====================================================
# 2. PARETO CHART
# =====================================================
ggplot(pareto_project,
aes(x = reorder(project_type, -frekuensi))) +
# ---- BAR (FREKUENSI)
geom_col(
aes(y = frekuensi, fill = project_type),
width = 0.8,
alpha = 0.9
) +
# ---- LABEL FREKUENSI
geom_text(
aes(y = frekuensi, label = frekuensi),
vjust = -0.4,
size = 3,
color = "black"
) +
# ---- GARIS KUMULATIF (%)
geom_line(
aes(
y = cum_pct * max(frekuensi),
group = 1
),
color = "#D62828",
linewidth = 1.2
) +
geom_point(
aes(y = cum_pct * max(frekuensi)),
color = "#D62828",
size = 2.5
) +
# ---- LABEL % KUMULATIF
geom_text(
aes(
y = cum_pct * max(frekuensi),
label = percent(cum_pct, accuracy = 1)
),
vjust = -0.8,
color = "#D62828",
size = 3
) +
# =====================================================
# 3. AXIS & TEMA
# =====================================================
scale_y_continuous(
name = "Frekuensi",
sec.axis = sec_axis(
~ . / max(pareto_project$frekuensi),
labels = percent_format(accuracy = 1),
name = "Persentase Kumulatif"
)
) +
labs(
title = "Pareto Chart Project Type",
x = "Project Type"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "none",
axis.text.x = element_text(
angle = 45,
hjust = 1
),
plot.title = element_text(face = "bold")
)

library(dplyr)
library(plotly)
# =====================================================
# 1. HITUNG FREKUENSI + KUMULATIF
# =====================================================
pareto_project <- land %>%
count(project_type, name = "frekuensi") %>%
arrange(desc(frekuensi)) %>%
mutate(
cum_freq = cumsum(frekuensi),
cum_pct = cum_freq / sum(frekuensi)
)
max_freq <- max(pareto_project$frekuensi)
# =====================================================
# 2. PLOT PARETO
# =====================================================
fig <- plot_ly(pareto_project, x = ~project_type)
# ---- BAR (FREKUENSI)
fig <- fig %>%
add_bars(
y = ~frekuensi,
color = ~project_type,
text = ~frekuensi,
textposition = "outside",
hovertemplate =
"<b>%{x}</b><br>" ~
"Frekuensi: %{y}<extra></extra>",
showlegend = FALSE
)
# ---- GARIS PARETO (FIX WARNING)
fig <- fig %>%
add_trace(
type = "scatter",
mode = "lines+markers", # <<< PENTING
y = ~cum_pct * max_freq,
line = list(color = "#D62828", width = 3),
marker = list(size = 7, color = "#D62828"),
customdata = ~round(cum_pct * 100, 1),
hovertemplate =
"<b>%{x}</b><br>" ~
"Kumulatif: %{customdata}%<extra></extra>",
name = "Persentase Kumulatif"
)
# =====================================================
# 3. LAYOUT
# =====================================================
fig <- fig %>%
layout(
title = list(
text = "Pareto Chart Project Type<br>
<sup>Frekuensi & Persentase Kumulatif</sup>",
x = 0.05
),
xaxis = list(
title = "Project Type",
categoryorder = "array",
categoryarray = pareto_project$project_type
),
yaxis = list(
title = "Frekuensi",
rangemode = "tozero"
),
margin = list(t = 90)
)
fig
Analisis Bivariat
Line Plot (Numerik vs Numerik)
library(tidyverse)
library(lubridate)
library(plotly)
land$date <- as.Date(land$date)
# GANTI ini kalau nama kolom revenue kamu beda
revenue_col <- "revenue"
# pastikan kolom revenue numeric
land[[revenue_col]] <- as.numeric(land[[revenue_col]])
data_rev_yearly <- land %>%
mutate(year = year(date)) %>%
group_by(year, region) %>%
summarise(
revenue = sum(.data[[revenue_col]], na.rm = TRUE),
.groups = "drop"
) %>%
arrange(region, year)
p <- plot_ly()
p <- p %>%
add_trace(
data = data_rev_yearly,
x = ~year,
y = ~revenue,
color = ~region,
type = "scatter",
mode = "lines+markers",
name = ~region,
line = list(shape = "spline", smoothing = 1.2, width = 3),
marker = list(size = 6, opacity = 0.7),
opacity = 0.8
) %>%
layout(
title = list(text = "<b>Annual Revenue per Region</b>", x = 0.5),
xaxis = list(title = "", dtick = 1, tickangle = -45),
yaxis = list(title = "Revenue", rangemode = "tozero"),
legend = list(orientation = "h", x = 0.5, xanchor = "center", y = -0.15),
margin = list(b = 60)
)
p
Scatter Plot (Numerik vs Numerik)
# ==============================
# LIBRARY
# ==============================
library(tidyverse)
library(plotly)
library(scales)
# ==============================
# DATA PREPARATION
# ==============================
scatter_sales <- land %>%
select(units_sold, profit) %>%
drop_na()
# ==============================
# LINEAR MODEL
# ==============================
lm_model <- lm(profit ~ units_sold, data = scatter_sales)
lm_sum <- summary(lm_model)
# Koefisien
intercept <- lm_sum$coefficients[1, 1]
slope <- lm_sum$coefficients[2, 1]
p_value <- lm_sum$coefficients[2, 4]
r_squared <- lm_sum$r.squared
# Format teks model
eq_text <- paste0(
"Profit = ",
formatC(intercept, format = "f", digits = 2),
" + ",
formatC(slope, format = "f", digits = 2),
" × Units Sold"
)
stat_text <- paste0(
"R² = ", round(r_squared, 3),
"<br>p-value = ", formatC(p_value, format = "e", digits = 2)
)
# Prediksi garis regresi
scatter_sales <- scatter_sales %>%
mutate(profit_pred = predict(lm_model))
# ==============================
# SCATTER PLOT + REGRESSION LINE
# ==============================
p <- plot_ly() %>%
add_markers(
data = scatter_sales,
x = ~units_sold,
y = ~profit,
marker = list(size = 9, opacity = 0.7),
hovertemplate = paste(
"Units Sold: %{x:,}<br>",
"Profit: %{y:,.2f}<br>",
"<extra></extra>"
),
name = "Observed"
) %>%
add_lines(
data = scatter_sales,
x = ~units_sold,
y = ~profit_pred,
line = list(width = 2),
name = "Linear Fit"
) %>%
layout(
title = list(
text = "<b>Units Sold vs Profit with Linear Regression</b>",
x = 0.5
),
xaxis = list(
title = "Units Sold",
zeroline = TRUE
),
yaxis = list(
title = "Profit",
tickformat = ",.2f",
zeroline = TRUE
),
annotations = list(
list(
x = 0.02,
y = 0.98,
xref = "paper",
yref = "paper",
text = paste0(
"<b>Linear Model</b><br>",
eq_text, "<br>",
stat_text
),
showarrow = FALSE,
align = "left",
bgcolor = "rgba(255,255,255,0.85)",
bordercolor = "black",
borderwidth = 1
)
),
legend = list(
orientation = "h",
x = 0.5,
xanchor = "center",
y = -0.25
)
)
p
Boxplot (Numerik vs Kategorik)
library(plotly)
fig <- plot_ly(
data = land,
x = ~project_type,
y = ~interest_rate,
type = "box",
boxpoints = "outliers",
fillcolor = "#8ECAE6",
opacity = 0.8,
line = list(color = "black"),
marker = list(
color = "#D62828", # warna outlier
size = 6
)
) %>%
layout(
title = list(
text = "<b>Interest Rate per Jenis Proyek</b>",
x = 0.5
),
xaxis = list(
title = "Jenis Proyek"
),
yaxis = list(
title = "Interest Rate (%)",
ticksuffix = "%"
),
template = "simple_white"
)
fig
Violin Plot (Numerik vs Kategorik)
library(plotly)
fig <- plot_ly(
data = land,
x = ~project_type,
y = ~interest_rate,
type = "violin",
fillcolor = "#8ECAE6",
opacity = 0.8,
line = list(color = "black"),
box = list(visible = TRUE), # boxplot di dalam violin
meanline = list(visible = TRUE), # garis mean
points = "outliers", # tampilkan outlier
marker = list(
color = "#D62828",
size = 6
)
) %>%
layout(
title = list(
text = "<b>Interest Rate per Jenis Proyek</b>",
x = 0.5
),
xaxis = list(
title = "Jenis Proyek"
),
yaxis = list(
title = "Interest Rate (%)",
ticksuffix = "%"
),
template = "simple_white"
)
fig
Bar Chart (Kategorik vs Kategorik)
library(dplyr)
library(plotly)
# Hitung units sold per project type & sales channel
bar_data <- land %>%
group_by(project_type, sales_channel) %>%
summarise(
units_sold = sum(units_sold, na.rm = TRUE),
.groups = "drop"
)
# Urutkan project_type dari total units sold tertinggi
order_project <- bar_data %>%
group_by(project_type) %>%
summarise(total_units = sum(units_sold), .groups = "drop") %>%
arrange(desc(total_units))
bar_data <- bar_data %>%
mutate(
project_type = factor(
project_type,
levels = order_project$project_type
)
)
fig <- plot_ly(
data = bar_data,
x = ~project_type,
y = ~units_sold,
color = ~sales_channel,
type = "bar"
) %>%
layout(
title = list(
text = "<b>Units Sold berdasarkan Project Type<br><sup>Dilapisi Sales Channel</sup></b>",
x = 0.5
),
xaxis = list(
title = "Project Type",
categoryorder = "array"
),
yaxis = list(
title = "Units Sold"
),
barmode = "stack",
legend = list(
orientation = "h",
x = 0.5,
xanchor = "center",
y = -0.3
),
template = "simple_white"
)
Analisis Multivariat
Heatmap Korelasi
library(tidyverse)
# ======================
# 1. Ambil semua kolom numerik
# ======================
land_num <- land %>% select(where(is.numeric))
# ======================
# 2. Pilih kolom finansial
# ======================
keywords <- c("price", "capital", "sale", "profit", "margin", "cost", "total")
finance_vars <- land_num %>%
select(any_of(names(land_num)[str_detect(names(land_num), paste(keywords, collapse = "|"))]))
# ======================
# 3. Hitung korelasi
# ======================
corr_mat <- round(cor(finance_vars, use = "complete.obs"), 2)
# ======================
# 4. Ubah ke long format dan ambil upper-right triangle
# ======================
corr_long <- as.data.frame(as.table(corr_mat))
colnames(corr_long) <- c("Var1", "Var2", "Correlation")
# Buat faktor untuk menentukan urutan kolom
levels_order <- colnames(finance_vars)
corr_long <- corr_long %>%
mutate(
Var1 = factor(Var1, levels = levels_order),
Var2 = factor(Var2, levels = levels_order)
) %>%
filter(as.numeric(Var1) < as.numeric(Var2)) # hanya atas kanan
# ======================
# 5. Plot heatmap
# ======================
ggplot(corr_long, aes(x = Var1, y = Var2, fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "#B2182B", mid = "white", high = "#2166AC", midpoint = 0) +
geom_text(aes(label = Correlation), size = 3) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 10),
axis.title = element_blank()
) +
labs(title = "Upper-Right Triangle Correlation - Variabel Finansial")

Cluster Plot
library(tidyverse)
library(plotly)
# ======================
# 1. Ambil kolom numerik
# ======================
land_num <- land %>% select(where(is.numeric))
# ======================
# 2. Pilih variabel finansial
# ======================
keywords <- c("price", "capital", "sale", "profit", "margin", "cost", "total")
finance_vars <- land_num %>%
select(any_of(names(land_num)[str_detect(names(land_num), paste(keywords, collapse = "|"))]))
if (ncol(finance_vars) == 0) {
message("⚠️ Tidak ada variabel numerik finansial ditemukan. Cluster plot tidak dapat dibuat.")
} else {
# ======================
# 3. Standarisasi
# ======================
finance_scaled <- scale(finance_vars)
# ======================
# 4. K-means clustering
# ======================
set.seed(123)
k <- 3
km_res <- kmeans(finance_scaled, centers = k, nstart = 25)
# ======================
# 5. PCA untuk 2D visualisasi
# ======================
pca_res <- prcomp(finance_scaled)
pca_plot <- data.frame(
PC1 = pca_res$x[,1],
PC2 = pca_res$x[,2],
Cluster = factor(km_res$cluster)
)
# ======================
# 6. Tambahkan hover data asli
# ======================
hover_text <- apply(finance_vars, 1, function(row){
paste(paste(names(finance_vars), ": ", round(row, 2)), collapse = "<br>")
})
pca_plot$hover_text <- paste("Cluster: ", pca_plot$Cluster, "<br>", hover_text)
# ======================
# 7. Plotly interactive cluster plot
# ======================
plot_ly(
data = pca_plot,
x = ~PC1,
y = ~PC2,
color = ~Cluster,
colors = c("#1F77B4", "#FF7F0E", "#2CA02C"),
type = "scatter",
mode = "markers",
text = ~hover_text,
hoverinfo = "text"
) %>%
layout(
title = paste("Interactive Cluster Plot (k =", k, ") - Variabel Finansial"),
xaxis = list(title = "PC1"),
yaxis = list(title = "PC2")
)
}
Bubble Plot
library(tidyverse)
library(plotly)
# ======================
# 1. Pilih kolom yang dibutuhkan
# ======================
finance_vars <- land %>%
select(date, year, month, revenue, construction_cost, profit)
# ======================
# 2. Group per Tahun & Bulan
# ======================
finance_agg <- finance_vars %>%
group_by(year, month) %>%
summarise(
total_revenue = sum(revenue, na.rm = TRUE),
total_cost = sum(construction_cost, na.rm = TRUE),
total_profit = sum(profit, na.rm = TRUE),
.groups = "drop"
)
# ======================
# 3. Buat hover text
# ======================
finance_agg <- finance_agg %>%
mutate(
hover_text = paste(
"Tahun: ", year,
"<br>Bulan: ", month,
"<br>Total Revenue: ", round(total_revenue, 2),
"<br>Total Cost: ", round(total_cost, 2),
"<br>Total Profit: ", round(total_profit, 2)
)
)
# ======================
# 4. Bubble Plot interaktif
# ======================
plot_ly(
data = finance_agg,
x = ~total_revenue,
y = ~total_cost,
size = ~total_profit,
sizes = c(10, 50),
type = "scatter",
mode = "markers",
marker = list(sizemode = "area", opacity = 0.7, color = ~year, colorscale = "Viridis"),
text = ~hover_text,
hoverinfo = "text"
) %>%
layout(
title = "Bubble Plot per Tahun & Bulan (Agregat Finansial)",
xaxis = list(title = "Total Revenue"),
yaxis = list(title = "Total Cost")
)
## Warning: `line.width` does not currently support multiple values.
Statistik Inferensial Parametrik
Uji Z
library(tidyverse)
# ======================
# 1. Tentukan data profit
# ======================
# Misal kita ingin uji rata-rata profit tahun 2026
current_year <- 2026
profit_data <- land %>%
filter(year == current_year) %>%
pull(profit)
# ======================
# 2. Tentukan target (mu0) dari rata-rata historis
# ======================
# Misal rata-rata profit tahun sebelumnya (2025)
historical_year <- current_year - 1
mu0 <- land %>%
filter(year == historical_year) %>%
summarise(mean_profit = mean(profit, na.rm = TRUE)) %>%
pull(mean_profit)
cat("Target (mu0) berdasarkan rata-rata historis tahun", historical_year, "=", round(mu0,2), "\n")
## Target (mu0) berdasarkan rata-rata historis tahun 2025 = 213560617698
## Target (mu0) berdasarkan rata-rata historis tahun 2025 = 213560617698
# ======================
# 3. Hitung Uji Z
# ======================
n <- length(profit_data)
x_bar <- mean(profit_data, na.rm = TRUE)
sigma <- sd(profit_data, na.rm = TRUE) # jika populasi sigma tidak diketahui, pakai SD sampel
z_stat <- (x_bar - mu0) / (sigma / sqrt(n))
# ======================
# 4. Hitung p-value
# ======================
p_value <- 2 * (1 - pnorm(abs(z_stat)))
# ======================
# 5. Tampilkan hasil
# ======================
cat("Uji Z:\n")
## Uji Z:
## Uji Z:
cat("Z =", round(z_stat,3), "\n")
## Z = -0.635
## Z = -0.635
cat("p-value =", round(p_value,5), "\n")
## p-value = 0.52567
## p-value = 0.52567
# ======================
# 6. Interpretasi singkat
# ======================
if(p_value < 0.05){
cat("Hasil: Rata-rata profit tahun", current_year, "berbeda signifikan dari rata-rata historis.\n")
} else {
cat("Hasil: Rata-rata profit tahun", current_year, "tidak berbeda signifikan dari rata-rata historis.\n")
}
## Hasil: Rata-rata profit tahun 2026 tidak berbeda signifikan dari rata-rata historis.
## Hasil: Rata-rata profit tahun 2026 tidak berbeda signifikan dari rata-rata historis.
Uji t
library(tidyverse)
# ======================
# 1. Pilih dua project_type
# ======================
types <- unique(land$project_type)[1:2] # ambil 2 project_type pertama
profit_group <- land %>%
filter(project_type %in% types) %>%
select(project_type, profit)
# ======================
# 2. Cek ringkasan data
# ======================
profit_group %>% group_by(project_type) %>% summarise(
mean_profit = mean(profit, na.rm = TRUE),
sd_profit = sd(profit, na.rm = TRUE),
n = n()
)
# ======================
# 3. Uji t independen
# ======================
t_test <- t.test(profit ~ project_type, data = profit_group, var.equal = TRUE) # asumsi varian sama
# ======================
# 4. Tampilkan hasil
# ======================
t_test
##
## Two Sample t-test
##
## data: profit by project_type
## t = -3.9333, df = 725, p-value = 9.184e-05
## alternative hypothesis: true difference in means between group Commercial and group Residential is not equal to 0
## 95 percent confidence interval:
## -52492156922 -17537708738
## sample estimates:
## mean in group Commercial mean in group Residential
## 196420443165 231435375995
##
## Two Sample t-test
##
## data: profit by project_type
## t = -3.9333, df = 725, p-value = 9.184e-05
## alternative hypothesis: true difference in means between group Commercial and group Residential is not equal to 0
## 95 percent confidence interval:
## -52492156922 -17537708738
## sample estimates:
## mean in group Commercial mean in group Residential
## 196420443165 231435375995
# ======================
# 5. Interpretasi singkat
# ======================
if(t_test$p.value < 0.05){
cat("Hasil: Rata-rata profit antara kedua project_type berbeda signifikan.\n")
} else {
cat("Hasil: Rata-rata profit antara kedua project_type tidak berbeda signifikan.\n")
}
## Hasil: Rata-rata profit antara kedua project_type berbeda signifikan.
## Hasil: Rata-rata profit antara kedua project_type berbeda signifikan.
ANOVA
library(tidyverse)
# ======================
# 1. Cek jumlah grup
# ======================
unique(land$region) # pastikan ada lebih dari 2 region
## [1] "Jabodetabek" "Jawa" "Kalimantan" "Sulawesi" "Sumatra"
## [1] "Jabodetabek" "Jawa" "Kalimantan" "Sulawesi" "Sumatra"
# ======================
# 2. Ringkasan profit per region
# ======================
land %>%
group_by(region) %>%
summarise(
mean_profit = mean(profit, na.rm = TRUE),
sd_profit = sd(profit, na.rm = TRUE),
n = n()
)
# ======================
# 3. ANOVA satu arah
# ======================
anova_model <- aov(profit ~ region, data = land)
# ======================
# 4. Lihat hasil ANOVA
# ======================
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## region 4 5.411e+23 1.353e+23 11.58 3.44e-09 ***
## Residuals 960 1.121e+25 1.168e+22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## region 4 5.411e+23 1.353e+23 11.58 3.44e-09 ***
## Residuals 960 1.121e+25 1.168e+22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ======================
# 5. Jika signifikan, lanjut post-hoc Tukey HSD
# ======================
if(summary(anova_model)[[1]]$`Pr(>F)`[1] < 0.05){
tukey_result <- TukeyHSD(anova_model)
print(tukey_result)
cat("Hasil: Ada perbedaan profit signifikan antar region.\n")
} else {
cat("Hasil: Tidak ada perbedaan profit signifikan antar region.\n")
}
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = profit ~ region, data = land)
##
## $region
## diff lwr upr p adj
## Jawa-Jabodetabek -16042509464 -46106271328 14021252400 0.5899057
## Kalimantan-Jabodetabek -58645811782 -88709573645 -28582049918 0.0000012
## Sulawesi-Jabodetabek -56705333130 -86769094994 -26641571267 0.0000031
## Sumatra-Jabodetabek -48954264174 -79018026037 -18890502310 0.0000936
## Kalimantan-Jawa -42603302318 -72667064181 -12539540454 0.0010850
## Sulawesi-Jawa -40662823666 -70726585530 -10599061803 0.0021477
## Sumatra-Jawa -32911754710 -62975516573 -2847992846 0.0237312
## Sulawesi-Kalimantan 1940478651 -28123283212 32004240515 0.9997833
## Sumatra-Kalimantan 9691547608 -20372214256 39755309472 0.9039191
## Sumatra-Sulawesi 7751068957 -22312692907 37814830820 0.9554618
##
## Hasil: Ada perbedaan profit signifikan antar region.
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = profit ~ region, data = land)
##
## $region
## diff lwr upr p adj
## Jawa-Jabodetabek -16042509464 -46106271328 14021252400 0.5899057
## Kalimantan-Jabodetabek -58645811782 -88709573645 -28582049918 0.0000012
## Sulawesi-Jabodetabek -56705333130 -86769094994 -26641571267 0.0000031
## Sumatra-Jabodetabek -48954264174 -79018026037 -18890502310 0.0000936
## Kalimantan-Jawa -42603302318 -72667064181 -12539540454 0.0010850
## Sulawesi-Jawa -40662823666 -70726585530 -10599061803 0.0021477
## Sumatra-Jawa -32911754710 -62975516573 -2847992846 0.0237312
## Sulawesi-Kalimantan 1940478651 -28123283212 32004240515 0.9997833
## Sumatra-Kalimantan 9691547608 -20372214256 39755309472 0.9039191
## Sumatra-Sulawesi 7751068957 -22312692907 37814830820 0.9554618
##
## Hasil: Ada perbedaan profit signifikan antar region.
Statistik Inferensial Non-Parametrik
Mann–Whitney U Test
library(tidyverse)
# ======================
# 1. Pilih dua project_type
# ======================
types <- unique(land$project_type)[1:2] # ambil dua project_type pertama
profit_group <- land %>%
filter(project_type %in% types) %>%
select(project_type, profit)
# ======================
# 2. Ringkasan data
# ======================
profit_group %>%
group_by(project_type) %>%
summarise(
mean_profit = mean(profit, na.rm = TRUE),
median_profit = median(profit, na.rm = TRUE),
sd_profit = sd(profit, na.rm = TRUE),
n = n()
)
# ======================
# 3. Mann–Whitney U Test
# ======================
# di R: wilcox.test untuk Mann-Whitney
mw_test <- wilcox.test(profit ~ project_type, data = profit_group, exact = FALSE)
mw_test
##
## Wilcoxon rank sum test with continuity correction
##
## data: profit by project_type
## W = 47590, p-value = 0.0001139
## alternative hypothesis: true location shift is not equal to 0
##
## Wilcoxon rank sum test with continuity correction
##
## data: profit by project_type
## W = 47590, p-value = 0.0001139
## alternative hypothesis: true location shift is not equal to 0
# ======================
# 4. Interpretasi
# ======================
if(mw_test$p.value < 0.05){
cat("Hasil: Profit antara kedua project_type berbeda signifikan (Mann-Whitney U Test).\n")
} else {
cat("Hasil: Profit antara kedua project_type tidak berbeda signifikan.\n")
}
## Hasil: Profit antara kedua project_type berbeda signifikan (Mann-Whitney U Test).
## Hasil: Profit antara kedua project_type berbeda signifikan (Mann-Whitney U Test).
Kruskal–Wallis Test
library(tidyverse)
if(!require(PMCMRplus)) install.packages("PMCMRplus")
## Loading required package: PMCMRplus
## Loading required package: PMCMRplus
library(PMCMRplus)
# ======================
# 1️⃣ Bersihkan data
# ======================
land_clean <- land %>%
filter(!is.na(profit), is.finite(profit), !is.na(region)) %>%
mutate(region = factor(region)) # pastikan region = faktor
# ======================
# 2️⃣ Hapus grup dengan <2 observasi
# ======================
valid_groups <- land_clean %>%
group_by(region) %>%
summarise(n = n()) %>%
filter(n >= 2) %>%
pull(region)
land_clean2 <- land_clean %>%
filter(region %in% valid_groups)
# ======================
# 3️⃣ Pastikan data valid
# ======================
if(nrow(land_clean2) == 0 | length(levels(land_clean2$region)) < 2){
stop("⚠️ Tidak cukup data untuk menjalankan Kruskal-Wallis Test.")
}
# ======================
# 4️⃣ Kruskal-Wallis Test
# ======================
kruskal_test <- kruskal.test(profit ~ region, data = land_clean2)
print(kruskal_test)
##
## Kruskal-Wallis rank sum test
##
## data: profit by region
## Kruskal-Wallis chi-squared = 41.678, df = 4, p-value = 1.945e-08
##
## Kruskal-Wallis rank sum test
##
## data: profit by region
## Kruskal-Wallis chi-squared = 41.678, df = 4, p-value = 1.945e-08
# ======================
# 5️⃣ Post-hoc Nemenyi jika signifikan
# ======================
if(kruskal_test$p.value < 0.05){
posthoc <- kwAllPairsNemenyiTest(profit ~ region, data = land_clean2)
print(posthoc)
}
## Warning in kwAllPairsNemenyiTest.default(c(1.5212e+11, 4.17566e+11,
## 2.20799e+11, : Ties are present, p-values are not corrected.
##
## Pairwise comparisons using Tukey-Kramer-Nemenyi all-pairs test with Tukey-Dist approximation
## data: profit by region
## Jabodetabek Jawa Kalimantan Sulawesi
## Jawa 0.80438 - - -
## Kalimantan 6.8e-06 0.00108 - -
## Sulawesi 2.3e-05 0.00276 0.99924 -
## Sumatra 0.00027 0.01716 0.93911 0.98451
##
## P value adjustment method: single-step
## alternative hypothesis: two.sided
## Warning in kwAllPairsNemenyiTest.default(c(1.5212e+11, 4.17566e+11,
## 2.20799e+11, : Ties are present, p-values are not corrected.
##
## Pairwise comparisons using Tukey-Kramer-Nemenyi all-pairs test with Tukey-Dist approximation
## data: profit by region
## Jabodetabek Jawa Kalimantan Sulawesi
## Jawa 0.80438 - - -
## Kalimantan 6.8e-06 0.00108 - -
## Sulawesi 2.3e-05 0.00276 0.99924 -
## Sumatra 0.00027 0.01716 0.93911 0.98451
##
## P value adjustment method: single-step
## alternative hypothesis: two.sided
# ======================
# 6️⃣ Interpretasi & Kesimpulan
# ======================
cat("\nKesimpulan:\n")
##
## Kesimpulan:
##
## Kesimpulan:
if(kruskal_test$p.value < 0.05){
cat("Profit berbeda signifikan antar region (p =", round(kruskal_test$p.value,5), ").\n")
cat("Post-hoc Nemenyi dapat digunakan untuk mengetahui pasangan region yang berbeda signifikan.\n")
} else {
cat("Profit tidak berbeda signifikan antar region (p =", round(kruskal_test$p.value,5), ").\n")
}
## Profit berbeda signifikan antar region (p = 0 ).
## Post-hoc Nemenyi dapat digunakan untuk mengetahui pasangan region yang berbeda signifikan.
## Profit berbeda signifikan antar region (p = 0 ).
## Post-hoc Nemenyi dapat digunakan untuk mengetahui pasangan region yang berbeda signifikan.
Chi-Square Test
library(tidyverse)
# ======================
# 1️⃣ Buat tabel kontingensi
# ======================
contingency_table <- table(land$project_type, land$region)
contingency_table
##
## Jabodetabek Jawa Kalimantan Sulawesi Sumatra
## Commercial 47 50 32 54 52
## Industrial 16 19 18 22 22
## MixedUse 34 26 35 19 27
## Residential 96 98 108 98 92
##
## Jabodetabek Jawa Kalimantan Sulawesi Sumatra
## Commercial 47 50 32 54 52
## Industrial 16 19 18 22 22
## MixedUse 34 26 35 19 27
## Residential 96 98 108 98 92
# ======================
# 2️⃣ Chi-Square Test
# ======================
chi_test <- chisq.test(contingency_table)
chi_test
##
## Pearson's Chi-squared test
##
## data: contingency_table
## X-squared = 15.427, df = 12, p-value = 0.2189
##
## Pearson's Chi-squared test
##
## data: contingency_table
## X-squared = 15.427, df = 12, p-value = 0.2189
# ======================
# 3️⃣ Interpretasi & Kesimpulan
# ======================
cat("\nKesimpulan:\n")
##
## Kesimpulan:
##
## Kesimpulan:
if(chi_test$p.value < 0.05){
cat("Ada hubungan signifikan antara project_type dan region (p =", round(chi_test$p.value,5), ").\n")
} else {
cat("Tidak ada hubungan signifikan antara project_type dan region (p =", round(chi_test$p.value,5), ").\n")
}
## Tidak ada hubungan signifikan antara project_type dan region (p = 0.21893 ).
## Tidak ada hubungan signifikan antara project_type dan region (p = 0.21893 ).