---
title: "Visualisasi Data"
subtitle: "Exercises ~ Week 2"
author:
  - "Kelompok 3"
  - "Naifah Edria Arta (52250056)"
  - "Frizzy Lithmensyah (52250062)"
  - "Lulu Najla Salsabila (52250069)"
  - "Naila Syahrani Putri (52250070)"
  - "Ni. MD Aurora Sekarningrum (52250072)"
date: "November 02, 2025"
output:
  rmdformats::readthedown:
    self_contained: true
    thumbnails: true
    lightbox: true
    gallery: true
    number_sections: true
    lib_dir: libs
    df_print: paged
    code_folding: show
    code_download: true
---
# --- 1. Load package yang dibutuhkan ---
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(modeest) # untuk menghitung modus
## Warning: package 'modeest' was built under R version 4.5.2
# --- 2. Import the dataset ---
# Read the CSV directly from its raw GitHub URL into a data frame.
dataset <- read.csv(
  file = "https://raw.githubusercontent.com/YanDraa/Dataweek6Statistika/main/4_Central_Tendency_Introduction_to_Statistics.csv"
)
# --- 3. Inspect the data structure ---
# Compact overview: one line per column with its type and first few values.
str(object = dataset)
## 'data.frame': 200 obs. of 9 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CustomerID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Age : int 32 37 63 41 42 66 47 21 30 33 ...
## $ Gender : chr "M" "F" "M" "M" ...
## $ StoreLocation : chr "West" "South" "West" "North" ...
## $ ProductCategory: chr "Electronics" "Books" "Electronics" "Sports" ...
## $ TotalPurchase : int 528 72 327 391 514 381 510 102 559 27 ...
## $ NumberOfVisits : int 4 4 4 7 7 6 5 4 2 5 ...
## $ FeedbackScore : int 1 5 2 1 5 3 1 2 2 2 ...
# Per-column summary statistics (recorded output follows).
summary(object = dataset)
## X CustomerID Age Gender
## Min. : 1.00 Min. : 1.00 Min. :18.00 Length:200
## 1st Qu.: 50.75 1st Qu.: 50.75 1st Qu.:31.00 Class :character
## Median :100.50 Median :100.50 Median :39.00 Mode :character
## Mean :100.50 Mean :100.50 Mean :39.99
## 3rd Qu.:150.25 3rd Qu.:150.25 3rd Qu.:48.25
## Max. :200.00 Max. :200.00 Max. :70.00
## StoreLocation ProductCategory TotalPurchase NumberOfVisits
## Length:200 Length:200 Min. : 11.0 Min. : 1.000
## Class :character Class :character 1st Qu.: 68.0 1st Qu.: 4.000
## Mode :character Mode :character Median : 108.5 Median : 5.000
## Mean : 211.8 Mean : 5.165
## 3rd Qu.: 381.2 3rd Qu.: 7.000
## Max. :1128.0 Max. :11.000
## FeedbackScore
## Min. :1.0
## 1st Qu.:1.0
## Median :3.0
## Mean :2.8
## 3rd Qu.:4.0
## Max. :5.0
# --- 4. Compute measures of central tendency ---
# For each numeric column of interest, store the mean, median and mode in
# globals that the reporting and plotting sections read later. Missing values
# are excluded from every statistic; the mode comes from modeest::mfv().

# TotalPurchase
mean_total   <- with(dataset, mean(TotalPurchase, na.rm = TRUE))
median_total <- with(dataset, median(TotalPurchase, na.rm = TRUE))
mode_total   <- with(dataset, modeest::mfv(TotalPurchase, na_rm = TRUE))

# Age
mean_age   <- with(dataset, mean(Age, na.rm = TRUE))
median_age <- with(dataset, median(Age, na.rm = TRUE))
mode_age   <- with(dataset, modeest::mfv(Age, na_rm = TRUE))

# NumberOfVisits
mean_visit   <- with(dataset, mean(NumberOfVisits, na.rm = TRUE))
median_visit <- with(dataset, median(NumberOfVisits, na.rm = TRUE))
mode_visit   <- with(dataset, modeest::mfv(NumberOfVisits, na_rm = TRUE))
# --- 5. Display the results ---
# Print one line per variable with its mean, median and mode.
# mfv() returns every value tied for most frequent, so each mode is collapsed
# into a single comma-separated string; otherwise a multimodal column would be
# printed as bare space-separated numbers that read like extra fields. A unique
# mode prints exactly as before.
cat("===== Central Tendency =====\n")
## ===== Central Tendency =====
cat("TotalPurchase -> Mean:", mean_total, " | Median:", median_total,
    " | Mode:", paste(mode_total, collapse = ", "), "\n")
## TotalPurchase -> Mean: 211.795 | Median: 108.5 | Mode: 33
cat("Age -> Mean:", mean_age, " | Median:", median_age,
    " | Mode:", paste(mode_age, collapse = ", "), "\n")
## Age -> Mean: 39.99 | Median: 39 | Mode: 18
cat("NumberOfVisits -> Mean:", mean_visit, " | Median:", median_visit,
    " | Mode:", paste(mode_visit, collapse = ", "), "\n")
## NumberOfVisits -> Mean: 5.165 | Median: 5 | Mode: 5
# --- 6. Visualisasi Data ---
## 6a. Histogram of TotalPurchase
# Frequency histogram of purchase totals (bin width 10) with the median marked
# by a dashed red vertical line.
ggplot(dataset, aes(x = TotalPurchase)) +
  geom_histogram(binwidth = 10, fill = "#4e79a7", color = "white") +
  # The median is a single constant, so it is passed as a fixed parameter.
  # Mapping it through aes() evaluates it against every row of `dataset` and
  # draws one overlapping line per observation instead of a single line.
  geom_vline(xintercept = median_total, color = "red", linetype = "dashed",
             linewidth = 1) +
  labs(title = "Distribusi TotalPurchase",
       x = "Total Purchase",
       y = "Frekuensi",
       subtitle = "Garis merah menunjukkan median") +
  theme_minimal()
## 6b. Boxplot of TotalPurchase
# Single box-and-whisker plot with purchase totals on the y axis.
ggplot(data = dataset, mapping = aes(y = TotalPurchase)) +
  geom_boxplot(colour = "black", fill = "#f28e2b") +
  ggtitle("Boxplot TotalPurchase") +
  ylab("Total Purchase") +
  theme_minimal()
## 6c. Histogram of Age
# Frequency histogram of customer ages (bin width 2) with the mean marked by a
# dashed blue vertical line.
ggplot(dataset, aes(x = Age)) +
  geom_histogram(binwidth = 2, fill = "#59a14f", color = "white") +
  # The mean is a single constant, so it is passed as a fixed parameter.
  # Mapping it through aes() would draw one overlapping line per data row.
  geom_vline(xintercept = mean_age, color = "blue", linetype = "dashed",
             linewidth = 1) +
  labs(title = "Distribusi Usia Pelanggan (Age)",
       x = "Usia",
       y = "Frekuensi",
       subtitle = "Garis biru menunjukkan mean") +
  theme_minimal()
## 6d. Boxplot of Age
# Single box-and-whisker plot with customer age on the y axis.
ggplot(data = dataset, mapping = aes(y = Age)) +
  geom_boxplot(colour = "black", fill = "#edc948") +
  ggtitle("Boxplot Usia Pelanggan") +
  ylab("Age") +
  theme_minimal()
## 6e. Histogram of NumberOfVisits
# Frequency histogram of visit counts (bin width 1) with the mean marked by a
# dashed blue vertical line.
ggplot(dataset, aes(x = NumberOfVisits)) +
  geom_histogram(binwidth = 1, fill = "#e15759", color = "white") +
  # The mean is a single constant, so it is passed as a fixed parameter.
  # Mapping it through aes() would draw one overlapping line per data row.
  geom_vline(xintercept = mean_visit, color = "blue", linetype = "dashed",
             linewidth = 1) +
  labs(title = "Distribusi Jumlah Kunjungan (NumberOfVisits)",
       x = "Jumlah Kunjungan",
       y = "Frekuensi",
       subtitle = "Garis biru menunjukkan mean") +
  theme_minimal()
## 6f. Boxplot of NumberOfVisits
# Single box-and-whisker plot with the number of visits on the y axis.
ggplot(data = dataset, mapping = aes(y = NumberOfVisits)) +
  geom_boxplot(colour = "black", fill = "#b07aa1") +
  ggtitle("Boxplot Jumlah Kunjungan (NumberOfVisits)") +
  ylab("Number of Visits") +
  theme_minimal()