---
title: "Visualisasi Data"
subtitle: "Exercises ~ Week 2"
author:
  - "Kelompok 3"
  - "Naifah Edria Arta (52250056)"
  - "Frizzy Lithmensyah (52250062)"
  - "Lulu Najla Salsabila (52250069)"
  - "Naila Syahrani Putri (52250070)"
  - "Ni. MD Aurora Sekarningrum (52250072)"
date: "November 02, 2025"
output:
  rmdformats::readthedown:
    self_contained: true
    thumbnails: true
    lightbox: true
    gallery: true
    number_sections: true
    lib_dir: libs
    df_print: paged
    code_folding: show
    code_download: true
---


# --- 1. Load package yang dibutuhkan ---
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(modeest)   # untuk menghitung modus
## Warning: package 'modeest' was built under R version 4.5.2
# --- 2. Import the dataset ---
# Read the CSV from its GitHub raw URL into the data frame `dataset`.
dataset <- utils::read.csv(
  file = "https://raw.githubusercontent.com/YanDraa/Dataweek6Statistika/main/4_Central_Tendency_Introduction_to_Statistics.csv"
)

# --- 3. Inspect the data structure ---
# Compact overview: column names, types and the first few values.
utils::str(object = dataset)
## 'data.frame':    200 obs. of  9 variables:
##  $ X              : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CustomerID     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age            : int  32 37 63 41 42 66 47 21 30 33 ...
##  $ Gender         : chr  "M" "F" "M" "M" ...
##  $ StoreLocation  : chr  "West" "South" "West" "North" ...
##  $ ProductCategory: chr  "Electronics" "Books" "Electronics" "Sports" ...
##  $ TotalPurchase  : int  528 72 327 391 514 381 510 102 559 27 ...
##  $ NumberOfVisits : int  4 4 4 7 7 6 5 4 2 5 ...
##  $ FeedbackScore  : int  1 5 2 1 5 3 1 2 2 2 ...
# Per-column summary statistics (quartiles and mean for numeric columns).
summary(object = dataset)
##        X            CustomerID          Age           Gender         
##  Min.   :  1.00   Min.   :  1.00   Min.   :18.00   Length:200        
##  1st Qu.: 50.75   1st Qu.: 50.75   1st Qu.:31.00   Class :character  
##  Median :100.50   Median :100.50   Median :39.00   Mode  :character  
##  Mean   :100.50   Mean   :100.50   Mean   :39.99                     
##  3rd Qu.:150.25   3rd Qu.:150.25   3rd Qu.:48.25                     
##  Max.   :200.00   Max.   :200.00   Max.   :70.00                     
##  StoreLocation      ProductCategory    TotalPurchase    NumberOfVisits  
##  Length:200         Length:200         Min.   :  11.0   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:  68.0   1st Qu.: 4.000  
##  Mode  :character   Mode  :character   Median : 108.5   Median : 5.000  
##                                        Mean   : 211.8   Mean   : 5.165  
##                                        3rd Qu.: 381.2   3rd Qu.: 7.000  
##                                        Max.   :1128.0   Max.   :11.000  
##  FeedbackScore
##  Min.   :1.0  
##  1st Qu.:1.0  
##  Median :3.0  
##  Mean   :2.8  
##  3rd Qu.:4.0  
##  Max.   :5.0
# --- 4. Compute the measures of central tendency ---
# For each of the three numeric columns, store the mean, the median and the
# mode (via modeest::mfv). Missing values are dropped in every call.
# NOTE(review): mfv() may return more than one value when several values tie
# for most frequent -- confirm against the modeest documentation.

# TotalPurchase
mean_total <- base::mean(dataset[["TotalPurchase"]], na.rm = TRUE)
median_total <- stats::median(dataset[["TotalPurchase"]], na.rm = TRUE)
mode_total <- modeest::mfv(dataset[["TotalPurchase"]], na_rm = TRUE)

# Age
mean_age <- base::mean(dataset[["Age"]], na.rm = TRUE)
median_age <- stats::median(dataset[["Age"]], na.rm = TRUE)
mode_age <- modeest::mfv(dataset[["Age"]], na_rm = TRUE)

# NumberOfVisits
mean_visit <- base::mean(dataset[["NumberOfVisits"]], na.rm = TRUE)
median_visit <- stats::median(dataset[["NumberOfVisits"]], na.rm = TRUE)
mode_visit <- modeest::mfv(dataset[["NumberOfVisits"]], na_rm = TRUE)

# --- 5. Print the results ---
# One console line per variable. cat() joins its arguments with a single
# space (its default separator, spelled out here), which is why the rendered
# lines show two spaces before each " | " label.
cat("===== Central Tendency =====\n", sep = " ")
## ===== Central Tendency =====
cat(
  "TotalPurchase -> Mean:", mean_total,
  " | Median:", median_total,
  " | Mode:", mode_total,
  "\n",
  sep = " "
)
## TotalPurchase -> Mean: 211.795  | Median: 108.5  | Mode: 33
cat(
  "Age -> Mean:", mean_age,
  " | Median:", median_age,
  " | Mode:", mode_age,
  "\n",
  sep = " "
)
## Age -> Mean: 39.99  | Median: 39  | Mode: 18
cat(
  "NumberOfVisits -> Mean:", mean_visit,
  " | Median:", median_visit,
  " | Mode:", mode_visit,
  "\n",
  sep = " "
)
## NumberOfVisits -> Mean: 5.165  | Median: 5  | Mode: 5
# --- 6. Data visualisation ---

## 6a. Histogram of TotalPurchase
# Histogram with a bin width of 10 and a dashed red reference line at the
# median computed earlier (`median_total`).
ggplot(dataset, aes(x = TotalPurchase)) +
  geom_histogram(binwidth = 10, fill = "#4e79a7", color = "white") +
  # xintercept is passed as a fixed parameter rather than inside aes(): a
  # constant mapped through aes() is evaluated against the inherited data,
  # so one identical, overplotted line would be drawn per row of `dataset`.
  geom_vline(
    xintercept = median_total,
    color = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(title = "Distribusi TotalPurchase",
       x = "Total Purchase",
       y = "Frekuensi",
       subtitle = "Garis merah menunjukkan median") +
  theme_minimal()

## 6b. Boxplot of TotalPurchase
# A single box for the whole TotalPurchase column (no grouping variable).
ggplot(data = dataset, mapping = aes(y = TotalPurchase)) +
  geom_boxplot(colour = "black", fill = "#f28e2b") +
  ggtitle("Boxplot TotalPurchase") +
  ylab("Total Purchase") +
  theme_minimal()

## 6c. Histogram of Age
# Histogram with a bin width of 2 and a dashed blue reference line at the
# mean computed earlier (`mean_age`).
ggplot(dataset, aes(x = Age)) +
  geom_histogram(binwidth = 2, fill = "#59a14f", color = "white") +
  # xintercept is passed as a fixed parameter rather than inside aes(): a
  # constant mapped through aes() is evaluated against the inherited data,
  # so one identical, overplotted line would be drawn per row of `dataset`.
  geom_vline(
    xintercept = mean_age,
    color = "blue",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(title = "Distribusi Usia Pelanggan (Age)",
       x = "Usia",
       y = "Frekuensi",
       subtitle = "Garis biru menunjukkan mean") +
  theme_minimal()

## 6d. Boxplot of Age
# A single box for the whole Age column (no grouping variable).
ggplot(data = dataset, mapping = aes(y = Age)) +
  geom_boxplot(colour = "black", fill = "#edc948") +
  ggtitle("Boxplot Usia Pelanggan") +
  ylab("Age") +
  theme_minimal()

## 6e. Histogram of NumberOfVisits
# Histogram with a bin width of 1 and a dashed blue reference line at the
# mean computed earlier (`mean_visit`).
ggplot(dataset, aes(x = NumberOfVisits)) +
  geom_histogram(binwidth = 1, fill = "#e15759", color = "white") +
  # xintercept is passed as a fixed parameter rather than inside aes(): a
  # constant mapped through aes() is evaluated against the inherited data,
  # so one identical, overplotted line would be drawn per row of `dataset`.
  geom_vline(
    xintercept = mean_visit,
    color = "blue",
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(title = "Distribusi Jumlah Kunjungan (NumberOfVisits)",
       x = "Jumlah Kunjungan",
       y = "Frekuensi",
       subtitle = "Garis biru menunjukkan mean") +
  theme_minimal()

## 6f. Boxplot of NumberOfVisits
# A single box for the whole NumberOfVisits column (no grouping variable).
ggplot(data = dataset, mapping = aes(y = NumberOfVisits)) +
  geom_boxplot(colour = "black", fill = "#b07aa1") +
  ggtitle("Boxplot Jumlah Kunjungan (NumberOfVisits)") +
  ylab("Number of Visits") +
  theme_minimal()