R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for the built-in `cars` dataset.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
# Import the workbook from the working directory and preview its first rows.
online_retail <- readxl::read_xlsx(path = "online_retail.xlsx")
head(online_retail, n = 6L)
## # A tibble: 6 × 8
##   InvoiceNo StockCode Description         Quantity InvoiceDate         UnitPrice
##   <chr>     <chr>     <chr>                  <dbl> <dttm>                  <dbl>
## 1 536365    85123A    WHITE HANGING HEAR…        6 2010-12-01 08:26:00      2.55
## 2 536365    71053     WHITE METAL LANTERN        6 2010-12-01 08:26:00      3.39
## 3 536365    84406B    CREAM CUPID HEARTS…        8 2010-12-01 08:26:00      2.75
## 4 536365    84029G    KNITTED UNION FLAG…        6 2010-12-01 08:26:00      3.39
## 5 536365    84029E    RED WOOLLY HOTTIE …        6 2010-12-01 08:26:00      3.39
## 6 536365    22752     SET 7 BABUSHKA NES…        2 2010-12-01 08:26:00      7.65
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
library(readxl)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the raw transactions and report how many rows were read.
data_raw <- readxl::read_xlsx(path = "online_retail.xlsx")
cat("Data berhasil dimuat. jumlah baris awal:", nrow(data_raw), "\n")
## Data berhasil dimuat. jumlah baris awal: 541909

# Build the cleaned table. TotalPrice is derived first because the last
# condition filters on it; a row is kept only when every condition is TRUE.
online_retail_clean <- dplyr::filter(
  dplyr::mutate(data_raw, TotalPrice = Quantity * UnitPrice),
  !is.na(CustomerID),       # customer id must be present
  !grepl("^C", InvoiceNo),  # drop invoice numbers that start with "C"
  Quantity > 0,
  UnitPrice > 0,
  TotalPrice < 1000
)
cat("Data setelah cleaning dan engineering. Jumlah baris akhir:", nrow(online_retail_clean), "\n")
## Data setelah cleaning dan engineering. Jumlah baris akhir: 397568
# Recompute TotalPrice (the same Quantity * UnitPrice product that was used
# when online_retail_clean was built), then keep only the two modelling
# columns, renamed to X (predictor) and Y (response).
online_retail_clean <- dplyr::mutate(
  online_retail_clean,
  TotalPrice = Quantity * UnitPrice
)
data_model <- dplyr::select(online_retail_clean, X = Quantity, Y = TotalPrice)

cat("\nVariabel X (Independen): Quantity\n")
## 
## Variabel X (Independen): Quantity
cat("Variabel Y (Dependen): TotalPrice\n")
## Variabel Y (Dependen): TotalPrice
# Fit a simple linear regression of Y on X and print its summary table.
model_regresi <- lm(formula = Y ~ X, data = data_model)

cat("\n--- RINGKASAN MODEL REGRESI LINEAR ---\n")
## 
## --- RINGKASAN MODEL REGRESI LINEAR ---
model_regresi %>%
  summary() %>%
  print()
## 
## Call:
## lm(formula = Y ~ X, data = data_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2225.85   -10.08    -5.13     2.10   896.34 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.064902   0.057192   193.5   <2e-16 ***
## X            0.755162   0.001566   482.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34.02 on 397566 degrees of freedom
## Multiple R-squared:  0.369,  Adjusted R-squared:  0.369 
## F-statistic: 2.325e+05 on 1 and 397566 DF,  p-value: < 2.2e-16
# Scatter plot of Y against X with the fitted straight line and its
# confidence band drawn on top of the points.
plot_regresi <- ggplot(data = data_model, mapping = aes(x = X, y = Y)) +
  geom_point(colour = "#007BFF", alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE, colour = "#DC3545") +
  labs(
    title = "Regresi Linear: Total Harga vs. Kuantitas Barang",
    x = "Kuantitas Barang (Quantity) - Variabel X",
    y = "Total Harga (TotalPrice) - Variabel Y",
    caption = "Data dibatasi TotalPrice < 1000 GBP untuk visualisasi"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

print(plot_regresi)
## `geom_smooth()` using formula = 'y ~ x'

# Extract the fitted coefficients and print the regression line, with both
# numbers formatted to two decimals.
koefisien <- coef(model_regresi)
cat("\nPersamaan Regresi (Y = intercept + slope * X):\n")
## 
## Persamaan Regresi (Y = intercept + slope * X):
cat(
  sprintf(
    "TotalPrice = %.2f + %.2f * Quantity\n",
    koefisien[["(Intercept)"]],  # intercept term
    koefisien[["X"]]             # slope on X
  )
)
## TotalPrice = 11.06 + 0.76 * Quantity
# Restart the cleaning from the raw table and report each step.
# Step 1: keep only rows that have a CustomerID.
online_retail_clean <- dplyr::filter(data_raw, !is.na(CustomerID))

cat("   - Aksi 1: Hapus NA di CustomerID. Baris terhapus:", nrow(data_raw) - nrow(online_retail_clean), "\n")
##    - Aksi 1: Hapus NA di CustomerID. Baris terhapus: 135080
cat("   - Sisa baris setelah Aksi 1:", nrow(online_retail_clean), "\n\n")
##    - Sisa baris setelah Aksi 1: 406829

# Report the classes of the date and customer columns.
# NOTE(review): the messages say "diubah" (converted), but no conversion is
# performed here; the existing classes are only printed.
cat("3. Perbaikan Struktur Data (Tipe Data):\n")
## 3. Perbaikan Struktur Data (Tipe Data):
cat("   - InvoiceDate diubah menjadi Tipe Data: ", class(online_retail_clean[["InvoiceDate"]]), "\n")
##    - InvoiceDate diubah menjadi Tipe Data:  POSIXct POSIXt
cat("   - CustomerID diubah menjadi Tipe Data: ", class(online_retail_clean[["CustomerID"]]), "\n\n")
##    - CustomerID diubah menjadi Tipe Data:  numeric

# Step 4: keep only rows with strictly positive quantity and unit price.
online_retail_clean <- dplyr::filter(online_retail_clean, Quantity > 0, UnitPrice > 0)

cat("   - Aksi 4: Hapus Quantity <= 0 dan UnitPrice <= 0. Sisa baris:", nrow(online_retail_clean), "\n\n")
##    - Aksi 4: Hapus Quantity <= 0 dan UnitPrice <= 0. Sisa baris: 397884
# Add the line total (Quantity * UnitPrice) to the cleaned table and print a
# short summary of the cleaning run.
online_retail_final <- online_retail_clean %>%
  mutate(TotalPrice = Quantity * UnitPrice)
cat("5. Ringkasan Proses Cleaning:\n")
## 5. Ringkasan Proses Cleaning:
cat("   - Total baris awal:", nrow(data_raw), "\n")
##    - Total baris awal: 541909
cat("   - Total baris akhir:", nrow(online_retail_final), "\n")
##    - Total baris akhir: 397884
cat("   - Struktur Data Akhir:\n")
##    - Struktur Data Akhir:
# Show the structure of the cleaned table itself rather than the raw
# `online_retail`. glimpse() prints on its own and returns its input
# invisibly, so wrapping it in print() would dump the whole tibble again.
glimpse(online_retail_final)
# NOTE(review): the recorded output below predates this fix (it shows the raw
# 541,909-row `online_retail` table, printed twice); re-knit to refresh it.
## Rows: 541,909
## Columns: 8
## $ InvoiceNo   <chr> "536365", "536365", "536365", "536365", "536365", "536365"…
## $ StockCode   <chr> "85123A", "71053", "84406B", "84029G", "84029E", "22752", …
## $ Description <chr> "WHITE HANGING HEART T-LIGHT HOLDER", "WHITE METAL LANTERN…
## $ Quantity    <dbl> 6, 6, 8, 6, 6, 2, 6, 6, 6, 32, 6, 6, 8, 6, 6, 3, 2, 3, 3, …
## $ InvoiceDate <dttm> 2010-12-01 08:26:00, 2010-12-01 08:26:00, 2010-12-01 08:2…
## $ UnitPrice   <dbl> 2.55, 3.39, 2.75, 3.39, 3.39, 7.65, 4.25, 1.85, 1.85, 1.69…
## $ CustomerID  <dbl> 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17…
## $ Country     <chr> "United Kingdom", "United Kingdom", "United Kingdom", "Uni…
## # A tibble: 541,909 × 8
##    InvoiceNo StockCode Description        Quantity InvoiceDate         UnitPrice
##    <chr>     <chr>     <chr>                 <dbl> <dttm>                  <dbl>
##  1 536365    85123A    WHITE HANGING HEA…        6 2010-12-01 08:26:00      2.55
##  2 536365    71053     WHITE METAL LANTE…        6 2010-12-01 08:26:00      3.39
##  3 536365    84406B    CREAM CUPID HEART…        8 2010-12-01 08:26:00      2.75
##  4 536365    84029G    KNITTED UNION FLA…        6 2010-12-01 08:26:00      3.39
##  5 536365    84029E    RED WOOLLY HOTTIE…        6 2010-12-01 08:26:00      3.39
##  6 536365    22752     SET 7 BABUSHKA NE…        2 2010-12-01 08:26:00      7.65
##  7 536365    21730     GLASS STAR FROSTE…        6 2010-12-01 08:26:00      4.25
##  8 536366    22633     HAND WARMER UNION…        6 2010-12-01 08:28:00      1.85
##  9 536366    22632     HAND WARMER RED P…        6 2010-12-01 08:28:00      1.85
## 10 536367    84879     ASSORTED COLOUR B…       32 2010-12-01 08:34:00      1.69
## # ℹ 541,899 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.2
## corrplot 0.95 loaded
# Reload the workbook and keep only its numeric columns; the same selection
# feeds both the descriptive summary and the correlation matrix.
data <- read_excel("online_retail.xlsx")
library(dplyr)
library(corrplot)
library(psych)
numeric_data <- data %>%
  select(where(is.numeric))

# Descriptive statistics for every numeric column.
summary(numeric_data)
##     Quantity            UnitPrice            CustomerID    
##  Min.   :-80995.000   Min.   :-11062.060   Min.   :12346   
##  1st Qu.:     1.000   1st Qu.:     1.250   1st Qu.:13953   
##  Median :     3.000   Median :     2.080   Median :15152   
##  Mean   :     9.552   Mean   :     4.611   Mean   :15288   
##  3rd Qu.:    10.000   3rd Qu.:     4.130   3rd Qu.:16791   
##  Max.   : 80995.000   Max.   : 38970.000   Max.   :18287   
##                                            NA's   :135080

# Pairwise correlations computed on rows without missing values, drawn as
# circles in the upper triangle.
# NOTE(review): CustomerID is numeric and therefore included; it looks like an
# identifier, so confirm that its correlations are wanted.
cor_matrix <- cor(x = numeric_data, use = "complete.obs")
corrplot(cor_matrix, method = "circle", type = "upper", tl.cex = 0.8)

# Print per-column summary statistics for the built-in `cars` dataset.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

# Copy the built-in `pressure` dataset and draw its default plot.
pressure2 <- datasets::pressure
plot(x = pressure2)

You can also embed plots, for example:

library(tidyverse)
library(readxl)

# 1. Read the Excel file
online_retail <- read_excel(path = "online_retail.xlsx")

# 2. Add a TotalSales column (Quantity * UnitPrice)
online_retail <- mutate(online_retail, TotalSales = Quantity * UnitPrice)

# 3. Sum sales per StockCode, sort from largest to smallest, keep the top five
produk_terlaris <- online_retail %>%
  group_by(StockCode) %>%
  summarise(TotalSales = sum(TotalSales), .groups = "drop") %>%
  arrange(desc(TotalSales)) %>%
  head(n = 5)

# 4. Horizontal bar chart, bars ordered by their total
ggplot(
  data = produk_terlaris,
  mapping = aes(x = reorder(StockCode, TotalSales), y = TotalSales)
) +
  geom_col(fill = "maroon") +
  coord_flip() +
  labs(
    title = "5 Produk Terlaris Berdasarkan StockCode",
    x = "StockCode",
    y = "Total Penjualan"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 10))

# List the columns now present in the table.
names(online_retail)
## [1] "InvoiceNo"   "StockCode"   "Description" "Quantity"    "InvoiceDate"
## [6] "UnitPrice"   "CustomerID"  "Country"     "TotalSales"
library(readxl)
library(ggplot2)

# Read the Excel file
online_retail <- read_excel("online_retail.xlsx")

# Month label for every row.
# format(x, "%b") follows the session's LC_TIME locale, while the ordering
# step below matches against the English `month.abb`; under a non-English
# locale the labels would not match and every month would become NA. Taking
# the month number ("%m") and indexing `month.abb` yields the English
# abbreviations in any locale.
# NOTE(review): the year is dropped, so the same month from different years
# is pooled into a single count.
online_retail$bulan <- month.abb[
  as.integer(format(online_retail$InvoiceDate, "%m"))
]

# Count rows per month
tab <- as.data.frame(table(online_retail$bulan))
names(tab) <- c("bulan", "realisasi")

# Order the months Jan-Dec
tab$bulan <- factor(tab$bulan, levels = month.abb)
tab <- tab[order(tab$bulan), ]

# Add target, difference and status columns.
# The target is a single value for all months: the mean monthly count raised
# by 10% and rounded.
tab$target  <- round(mean(tab$realisasi) * 1.10)
tab$selisih <- tab$realisasi - tab$target
tab$status  <- ifelse(tab$realisasi >= tab$target, "Tercapai", "Belum Tercapai")

# Chart: target as bars, actual counts as a line with status-coloured points
ggplot(tab, aes(x = bulan)) +
  geom_col(aes(y = target), fill = "lightblue", alpha = 0.6) +
  geom_line(aes(y = realisasi, group = 1), color = "darkblue", linewidth = 1) +
  geom_point(aes(y = realisasi, color = status), size = 3) +
  scale_color_manual(values = c("Tercapai" = "forestgreen", "Belum Tercapai" = "red")) +
  theme_minimal()