Eksplorasi Visualisasi Data: Analisis Tren dan Hubungan Nilai Rumah di Amerika Serikat

1. Import library dan data

# Memuat paket
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the dataset into the `housing` data frame.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists — consider a project-relative path.
housing <- read.csv(
  file = "C:/Users/LENOVO/OneDrive/Documents/UGM/UGM Semester 3/Prak komstat/landdata-states.csv"
)

2. Histogram distribusi Home.Value

# Basic histogram of Home.Value using base R graphics
hist(housing$Home.Value)

# The same distribution drawn with ggplot2.
# The bin count is stated explicitly: 30 is what stat_bin() falls back to when
# nothing is given, so the figure is unchanged, but the choice is now visible
# in the code and the "Pick better value with `binwidth`" message is no longer
# emitted.
ggplot(housing, aes(x = Home.Value)) +
  geom_histogram(bins = 30) +
  labs(title = "Distribusi Home Value", x = "Home Value", y = "Frequency")

3. Scatter plot Home.Value vs Date untuk negara bagian MA dan TX

# Base R scatter plot of Home.Value against Date, restricted to MA and TX.
# Points are coloured by the integer codes of factor(State), which index into
# the active palette().
plot(Home.Value ~ Date, col = factor(State),
     data = filter(housing, State %in% c("MA", "TX")))

# The legend uses the same palette indices (1, 2) as the points instead of
# hard-coded colour names: since R 4.0 the second default palette colour is no
# longer pure "red", so a literal "red" swatch would not match the plotted TX
# points. Factor levels sort alphabetically, hence MA = 1 and TX = 2.
legend("topleft", legend = c("MA", "TX"),
       col = 1:2, pch = 1)

# ggplot2 scatter plot of Home.Value against Date, restricted to MA and TX.
# State is mapped to colour so the two states can be told apart (matching the
# base R version of this plot); ggplot2 adds the legend automatically.
ggplot(filter(housing, State %in% c("MA", "TX")),
       aes(x = Date, y = Home.Value, color = State)) +
  geom_point() +
  labs(title = "Home Value dari MA dan TX", x = "Date", y = "Home Value")

4. Scatter plot Structure.Cost vs log(Land.Value) untuk tahun 2001.25

# Keep only the rows whose Date is 2001.25.
# Date is a floating-point column, so it is compared with dplyr::near()
# (equality within a small tolerance) rather than `==`, which can silently
# miss rows if a value carries any representation error.
hp <- filter(housing, near(Date, 2001.25))

# Scatter plot of Structure.Cost against the natural log of Land.Value
ggplot(hp,
       aes(y = Structure.Cost, x = log(Land.Value))) +
  geom_point() +
  labs(title = "Structure Cost vs log(Land Value) (2001.25)",
       x = "log(Land Value)", y = "Structure Cost")

5. Menambahkan prediksi regresi linear

# Fit Structure.Cost ~ log(Land.Value) by ordinary least squares and store the
# fitted values as a new column of `hp`.
# na.action = na.exclude makes predict() pad rows that were dropped for
# missing values with NA, so the result always has nrow(hp) elements; with the
# default na.omit the vector would be shorter and the assignment would fail.
hp$pred.sc <- predict(
  lm(Structure.Cost ~ log(Land.Value), data = hp, na.action = na.exclude)
)

# Scatter plot (points coloured by Home.Value) with the fitted regression line
# drawn through the stored predictions.
p <- ggplot(hp, aes(x = log(Land.Value), y = Structure.Cost)) +
  geom_point(aes(color = Home.Value)) +
  geom_line(aes(y = pred.sc)) +
  labs(title = "Regresi Linear: Structure Cost ~ log(Land Value)",
       x = "log(Land Value)", y = "Structure Cost")

print(p)

6. Line chart Home.Value per State dari waktu ke waktu

# Line chart of Home.Value over Date: one line per State, coloured by State.
ggplot(
  data = housing,
  mapping = aes(Date, Home.Value, colour = State, group = State)
) +
  geom_line(alpha = 0.8) +
  ggtitle("Home Value per State (Line per State)") +
  xlab("Date (Year)") +
  ylab("Home Value") +
  theme_minimal()