Eksplorasi Visualisasi Data: Analisis Tren dan Hubungan Nilai Rumah
di Amerika Serikat
1. Import library dan data
# Memuat paket
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the land-data CSV into the `housing` data frame.
# NOTE(review): the path is absolute and machine-specific; the script will only
# run where this exact file location exists -- confirm before sharing.
housing <- read.csv(
  file = "C:/Users/LENOVO/OneDrive/Documents/UGM/UGM Semester 3/Prak komstat/landdata-states.csv"
)
2. Histogram distribusi Home.Value
# Basic histogram of Home.Value using base R graphics (hist() defaults for
# binning and labels, since no other arguments are passed)
hist(housing$Home.Value)

# Histogram of Home.Value using ggplot2.
# The number of bins is set explicitly to 30 -- the value stat_bin() falls back
# to when none is given -- so the plot is unchanged but ggplot2 no longer emits
# the "Pick better value with `binwidth`" message.
ggplot(housing, aes(x = Home.Value)) +
  geom_histogram(bins = 30) +
  labs(title = "Distribusi Home Value", x = "Home Value", y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3. Scatter plot Home.Value vs Date untuk negara bagian MA dan
TX
# Base R scatter plot of Home.Value over Date for MA and TX only.
# Points are coloured by the integer codes of factor(State), i.e. by entries
# 1 and 2 of the active palette (levels sort as MA = 1, TX = 2).
plot(Home.Value ~ Date, col = factor(State),
     data = filter(housing, State %in% c("MA", "TX")))
# The legend uses the same palette indices as the points instead of hard-coded
# colour names, so the swatches match whatever palette is active (the default
# palette's second colour is not pure "red" in R >= 4.0).
legend("topleft", legend = c("MA", "TX"),
       col = 1:2, pch = 1)

# ggplot2 scatter plot of Home.Value over Date for MA and TX only.
# State is mapped to colour so the two states can be told apart, matching the
# colour-by-state base R plot of the same data.
ggplot(filter(housing, State %in% c("MA", "TX")),
       aes(x = Date, y = Home.Value, color = State)) +
  geom_point() +
  labs(title = "Home Value dari MA dan TX", x = "Date", y = "Home Value",
       color = "State")

4. Scatter plot Structure.Cost vs log(Land.Value) untuk tahun
2001.25
# Keep only the observations for the period 2001.25.
# Date is a fractional-year number, so it is compared with dplyr::near()
# (tolerance-based) rather than `==`, which is fragile for floating-point values.
hp <- filter(housing, near(Date, 2001.25))
# Plain scatter plot for the 2001.25 subset:
# log(Land.Value) on the x axis against Structure.Cost on the y axis.
ggplot(data = hp, mapping = aes(x = log(Land.Value), y = Structure.Cost)) +
  geom_point() +
  ggtitle("Structure Cost vs log(Land Value) (2001.25)") +
  xlab("log(Land Value)") +
  ylab("Structure Cost")

5. Menambahkan prediksi regresi linear
# Fit a linear regression of Structure.Cost on log(Land.Value) and store the
# predicted values in hp$pred.sc.
# Predicting with `newdata = hp` always returns one value per row of hp (NA
# where a predictor is missing), so the column assignment cannot fail with a
# length mismatch when lm() drops incomplete rows.
sc_model <- lm(Structure.Cost ~ log(Land.Value), data = hp)
hp$pred.sc <- predict(sc_model, newdata = hp)
# Scatter plot coloured by Home.Value, with the fitted regression line on top.
p <- ggplot(hp, aes(x = log(Land.Value), y = Structure.Cost)) +
  geom_point(aes(color = Home.Value)) +
  geom_line(aes(y = pred.sc)) +
  labs(title = "Regresi Linear: Structure Cost ~ log(Land Value)",
       x = "log(Land Value)", y = "Structure Cost")
print(p)

6. Line chart Home.Value per State dari waktu ke waktu
# Line chart of Home.Value over time, drawing one coloured line per State.
ggplot(
  data = housing,
  mapping = aes(x = Date, y = Home.Value, group = State, color = State)
) +
  geom_line(alpha = 0.8) +
  ggtitle("Home Value per State (Line per State)") +
  xlab("Date (Year)") +
  ylab("Home Value") +
  theme_minimal()
