# -------------------------------------------
# LOAD DATA
# -------------------------------------------
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# global environment from inside a script deletes the caller's objects and
# still does not give a clean session (attached packages and options remain).
# Run this script in a fresh R session instead.

# Read the housing CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific path; adjust it when the
# script is run on another computer.
data_rumah <- read.csv("C:/Users/ASUS/Downloads/AnReg/data.csv")
# Preview the first rows of the data
head(data_rumah)
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00 313000 3 1.50 1340 7912 1.5
## 2 2014-05-02 00:00:00 2384000 5 2.50 3650 9050 2.0
## 3 2014-05-02 00:00:00 342000 3 2.00 1930 11947 1.0
## 4 2014-05-02 00:00:00 420000 3 2.25 2000 8030 1.0
## 5 2014-05-02 00:00:00 550000 4 2.50 1940 10500 1.0
## 6 2014-05-02 00:00:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country
## 1 18810 Densmore Ave N Shoreline WA 98133 USA
## 2 709 W Blaine St Seattle WA 98119 USA
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA
## 4 857 170th Pl NE Bellevue WA 98008 USA
## 5 9105 170th Ave NE Redmond WA 98052 USA
## 6 522 NE 88th St Seattle WA 98115 USA
# Preview the last rows of the data
tail(data_rumah)
## date price bedrooms bathrooms sqft_living sqft_lot
## 4595 2014-07-09 00:00:00 210614.3 3 2.50 1610 7223
## 4596 2014-07-09 00:00:00 308166.7 3 1.75 1510 6360
## 4597 2014-07-09 00:00:00 534333.3 3 2.50 1460 7573
## 4598 2014-07-09 00:00:00 416904.2 3 2.50 3010 7014
## 4599 2014-07-10 00:00:00 203400.0 4 2.00 2090 6630
## 4600 2014-07-10 00:00:00 220600.0 3 2.50 1490 8102
## floors waterfront view condition sqft_above sqft_basement yr_built
## 4595 2 0 0 3 1610 0 1994
## 4596 1 0 0 4 1510 0 1954
## 4597 2 0 0 3 1460 0 1983
## 4598 2 0 0 3 3010 0 2009
## 4599 1 0 0 3 1070 1020 1974
## 4600 2 0 0 4 1490 0 1990
## yr_renovated street city statezip country
## 4595 0 26306 127th Ave SE Kent WA 98030 USA
## 4596 1979 501 N 143rd St Seattle WA 98133 USA
## 4597 2009 14855 SE 10th Pl Bellevue WA 98007 USA
## 4598 0 759 Ilwaco Pl NE Renton WA 98059 USA
## 4599 0 5148 S Creston St Seattle WA 98178 USA
## 4600 0 18717 SE 258th St Covington WA 98042 USA
# Inspect the structure of the data frame: dimensions, column names and types
str(data_rumah)
## 'data.frame': 4600 obs. of 18 variables:
## $ date : chr "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" ...
## $ price : num 313000 2384000 342000 420000 550000 ...
## $ bedrooms : num 3 5 3 3 4 2 2 4 3 4 ...
## $ bathrooms : num 1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
## $ sqft_living : int 1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
## $ sqft_lot : int 7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
## $ floors : num 1.5 2 1 1 1 1 1 2 1 1.5 ...
## $ waterfront : int 0 0 0 0 0 0 0 0 0 0 ...
## $ view : int 0 4 0 0 0 0 0 0 0 0 ...
## $ condition : int 3 5 4 4 4 3 3 3 4 3 ...
## $ sqft_above : int 1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
## $ sqft_basement: int 0 280 0 1000 800 0 0 0 860 0 ...
## $ yr_built : int 1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
## $ yr_renovated : int 2005 0 0 0 1992 1994 0 0 0 2010 ...
## $ street : chr "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
## $ city : chr "Shoreline" "Seattle" "Kent" "Bellevue" ...
## $ statezip : chr "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
## $ country : chr "USA" "USA" "USA" "USA" ...
# -------------------------------------------
# SELECT VARIABLES FOR SIMPLE REGRESSION
# -------------------------------------------
# Pull the two columns of interest out of the full data set as plain vectors.
price <- data_rumah[["price"]]
sqft_living <- data_rumah[["sqft_living"]]
# Assemble the two-column analysis data frame (living area first, then price)
data_analisis <- data.frame(sqft_living = sqft_living, price = price)
# Show the first rows of the analysis data
print(head(data_analisis))
## sqft_living price
## 1 1340 313000
## 2 3650 2384000
## 3 1930 342000
## 4 2000 420000
## 5 1940 550000
## 6 880 490000
# -------------------------------------------
# DESCRIPTIVE STATISTICS
# -------------------------------------------
# Minimum, quartiles, median, mean and maximum of each analysis column.
# NOTE(review): the recorded output below reports a minimum price of 0;
# confirm whether zero-price rows are valid sales or should be excluded.
summary(data_analisis)
## sqft_living price
## Min. : 370 Min. : 0
## 1st Qu.: 1460 1st Qu.: 322875
## Median : 1980 Median : 460944
## Mean : 2139 Mean : 551963
## 3rd Qu.: 2620 3rd Qu.: 654963
## Max. :13540 Max. :26590000
# Standard deviation of each analysis column (missing values ignored)
sd(data_analisis[["sqft_living"]], na.rm = TRUE)
## [1] 963.2069
sd(data_analisis[["price"]], na.rm = TRUE)
## [1] 563834.7
# -------------------------------------------
# PEARSON CORRELATION TEST
# -------------------------------------------
# Two-sided test of zero linear correlation between living area and price.
# `alternative` and `conf.level` are cor.test()'s defaults, stated explicitly.
hasil_korelasi <- cor.test(
  x = sqft_living,
  y = price,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
print(hasil_korelasi)
##
## Pearson's product-moment correlation
##
## data: sqft_living and price
## t = 32.334, df = 4598, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4065677 0.4536665
## sample estimates:
## cor
## 0.43041
# -------------------------------------------
# SCATTER PLOT WITH REGRESSION LINE
# -------------------------------------------
# Draw price against living area as solid blue points.
plot(
  x = sqft_living,
  y = price,
  col = "blue",
  pch = 19,
  xlab = "Luas Rumah (sqft_living)",
  ylab = "Harga Rumah (price)",
  main = "Scatter Plot Luas Rumah vs Harga Rumah"
)
# Overlay the least-squares line of price on living area in red.
abline(
  reg = lm(formula = price ~ sqft_living),
  lwd = 2,
  col = "red"
)

# -------------------------------------------
# SPEARMAN CORRELATION TEST
# -------------------------------------------
# The data contain tied values, so an exact p-value is not available: with the
# default settings cor.test() warns "Cannot compute exact p-value with ties"
# and falls back to the asymptotic p-value. Requesting the asymptotic p-value
# up front (exact = FALSE) yields the same statistic and p-value without the
# warning.
cor.test(sqft_living, price, method = "spearman", exact = FALSE)
##
## Spearman's rank correlation rho
##
## data: sqft_living and price
## S = 5981876170, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6312643
# -------------------------------------------
# GGPLOT2 VISUALISATION
# -------------------------------------------
# Attach ggplot2, which provides the plotting functions used below.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Scatter plot of price against living area with a fitted linear trend line
# and its confidence band (se = TRUE).
ggplot(data = data_analisis, mapping = aes(x = sqft_living, y = price)) +
  geom_point(size = 2) +
  geom_smooth(se = TRUE, method = "lm") +
  labs(
    x = "Luas Rumah",
    y = "Harga Rumah",
    title = "Hubungan Luas Rumah dan Harga Rumah"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# -------------------------------------------
# CORRELATION MATRIX
# -------------------------------------------
# Keep only the numeric columns of interest
data_numeric <- data_rumah[c(
  "price", "bedrooms", "bathrooms",
  "sqft_living", "floors", "condition"
)]
# Pairwise Pearson correlations, computed on rows with no missing values
matriks_korelasi <- cor(
  x = data_numeric,
  use = "complete.obs",
  method = "pearson"
)
print(matriks_korelasi)
## price bedrooms bathrooms sqft_living floors condition
## price 1.00000000 0.20033629 0.3271099 0.43041003 0.1514608 0.03491454
## bedrooms 0.20033629 1.00000000 0.5459199 0.59488406 0.1778949 0.02507986
## bathrooms 0.32710992 0.54591993 1.0000000 0.76115370 0.4864276 -0.11999434
## sqft_living 0.43041003 0.59488406 0.7611537 1.00000000 0.3448503 -0.06282598
## floors 0.15146080 0.17789490 0.4864276 0.34485027 1.0000000 -0.27501339
## condition 0.03491454 0.02507986 -0.1199943 -0.06282598 -0.2750134 1.00000000
# Correlation heatmap.
# heatmap() centres and scales each row by default when symm is FALSE, so the
# colours would show row-standardised values rather than the correlation
# coefficients. Treat the matrix as symmetric and disable scaling so the
# colours map directly to the correlations.
heatmap(matriks_korelasi, symm = TRUE, scale = "none")
