# -------------------------------------------
# MEMUAT DATA
# -------------------------------------------

# Load the housing data from CSV into `data_rumah`.
# The workspace is intentionally NOT wiped with rm(list = ls()): that call
# only removes objects (attached packages and options survive), so it does
# not give a clean session, and it destroys the caller's workspace when this
# script is source()d. Restart R to get a truly clean run.
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the script runs on other machines.
data_rumah <- read.csv("C:/Users/ASUS/Downloads/AnReg/data.csv")

# Preview the first six rows of the raw data
head(x = data_rumah, n = 6L)
##                  date   price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00  313000        3      1.50        1340     7912    1.5
## 2 2014-05-02 00:00:00 2384000        5      2.50        3650     9050    2.0
## 3 2014-05-02 00:00:00  342000        3      2.00        1930    11947    1.0
## 4 2014-05-02 00:00:00  420000        3      2.25        2000     8030    1.0
## 5 2014-05-02 00:00:00  550000        4      2.50        1940    10500    1.0
## 6 2014-05-02 00:00:00  490000        2      1.00         880     6380    1.0
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         3       1340             0     1955         2005
## 2          0    4         5       3370           280     1921            0
## 3          0    0         4       1930             0     1966            0
## 4          0    0         4       1000          1000     1963            0
## 5          0    0         4       1140           800     1976         1992
## 6          0    0         3        880             0     1938         1994
##                     street      city statezip country
## 1     18810 Densmore Ave N Shoreline WA 98133     USA
## 2          709 W Blaine St   Seattle WA 98119     USA
## 3 26206-26214 143rd Ave SE      Kent WA 98042     USA
## 4          857 170th Pl NE  Bellevue WA 98008     USA
## 5        9105 170th Ave NE   Redmond WA 98052     USA
## 6           522 NE 88th St   Seattle WA 98115     USA
# Preview the last six rows of the raw data
tail(x = data_rumah, n = 6L)
##                     date    price bedrooms bathrooms sqft_living sqft_lot
## 4595 2014-07-09 00:00:00 210614.3        3      2.50        1610     7223
## 4596 2014-07-09 00:00:00 308166.7        3      1.75        1510     6360
## 4597 2014-07-09 00:00:00 534333.3        3      2.50        1460     7573
## 4598 2014-07-09 00:00:00 416904.2        3      2.50        3010     7014
## 4599 2014-07-10 00:00:00 203400.0        4      2.00        2090     6630
## 4600 2014-07-10 00:00:00 220600.0        3      2.50        1490     8102
##      floors waterfront view condition sqft_above sqft_basement yr_built
## 4595      2          0    0         3       1610             0     1994
## 4596      1          0    0         4       1510             0     1954
## 4597      2          0    0         3       1460             0     1983
## 4598      2          0    0         3       3010             0     2009
## 4599      1          0    0         3       1070          1020     1974
## 4600      2          0    0         4       1490             0     1990
##      yr_renovated             street      city statezip country
## 4595            0 26306 127th Ave SE      Kent WA 98030     USA
## 4596         1979     501 N 143rd St   Seattle WA 98133     USA
## 4597         2009   14855 SE 10th Pl  Bellevue WA 98007     USA
## 4598            0   759 Ilwaco Pl NE    Renton WA 98059     USA
## 4599            0  5148 S Creston St   Seattle WA 98178     USA
## 4600            0  18717 SE 258th St Covington WA 98042     USA
# Compact overview of the data frame: dimensions, column names and types
str(object = data_rumah)
## 'data.frame':    4600 obs. of  18 variables:
##  $ date         : chr  "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" ...
##  $ price        : num  313000 2384000 342000 420000 550000 ...
##  $ bedrooms     : num  3 5 3 3 4 2 2 4 3 4 ...
##  $ bathrooms    : num  1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
##  $ sqft_living  : int  1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
##  $ sqft_lot     : int  7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
##  $ floors       : num  1.5 2 1 1 1 1 1 2 1 1.5 ...
##  $ waterfront   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ view         : int  0 4 0 0 0 0 0 0 0 0 ...
##  $ condition    : int  3 5 4 4 4 3 3 3 4 3 ...
##  $ sqft_above   : int  1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
##  $ sqft_basement: int  0 280 0 1000 800 0 0 0 860 0 ...
##  $ yr_built     : int  1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
##  $ yr_renovated : int  2005 0 0 0 1992 1994 0 0 0 2010 ...
##  $ street       : chr  "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
##  $ city         : chr  "Shoreline" "Seattle" "Kent" "Bellevue" ...
##  $ statezip     : chr  "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
##  $ country      : chr  "USA" "USA" "USA" "USA" ...
# -------------------------------------------
# MEMILIH VARIABEL UNTUK REGRESI SEDERHANA
# -------------------------------------------

# Predictor (living area) and response (sale price) as plain vectors;
# later sections of the script refer to these two names directly.
sqft_living <- data_rumah[["sqft_living"]]
price <- data_rumah[["price"]]

# Two-column analysis data frame built from the vectors above
data_analisis <- data.frame(sqft_living = sqft_living, price = price)

# Show the first rows of the analysis data
print(head(data_analisis, n = 6L))
##   sqft_living   price
## 1        1340  313000
## 2        3650 2384000
## 3        1930  342000
## 4        2000  420000
## 5        1940  550000
## 6         880  490000
# -------------------------------------------
# STATISTIK DESKRIPTIF
# -------------------------------------------

# Five-number summary plus mean for both analysis variables.
# NOTE(review): the recorded output shows a minimum price of 0; presumably
# these are missing/invalid sale prices -- confirm before modelling.
summary(object = data_analisis)
##   sqft_living        price         
##  Min.   :  370   Min.   :       0  
##  1st Qu.: 1460   1st Qu.:  322875  
##  Median : 1980   Median :  460944  
##  Mean   : 2139   Mean   :  551963  
##  3rd Qu.: 2620   3rd Qu.:  654963  
##  Max.   :13540   Max.   :26590000
# Sample standard deviation of living area (NAs ignored)
sd(x = sqft_living, na.rm = TRUE)
## [1] 963.2069
# Sample standard deviation of sale price (NAs ignored)
sd(x = price, na.rm = TRUE)
## [1] 563834.7
# -------------------------------------------
# UJI KORELASI PEARSON
# -------------------------------------------

# Pearson product-moment correlation test between living area and price
# (two-sided test of H0: true correlation equals 0)
hasil_korelasi <- cor.test(x = sqft_living, y = price, method = "pearson")

# Display the test statistic, p-value, confidence interval and estimate
print(hasil_korelasi)
## 
##  Pearson's product-moment correlation
## 
## data:  sqft_living and price
## t = 32.334, df = 4598, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4065677 0.4536665
## sample estimates:
##     cor 
## 0.43041
# -------------------------------------------
# SCATTER PLOT DAN GARIS REGRESI 
# -------------------------------------------

# Base-graphics scatter plot of price against living area
plot(
  x = sqft_living,
  y = price,
  pch = 19,
  col = "blue",
  main = "Scatter Plot Luas Rumah vs Harga Rumah",
  xlab = "Luas Rumah (sqft_living)",
  ylab = "Harga Rumah (price)"
)

# Overlay the fitted simple linear regression line of price on sqft_living
abline(reg = lm(formula = price ~ sqft_living), lwd = 2, col = "red")

# -------------------------------------------
# UJI KORELASI SPEARMAN
# -------------------------------------------

# Spearman rank correlation test between living area and price.
# The data contain ties, so an exact p-value cannot be computed; the
# asymptotic p-value is requested explicitly with exact = FALSE instead of
# letting cor.test() warn and fall back to it (the result is the same).
# The result is stored and printed, like the Pearson test, so it is not
# lost when the script is run through source().
hasil_spearman <- cor.test(
  sqft_living,
  price,
  method = "spearman",
  exact = FALSE
)

print(hasil_spearman)
## 
##  Spearman's rank correlation rho
## 
## data:  sqft_living and price
## S = 5981876170, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6312643
# -------------------------------------------
# VISUALISASI GGPlot2
# -------------------------------------------

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# ggplot2 scatter plot of price against living area with a fitted linear
# regression line and its confidence band (se = TRUE).
# The smoothing formula is given explicitly so geom_smooth() does not have to
# guess it and emit the "using formula = 'y ~ x'" message.
ggplot(data_analisis,
       aes(x = sqft_living, y = price)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  labs(title = "Hubungan Luas Rumah dan Harga Rumah",
       x = "Luas Rumah",
       y = "Harga Rumah") +
  theme_minimal()

# -------------------------------------------
# MATRIKS KORELASI
# -------------------------------------------

# Keep only the numeric columns that enter the correlation matrix
data_numeric <- data_rumah[c(
  "price", "bedrooms", "bathrooms",
  "sqft_living", "floors", "condition"
)]

# Pairwise correlations, computed on rows with no missing values
matriks_korelasi <- cor(x = data_numeric, use = "complete.obs")

print(matriks_korelasi)
##                  price   bedrooms  bathrooms sqft_living     floors   condition
## price       1.00000000 0.20033629  0.3271099  0.43041003  0.1514608  0.03491454
## bedrooms    0.20033629 1.00000000  0.5459199  0.59488406  0.1778949  0.02507986
## bathrooms   0.32710992 0.54591993  1.0000000  0.76115370  0.4864276 -0.11999434
## sqft_living 0.43041003 0.59488406  0.7611537  1.00000000  0.3448503 -0.06282598
## floors      0.15146080 0.17789490  0.4864276  0.34485027  1.0000000 -0.27501339
## condition   0.03491454 0.02507986 -0.1199943 -0.06282598 -0.2750134  1.00000000
# Heatmap of the correlation matrix.
# heatmap() row-scales (centres and standardises) its input by default, which
# would make the colours stop representing the actual correlation values.
# The matrix is symmetric and already on a common [-1, 1] scale, so it is
# drawn as symmetric and without rescaling.
heatmap(matriks_korelasi, symm = TRUE, scale = "none")