# -------------------------------------------
# HANDS ON KORELASI
# Dataset: Electric Vehicle Population
# -------------------------------------------

# Workspace reset intentionally omitted: `rm(list = ls())` would delete
# every object in the caller's global environment, yet it still would not
# give a clean session (attached packages and options stay as they were).
# Restart R before running this script when a fresh session is needed.

# Load library
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(ppcor)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# -------------------------------------------
# IMPORT DATA
# -------------------------------------------
# Read the dataset CSV; the relative path is resolved against the current
# working directory.
df <- read_csv(file = "Sanaheeee.csv")
## Rows: 124716 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): VIN (1-10), County, City, State, Make, Model, Electric Vehicle Typ...
## dbl  (6): Postal Code, Model Year, Electric Range, Base MSRP, Legislative Di...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the two numeric columns used in the analysis, then drop every
# row that has a missing value in either of them. `select` is called with
# an explicit `dplyr::` prefix because MASS (attached by ppcor) masks it.
data_ev <- na.omit(
  dplyr::select(df, all_of(c("Model Year", "Electric Range")))
)

# Preview the first rows of the cleaned data.
head(x = data_ev)
## # A tibble: 6 × 2
##   `Model Year` `Electric Range`
##          <dbl>            <dbl>
## 1         2020              322
## 2         2019              220
## 3         2021               22
## 4         2019              289
## 5         2017               14
## 6         2015               84
# -------------------------------------------
# DESCRIPTIVE STATISTICS
# -------------------------------------------
# Minimum, quartiles, median, mean and maximum for each column.
summary(object = data_ev)
##    Model Year   Electric Range  
##  Min.   :1997   Min.   :  0.00  
##  1st Qu.:2018   1st Qu.:  0.00  
##  Median :2020   Median : 25.00  
##  Mean   :2019   Mean   : 79.47  
##  3rd Qu.:2022   3rd Qu.:200.00  
##  Max.   :2023   Max.   :337.00
# Standard deviation of each variable.
sd(data_ev[["Model Year"]])
## [1] 2.976174
sd(data_ev[["Electric Range"]])
## [1] 100.332
# -------------------------------------------
# PEARSON CORRELATION TEST
# -------------------------------------------

# Test the linear association between electric range and model year and
# store the test result in `hasil_pearson`.
hasil_pearson <- cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  method = "pearson"
)

# Show the test statistic, p-value, confidence interval and estimate.
print(hasil_pearson)
## 
##  Pearson's product-moment correlation
## 
## data:  data_ev$`Electric Range` and data_ev$`Model Year`
## t = -134.75, df = 124714, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3613234 -0.3516341
## sample estimates:
##        cor 
## -0.3564883
# -------------------------------------------
# SCATTER PLOT
# -------------------------------------------
# Electric range on the x-axis against model year on the y-axis, drawn
# with solid points (pch = 19).
with(
  data_ev,
  plot(
    x = `Electric Range`,
    y = `Model Year`,
    pch = 19,
    xlab = "Electric Range",
    ylab = "Model Year",
    main = "Scatter Plot Electric Range vs Model Year"
  )
)

# Overlay the least-squares line of model year regressed on electric range.
abline(
  reg = lm(formula = `Model Year` ~ `Electric Range`, data = data_ev),
  lwd = 2,
  col = "red"
)

# -------------------------------------------
# SPEARMAN TEST
# -------------------------------------------
# Rank-based correlation between electric range and model year.
# `exact = FALSE` asks for the asymptotic p-value instead of an exact one.
cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  method = "spearman",
  exact = FALSE
)
## 
##  Spearman's rank correlation rho
## 
## data:  data_ev$`Electric Range` and data_ev$`Model Year`
## S = 5.3706e+14, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6611327
# -------------------------------------------
# KENDALL TEST
# -------------------------------------------
# Kendall's tau rank correlation between electric range and model year.
cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  method = "kendall"
)
## 
##  Kendall's rank correlation tau
## 
## data:  data_ev$`Electric Range` and data_ev$`Model Year`
## z = -203.84, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.4315935
# -------------------------------------------
# CORRELATION MATRIX
# -------------------------------------------
# Pairwise correlation matrix of the columns of data_ev (cor() defaults to
# the Pearson method).
matriks_korelasi <- cor(data_ev)
matriks_korelasi
##                Model Year Electric Range
## Model Year      1.0000000     -0.3564883
## Electric Range -0.3564883      1.0000000
# Draw the matrix as a heatmap of the correlation values themselves.
# heatmap() defaults to scale = "row", which centres and rescales each row
# so the colours no longer represent the correlations; it also reorders
# rows and columns through hierarchical clustering. Both are switched off:
#   scale = "none"        -> colour the raw correlation coefficients
#   symm = TRUE           -> treat the input as a symmetric matrix
#   Rowv = NA, Colv = NA  -> no dendrograms, keep the original ordering
#   margins               -> leave room for the long variable names
heatmap(
  matriks_korelasi,
  Rowv = NA,
  Colv = NA,
  symm = TRUE,
  scale = "none",
  margins = c(8, 8)
)