# -------------------------------------------
# HANDS ON KORELASI
# Dataset: Electric Vehicle Population
# -------------------------------------------
# The workspace is intentionally NOT cleared here. `rm(list = ls())` deletes
# the caller's objects when this file is sourced, yet leaves attached packages
# and options in place, so it does not yield a clean session. For a
# reproducible run, restart R (or execute the script with Rscript) instead.
# Load library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(ppcor)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# -------------------------------------------
# IMPORT DATA
# -------------------------------------------
# Read the raw CSV (path is relative to the working directory) into a tibble.
# readr guesses the column types and reports them in a message.
df <- readr::read_csv(file = "Sanaheeee.csv")
## Rows: 124716 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): VIN (1-10), County, City, State, Make, Model, Electric Vehicle Typ...
## dbl (6): Postal Code, Model Year, Electric Range, Base MSRP, Legislative Di...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the two numeric columns analysed below and drop every row that
# contains a missing value. `select` is namespaced because MASS (attached
# through ppcor) masks dplyr::select.
data_ev <- na.omit(
  dplyr::select(df, all_of(c("Model Year", "Electric Range")))
)
# Preview the first six rows.
head(data_ev, n = 6L)
## # A tibble: 6 × 2
## `Model Year` `Electric Range`
## <dbl> <dbl>
## 1 2020 322
## 2 2019 220
## 3 2021 22
## 4 2019 289
## 5 2017 14
## 6 2015 84
# -------------------------------------------
# DESCRIPTIVE STATISTICS
# -------------------------------------------
# Per-column summary (min, quartiles, median, mean, max) of both variables.
summary(object = data_ev)
## Model Year Electric Range
## Min. :1997 Min. : 0.00
## 1st Qu.:2018 1st Qu.: 0.00
## Median :2020 Median : 25.00
## Mean :2019 Mean : 79.47
## 3rd Qu.:2022 3rd Qu.:200.00
## Max. :2023 Max. :337.00
# Standard deviation of each variable
sd(data_ev[["Model Year"]])
## [1] 2.976174
sd(data_ev[["Electric Range"]])
## [1] 100.332
# -------------------------------------------
# PEARSON CORRELATION TEST
# -------------------------------------------
# Test of linear association between electric range and model year. The
# two-sided alternative and 95% confidence level are the function defaults,
# spelled out here for clarity.
hasil_pearson <- cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
print(hasil_pearson)
##
## Pearson's product-moment correlation
##
## data: data_ev$`Electric Range` and data_ev$`Model Year`
## t = -134.75, df = 124714, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3613234 -0.3516341
## sample estimates:
## cor
## -0.3564883
# -------------------------------------------
# SCATTER PLOT
# -------------------------------------------
# Electric range on the horizontal axis, model year on the vertical axis,
# drawn with solid points (pch = 19).
plot(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  pch = 19,
  xlab = "Electric Range",
  ylab = "Model Year",
  main = "Scatter Plot Electric Range vs Model Year"
)
# Overlay the least-squares line of model year regressed on electric range.
abline(
  reg = lm(`Model Year` ~ `Electric Range`, data = data_ev),
  lwd = 2,
  col = "red"
)

# -------------------------------------------
# SPEARMAN TEST
# -------------------------------------------
# Rank-based (monotonic) association between the two variables.
# `exact = FALSE` asks for the asymptotic p-value instead of an exact one.
cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  alternative = "two.sided",
  method = "spearman",
  exact = FALSE
)
##
## Spearman's rank correlation rho
##
## data: data_ev$`Electric Range` and data_ev$`Model Year`
## S = 5.3706e+14, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6611327
# -------------------------------------------
# KENDALL TEST
# -------------------------------------------
# Kendall's tau rank correlation between the two variables, tested against
# the two-sided alternative (the function default, stated explicitly).
cor.test(
  x = data_ev$`Electric Range`,
  y = data_ev$`Model Year`,
  alternative = "two.sided",
  method = "kendall"
)
##
## Kendall's rank correlation tau
##
## data: data_ev$`Electric Range` and data_ev$`Model Year`
## z = -203.84, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.4315935
# -------------------------------------------
# CORRELATION MATRIX
# -------------------------------------------
# Pairwise Pearson correlations of all columns in data_ev. Wrapping the
# assignment in parentheses stores the matrix and prints it in one step.
(matriks_korelasi <- cor(x = data_ev, method = "pearson"))
## Model Year Electric Range
## Model Year 1.0000000 -0.3564883
## Electric Range -0.3564883 1.0000000
# Draw the correlation matrix as a heatmap.
# `symm = TRUE` tells heatmap() the matrix is symmetric, so rows and columns
# share one ordering. `scale = "none"` disables the default row-wise
# centering/scaling, which would otherwise replace the correlation values
# with per-row z-scores before colouring.
heatmap(matriks_korelasi, symm = TRUE, scale = "none")
