library(broom) #untuk merapikan tampilan data
library(glmnet) #metode seleksi LASSO dan RIDGE
## Loading required package: Matrix
## Loaded glmnet 4.1-6
library(glmnetUtils) #package tambahan dari glmnet yang memungkinkan syntax glmnet bisa dinput menggunakan object data.frame
##
## Attaching package: 'glmnetUtils'
## The following objects are masked from 'package:glmnet':
##
## cv.glmnet, glmnet
library(leaps) #menggunakan fungsi regsubset sebagai metode seleksi peubah dengan bestforward
library(varbvs) #Data
library(ggplot2)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
library(readr)
library(dplyr)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(repr)
library(MASS) #Data
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(MuMIn) #Model Averaging
library(reader)
## Loading required package: NCmisc
##
## Attaching package: 'reader'
##
## The following objects are masked from 'package:NCmisc':
##
## cat.path, get.ext, rmv.ext
library("GGally")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library("ggplot2")
library("dplyr")
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("readxl")
library("ggpubr")
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:plyr':
##
## mutate
library("lmtest")
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("car")
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library("corrplot")
## corrplot 0.92 loaded
# Load the province-level dataset; the file is semicolon-delimited.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where the file exists at exactly this location.
datacluster <- read.csv(
  file = "C:/Berliana/Materi Kuliah/Semester 5/Pengantar Sains Data/UAS/dataCluster.csv",
  sep = ";"
)
# Preview the first rows of the loaded data.
head(datacluster)
## Kode Provinsi X1 X2 X3 X4 X5 X6 X7 X8
## 1 11 Aceh 1.952 0.166 4.093 92.284 199.428 5.055 14.04 31.092
## 2 12 Sumatera Utara 5.477 0.123 3.741 166.876 106.022 5.072 12.24 30.420
## 3 13 Sumatera Barat 2.048 0.424 2.148 108.354 28.211 6.593 14.39 32.955
## 4 14 Riau 2.366 0.390 2.335 155.593 57.071 3.649 11.63 30.949
## 5 15 Jambi 1.313 0.091 1.704 176.671 204.015 4.631 9.66 30.349
## 6 16 Sumatera Selatan 3.134 0.140 5.167 188.755 131.193 5.334 12.20 33.039
## X9 X10 X11 X12
## 1 3.35 15.43 6.59 566.520
## 2 37.53 9.14 6.91 595.835
## 3 29.48 6.56 6.88 683.944
## 4 33.43 7.04 6.32 755.177
## 5 46.88 7.97 5.13 754.782
## 6 38.15 12.98 5.51 800.098
# Show the structure of the data frame: dimensions, column names and types.
str(datacluster)
## 'data.frame': 31 obs. of 14 variables:
## $ Kode : int 11 12 13 14 15 16 17 18 19 21 ...
## $ Provinsi: chr "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
## $ X1 : num 1.95 5.48 2.05 2.37 1.31 ...
## $ X2 : num 0.166 0.123 0.424 0.39 0.091 0.14 0.179 0.07 0.161 0.339 ...
## $ X3 : num 4.09 3.74 2.15 2.33 1.7 ...
## $ X4 : num 92.3 166.9 108.4 155.6 176.7 ...
## $ X5 : num 199.4 106 28.2 57.1 204 ...
## $ X6 : num 5.05 5.07 6.59 3.65 4.63 ...
## $ X7 : num 14.04 12.24 14.39 11.63 9.66 ...
## $ X8 : num 31.1 30.4 33 30.9 30.3 ...
## $ X9 : num 3.35 37.53 29.48 33.43 46.88 ...
## $ X10 : num 15.43 9.14 6.56 7.04 7.97 ...
## $ X11 : num 6.59 6.91 6.88 6.32 5.13 ...
## $ X12 : num 567 596 684 755 755 ...
Berdasarkan output di atas, dapat diketahui bahwa terdapat 14 variabel dengan 31 amatan. Keempat belas variabel tersebut adalah Kode, Provinsi, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, dan X12. Dari 14 variabel tersebut, 13 di antaranya adalah peubah numerik dan 1 peubah bertipe karakter (Provinsi).
# Print per-column summary statistics for the data frame.
summary(datacluster)
## Kode Provinsi X1 X2
## Min. :11.00 Length:31 Min. : 0.2600 Min. :0.0410
## 1st Qu.:18.50 Class :character 1st Qu.: 0.9705 1st Qu.:0.1385
## Median :36.00 Mode :character Median : 1.5980 Median :0.2240
## Mean :43.55 Mean : 3.1456 Mean :0.3042
## 3rd Qu.:64.50 3rd Qu.: 3.2340 3rd Qu.:0.3670
## Max. :81.00 Max. :17.8660 Max. :1.7400
## X3 X4 X5 X6
## Min. :0.866 Min. : 29.71 Min. : 10.89 Min. : 3.536
## 1st Qu.:1.832 1st Qu.: 84.94 1st Qu.: 57.95 1st Qu.: 4.676
## Median :2.459 Median : 166.88 Median : 87.71 Median : 5.064
## Mean :2.816 Mean : 306.46 Mean :127.28 Mean : 5.598
## 3rd Qu.:3.494 3rd Qu.: 227.81 3rd Qu.:201.72 3rd Qu.: 5.946
## Max. :6.924 Max. :3114.88 Max. :360.88 Max. :10.814
## X7 X8 X9 X10
## Min. : 7.81 Min. :22.14 Min. : 3.35 Min. : 4.450
## 1st Qu.:12.01 1st Qu.:28.59 1st Qu.:22.14 1st Qu.: 6.635
## Median :13.86 Median :30.35 Median :29.48 Median : 8.990
## Mean :13.67 Mean :30.21 Mean :28.31 Mean :10.062
## 3rd Qu.:14.88 3rd Qu.:32.90 3rd Qu.:35.55 3rd Qu.:12.890
## Max. :21.09 Max. :36.02 Max. :46.88 Max. :21.210
## X11 X12
## Min. : 3.320 Min. : 327.2
## 1st Qu.: 4.580 1st Qu.: 561.4
## Median : 5.630 Median : 683.9
## Mean : 6.094 Mean : 701.7
## 3rd Qu.: 6.875 3rd Qu.: 794.8
## Max. :10.950 Max. :1179.6
Berdasarkan output di atas, dapat diketahui nilai statistika deskriptif (minimum, kuartil pertama, median, rata-rata, kuartil ketiga, dan maksimum) dari setiap peubah numerik.
# Bar chart of X1 (population share, per the plot title) for each province.
ggplot(data = datacluster, mapping = aes(x = Provinsi, y = X1)) +
  geom_col(fill = "#4682B4") +
  labs(
    x = "Provinsi",
    # Label the y-axis explicitly instead of showing the raw column name "X1".
    y = "Persentase jumlah penduduk (%)",
    # Title spelling corrected: "Presentase" -> "Persentase".
    title = "Persentase Jumlah Penduduk menurut Provinsi"
  ) +
  theme_minimal() +
  # Rotate the province names so the many x-axis labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Berdasarkan grafik di atas, dapat dilihat bahwa provinsi yang memiliki persentase jumlah penduduk > 5% adalah Jawa Barat, Jawa Tengah, Jawa Timur, dan Sumatera Utara.
# Area chart (geom_area) of X3 against X2.
# NOTE(review): an area geom is an unusual choice for relating two measured
# variables; confirm it is intended before relying on the visual trend.
ggplot(data = datacluster, mapping = aes(x = X2, y = X3)) +
  geom_area() +
  labs(title = "Hubungan antara total kasus terkonfirmasi positif covid-19 dengan total meninggal akibat covid") +
  theme_minimal()
Berdasarkan area plot di atas, dapat dilihat bahwa semakin kecil total kasus terkonfirmasi positif Covid-19 dibagi total penduduk (X2), maka semakin besar total meninggal akibat Covid-19 terkonfirmasi positif Covid-19 (X3).
# Single boxplot of X4; the empty string on x puts all rows in one group.
ggplot(data = datacluster, mapping = aes(x = "", y = X4)) +
  geom_boxplot(fill = "#B22222") +
  labs(title = "Rata- rata volume lalu lintas per meter jalan raya") +
  theme_minimal()
Berdasarkan box plot di atas, dapat dilihat bahwa terdapat pencilan pada peubah rata-rata volume lalu lintas per meter jalan raya (X4). Hal ini dikarenakan selisih antara nilai minimum (29.71) dan nilai maksimum (3114.88) sangat besar. Selain itu, dapat dilihat juga bahwa median sebesar 166.88 dan mean sebesar 306.46.
# Line chart of X2 against X5 with a large, bold, centred title.
ggplot(data = datacluster, mapping = aes(x = X5, y = X2)) +
  geom_line(colour = "#FF8C00") +
  labs(title = "Hubungan antara peubah X5 dan X2") +
  theme_minimal() +
  # hjust = 0.5 centres the title horizontally.
  theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5))
Line plot di atas menjelaskan hubungan antara total penduduk dibagi total penduduk yang diperiksa Covid-19 (X5) dengan total kasus terkonfirmasi positif Covid-19 dibagi total penduduk (X2). Dapat disimpulkan bahwa semakin banyak total penduduk yang diperiksa Covid-19, maka akan semakin sedikit total kasus terkonfirmasi positif Covid-19, dan begitu pun sebaliknya.
# Histogram of X12 using 30 bins.
ggplot(data = datacluster, mapping = aes(x = X12)) +
  geom_histogram(bins = 30L, fill = "#112446") +
  theme_minimal()
Plot di atas menunjukkan histogram peubah ketersediaan tempat tidur di rumah sakit (X12).
# Single boxplot of X11; the empty string on x puts all rows in one group.
ggplot(data = datacluster, mapping = aes(x = "", y = X11)) +
  geom_boxplot(fill = "#112446") +
  theme_minimal()
Plot di atas menunjukkan boxplot peubah tingkat pengangguran (X11).
# Violin plot of X8 drawn as a single group (empty string on x).
ggplot(data = datacluster, mapping = aes(x = "", y = X8)) +
  geom_violin(adjust = 1L, scale = "area", fill = "#112446") +
  theme_minimal()
Plot di atas merupakan violin plot dari persentase penduduk dewasa berusia 20 tahun ke atas yang merokok (X8).
# Kernel density plot of X7.
ggplot(data = datacluster, mapping = aes(x = X7)) +
  geom_density(adjust = 1L, fill = "#228B22") +
  theme_minimal()
Plot di atas merupakan density plot dari peubah angka morbiditas (X7).