Read data

library(readxl)
# All three worksheets come from the same workbook, so its path is kept in one
# place instead of being repeated for every sheet.
# NOTE(review): this is an absolute, machine-specific path; point it at the
# workbook's location before running on another machine.
path_data_uas <- "D:/firlan/Documents/College/Semester 4/Analisis Eksplorasi Data/UAS/Practice/data UAS AED tahun lalu.xlsx"

data01 <- read_xlsx(path_data_uas, sheet = "Sheet1")
data02 <- read_xlsx(path_data_uas, sheet = "Sheet2")
data03 <- read_xlsx(path_data_uas, sheet = "Sheet3")

data01
## # A tibble: 50 × 3
##    Participant Durasi Produktivitas
##          <dbl>  <dbl>         <dbl>
##  1           1    7.5            85
##  2           2    6.2            72
##  3           3    8.1            91
##  4           4    6.8            78
##  5           5    7.9            87
##  6           6    5.6            63
##  7           7    6.9            76
##  8           8    7.2            81
##  9           9    6.5            70
## 10          10    8.3            93
## # ℹ 40 more rows
data02
## # A tibble: 50 × 9
##      No.  Usia   IMT Konsumsi Kalori Haria…¹ `Jumlah Langkah` `Kadar Kolesterol`
##    <dbl> <dbl> <dbl>                   <dbl>            <dbl>              <dbl>
##  1     1    35    25                    2000             8000                180
##  2     2    42    29                    1800             6000                220
##  3     3    28    22                    2200             9000                160
##  4     4    55    31                    2500             5000                240
##  5     5    46    26                    1900             7000                200
##  6     6    39    24                    2100             8000                180
##  7     7    52    28                    1800             4000                220
##  8     8    31    23                    2300             9000                170
##  9     9    48    27                    2200             6000                200
## 10    10    41    25                    1900             7000                190
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹​`Konsumsi Kalori Harian`
## # ℹ 3 more variables: `Tekanan Darah` <dbl>, Merokok <chr>,
## #   `Riwayat Penyakit Jantung` <chr>
data03
## # A tibble: 15 × 2
##    Hari    Penjualan
##    <chr>       <dbl>
##  1 Hari 1         20
##  2 Hari 2         25
##  3 Hari 3         18
##  4 Hari 4         22
##  5 Hari 5         30
##  6 Hari 6         35
##  7 Hari 7         28
##  8 Hari 8         19
##  9 Hari 9         24
## 10 Hari 10        29
## 11 Hari 11        32
## 12 Hari 12        26
## 13 Hari 13        21
## 14 Hari 14        27
## 15 Hari 15        23

Eksplorasi lewat Visualisasi Data

Ringkasan data dan statistik 5 serangkai

str(data01)
## tibble [50 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Participant  : num [1:50] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Durasi       : num [1:50] 7.5 6.2 8.1 6.8 7.9 5.6 6.9 7.2 6.5 8.3 ...
##  $ Produktivitas: num [1:50] 85 72 91 78 87 63 76 81 70 93 ...
summary(data02)
##       No.             Usia            IMT        Konsumsi Kalori Harian
##  Min.   : 1.00   Min.   :28.00   Min.   :22.00   Min.   :1800          
##  1st Qu.:13.25   1st Qu.:36.25   1st Qu.:24.00   1st Qu.:1900          
##  Median :25.50   Median :42.50   Median :26.00   Median :2050          
##  Mean   :25.50   Mean   :42.52   Mean   :26.00   Mean   :2058          
##  3rd Qu.:37.75   3rd Qu.:48.75   3rd Qu.:27.75   3rd Qu.:2200          
##  Max.   :50.00   Max.   :56.00   Max.   :31.00   Max.   :2500          
##  Jumlah Langkah Kadar Kolesterol Tekanan Darah   Merokok         
##  Min.   :4000   Min.   :160.0    Min.   :110   Length:50         
##  1st Qu.:6000   1st Qu.:180.0    1st Qu.:120   Class :character  
##  Median :7000   Median :190.0    Median :120   Mode  :character  
##  Mean   :6780   Mean   :194.2    Mean   :124                     
##  3rd Qu.:8000   3rd Qu.:200.0    3rd Qu.:130                     
##  Max.   :9000   Max.   :240.0    Max.   :140                     
##  Riwayat Penyakit Jantung
##  Length:50               
##  Class :character        
##  Mode  :character        
##                          
##                          
## 

Memilih baris dan kolom yang diinginkan (filter dan select)

Kolom

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Keep only the 2nd and 4th columns of data02 (selected by position).
data02kolom2 <- select(data02, c(2, 4))
data02kolom2
## # A tibble: 50 × 2
##     Usia `Konsumsi Kalori Harian`
##    <dbl>                    <dbl>
##  1    35                     2000
##  2    42                     1800
##  3    28                     2200
##  4    55                     2500
##  5    46                     1900
##  6    39                     2100
##  7    52                     1800
##  8    31                     2300
##  9    48                     2200
## 10    41                     1900
## # ℹ 40 more rows

Memilih kolom yang hanya berisi data numeric

library(dplyr)
# Keep only the numeric columns of data02.
# `select(where(...))` is the current replacement for the superseded scoped
# verb `select_if()`; the selected columns are the same.
data02numeric <- data02 %>% select(where(is.numeric))
data02numeric
## # A tibble: 50 × 7
##      No.  Usia   IMT Konsumsi Kalori Haria…¹ `Jumlah Langkah` `Kadar Kolesterol`
##    <dbl> <dbl> <dbl>                   <dbl>            <dbl>              <dbl>
##  1     1    35    25                    2000             8000                180
##  2     2    42    29                    1800             6000                220
##  3     3    28    22                    2200             9000                160
##  4     4    55    31                    2500             5000                240
##  5     5    46    26                    1900             7000                200
##  6     6    39    24                    2100             8000                180
##  7     7    52    28                    1800             4000                220
##  8     8    31    23                    2300             9000                170
##  9     9    48    27                    2200             6000                200
## 10    10    41    25                    1900             7000                190
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹​`Konsumsi Kalori Harian`
## # ℹ 1 more variable: `Tekanan Darah` <dbl>

Baris

# Keep the rows whose Participant value lies between 5 and 17 (both inclusive).
# The condition is on the Participant value, not on the row position.
data01filter <- data01 %>% filter(between(Participant, 5, 17))
data01filter
## # A tibble: 13 × 3
##    Participant Durasi Produktivitas
##          <dbl>  <dbl>         <dbl>
##  1           5    7.9            87
##  2           6    5.6            63
##  3           7    6.9            76
##  4           8    7.2            81
##  5           9    6.5            70
##  6          10    8.3            93
##  7          11    7.7            84
##  8          12    6.1            67
##  9          13    7.8            89
## 10          14    6.4            74
## 11          15    8.2            92
## 12          16    7.3            80
## 13          17    6.6            68
# Keep only the rows where Merokok equals "Ya".
data02filter <- filter(data02, Merokok == "Ya")
data02filter
## # A tibble: 18 × 9
##      No.  Usia   IMT Konsumsi Kalori Haria…¹ `Jumlah Langkah` `Kadar Kolesterol`
##    <dbl> <dbl> <dbl>                   <dbl>            <dbl>              <dbl>
##  1     2    42    29                    1800             6000                220
##  2     5    46    26                    1900             7000                200
##  3     7    52    28                    1800             4000                220
##  4     8    31    23                    2300             9000                170
##  5    11    37    28                    2100             5000                180
##  6    14    32    26                    1800             6000                170
##  7    17    56    30                    2400             5000                220
##  8    18    47    25                    2100             6000                180
##  9    20    34    24                    2000             8000                170
## 10    21    51    27                    1800             4000                200
## 11    25    36    26                    2100             8000                190
## 12    28    33    27                    2200             6000                170
## 13    32    37    28                    1800             6000                180
## 14    34    35    25                    1900             8000                170
## 15    36    31    23                    2300             9000                180
## 16    39    38    26                    2100             8000                190
## 17    42    32    27                    2200             6000                170
## 18    46    36    26                    1800             6000                180
## # ℹ abbreviated name: ¹​`Konsumsi Kalori Harian`
## # ℹ 3 more variables: `Tekanan Darah` <dbl>, Merokok <chr>,
## #   `Riwayat Penyakit Jantung` <chr>

Mengakses baris tertentu

# Extract row 22, column 2 of data02 as a 1 x 1 tibble.
data02filterusia <- data02 %>%
  slice(22) %>%
  select(2)
data02filterusia
## # A tibble: 1 × 1
##    Usia
##   <dbl>
## 1    30

Membuat data ke bentuk data frame

# Convert the tibble into a plain data.frame.
a <- as.data.frame(data01)
# Name the columns of the converted data frame (the object printed below).
colnames(a) <- c("Participant", "Durasi", "Produktivitas")
a
##    Participant Durasi Produktivitas
## 1            1    7.5            85
## 2            2    6.2            72
## 3            3    8.1            91
## 4            4    6.8            78
## 5            5    7.9            87
## 6            6    5.6            63
## 7            7    6.9            76
## 8            8    7.2            81
## 9            9    6.5            70
## 10          10    8.3            93
## 11          11    7.7            84
## 12          12    6.1            67
## 13          13    7.8            89
## 14          14    6.4            74
## 15          15    8.2            92
## 16          16    7.3            80
## 17          17    6.6            68
## 18          18    7.4            83
## 19          19    5.9            62
## 20          20    7.1            79
## 21          21    6.7            75
## 22          22    7.6            88
## 23          23    6.3            71
## 24          24    8.4            94
## 25          25    7.0            77
## 26          26    6.0            66
## 27          27    8.0            90
## 28          28    7.2            82
## 29          29    6.5            69
## 30          30    7.9            86
## 31          31    6.8            73
## 32          32    8.1            91
## 33          33    7.3            81
## 34          34    6.6            68
## 35          35    8.2            93
## 36          36    7.6            85
## 37          37    6.1            67
## 38          38    7.7            88
## 39          39    6.4            74
## 40          40    8.3            95
## 41          41    7.1            78
## 42          42    6.5            70
## 43          43    8.0            90
## 44          44    7.2            82
## 45          45    6.7            73
## 46          46    7.9            87
## 47          47    6.3            72
## 48          48    8.4            94
## 49          49    7.0            77
## 50          50    6.0            68

Boxplot

# Horizontal boxplot of the `Jumlah Langkah` column.
boxplot(
  data02$`Jumlah Langkah`,
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  horizontal = TRUE,
  main = "Boxplot sebaran jumlah langkah",
  col = "pink"
)

Boxplot lebih dari 1 dengan X kategorik

# One box of `Konsumsi Kalori Harian` per `Riwayat Penyakit Jantung` category.
boxplot(
  `Konsumsi Kalori Harian` ~ `Riwayat Penyakit Jantung`,
  data = data02,
  col = "skyblue",
  # Use an explicit "\n" for the line break so that the indentation of the
  # source code does not end up inside the plot title.
  main = "Sebaran konsumsi kalori harian\nper kategori riwayat penyakit jantung"
)

Boxplot lebih dari 1 dengan X kategorik diurutkan berdasarkan median

# Same grouped boxplot, with the categories ordered by the median of
# `Konsumsi Kalori Harian` within each category.
boxplot(
  # Columns are looked up in `data`, so no `data02$` prefix is needed here.
  `Konsumsi Kalori Harian` ~
    reorder(`Riwayat Penyakit Jantung`, `Konsumsi Kalori Harian`, FUN = median),
  data = data02,
  col = "skyblue",
  # Without an explicit label the x axis shows the whole reorder(...) expression.
  xlab = "Riwayat Penyakit Jantung",
  # Use an explicit "\n" for the line break so that the indentation of the
  # source code does not end up inside the plot title.
  main = "Sebaran konsumsi kalori harian\nper kategori riwayat penyakit jantung"
)

Histogram

Variabel dengan frekuensinya

hist(data01$Durasi) 

Histogram per Kategori

library(ggplot2)

# Histogram of `Tekanan Darah`, one facet (and fill colour) per Merokok category.
# The recorded values are 110, 120, 130 and 140, so a bin width of 10 gives one
# bar per recorded value; setting it explicitly also avoids the default of
# 30 bins and the message ggplot2 prints when no bin width is chosen.
ggplot(data02, aes(x = `Tekanan Darah`, fill = Merokok)) +
  geom_histogram(binwidth = 10) +
  scale_fill_brewer(palette = "Set2") +
  facet_wrap(~Merokok, ncol = 1) +
  labs(
    x = "Tekanan Darah",
    y = "Frekuensi",
    title = "Angka Tekanan Darah per Kategori Merokok"
  ) +
  theme_bw()

Scatter plot

Scatter Plot Hubungan durasi tidur dengan produktivitas

# Scatter plot of Durasi (x) against Produktivitas (y) with a centred title.
ggplot(data01, aes(x = Durasi, y = Produktivitas)) +
  geom_point(color = "orange", size = 3) +
  labs(
    title = "Scatter Plot Durasi Tidur dengan Produktivitas",
    x = "Durasi Tidur",
    y = "Produktivitas"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Scatter Plot Hubungan Usia dengan Kadar Kolesterol dan dibedakan berdasarkan merokok atau tidak

# Scatter plot of Usia (x) against `Kadar Kolesterol` (y); points are coloured
# by the Merokok category and the title is centred.
ggplot(data02, aes(x = Usia, y = `Kadar Kolesterol`, color = Merokok)) +
  geom_point(size = 3) +
  labs(
    title = "Scatter Plot Usia dengan Kadar Kolesterol dan dibedakan berdasarkan merokok atau tidak",
    x = "Usia",
    y = "Kadar Kolesterol"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Correlogram, Corrplot, Heatmap, dan Pair Plot untuk melihat besaran korelasi

Correlogram

library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Correlation matrix of data02 drawn as tiles, using Pearson correlations.
# ggcorr() ignores the non-numeric columns (Merokok, Riwayat Penyakit Jantung)
# and emits a warning about them.
ggcorr(data02, method = c("everything","pearson"),geom = "tile") 
## Warning in ggcorr(data02, method = c("everything", "pearson"), geom = "tile"):
## data in column(s) 'Merokok', 'Riwayat Penyakit Jantung' are not numeric and
## were ignored

Corrplot

library(corrplot)
## corrplot 0.92 loaded
# Compute the correlation matrix of the numeric columns.
corr02 <- cor(x = data02numeric)
corr02
##                                 No.        Usia         IMT
## No.                     1.000000000  0.07971784 -0.02455513
## Usia                    0.079717843  1.00000000  0.60724087
## IMT                    -0.024555126  0.60724087  1.00000000
## Konsumsi Kalori Harian -0.043682751  0.10400083  0.12349421
## Jumlah Langkah         -0.076415334 -0.44921765 -0.63870192
## Kadar Kolesterol       -0.011089970  0.73619766  0.57121412
## Tekanan Darah          -0.007072482  0.42952994  0.30455480
##                        Konsumsi Kalori Harian Jumlah Langkah Kadar Kolesterol
## No.                               -0.04368275    -0.07641533      -0.01108997
## Usia                               0.10400083    -0.44921765       0.73619766
## IMT                                0.12349421    -0.63870192       0.57121412
## Konsumsi Kalori Harian             1.00000000     0.22045524       0.06910793
## Jumlah Langkah                     0.22045524     1.00000000      -0.40353442
## Kadar Kolesterol                   0.06910793    -0.40353442       1.00000000
## Tekanan Darah                     -0.24437588    -0.51170847       0.62631678
##                        Tekanan Darah
## No.                     -0.007072482
## Usia                     0.429529945
## IMT                      0.304554795
## Konsumsi Kalori Harian  -0.244375883
## Jumlah Langkah          -0.511708474
## Kadar Kolesterol         0.626316780
## Tekanan Darah            1.000000000
# Draw the correlation matrix, showing each coefficient as a number.
corrplot(corr = corr02, method = "number")

Corrplot bentuk lain

# Mixed correlation plot: numbers in the lower triangle, ellipses in the upper
# triangle, variables kept in their original order.
corrplot.mixed(
  corr02,
  lower = "number",
  upper = "ellipse",
  order = "original",
  diag = "l",
  tl.pos = "lt",
  tl.col = "black",
  number.cex = 0.55,
  number.digits = 2
)

Heatmap

library(heatmaply)
## Warning: package 'heatmaply' was built under R version 4.3.3
## Loading required package: plotly
## Warning: package 'plotly' was built under R version 4.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## Warning: package 'viridis' was built under R version 4.3.3
## Loading required package: viridisLite
## 
## ======================
## Welcome to heatmaply version 1.5.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Interactive heatmap of the correlation matrix `corr02` computed in the
# Corrplot section; the commented-out line below shows how it was obtained.
#corr02 = cor(data02numeric)
heatmaply(corr02)

Pair plot

# Pair plot of all columns of data02; for pairs of continuous variables the
# upper panels show the correlation, drawn with text size 2.
ggpairs(data02, upper = list(continuous = wrap('cor', size = 2)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Goodness of Fit Test

Kolmogorov-Smirnov Test

Hanya untuk data yang nilainya tidak ada yang berulang (tanpa ties). H0: data menyebar normal. H1: data tidak menyebar normal.

Terima H0 berarti data menyebar normal

# One-sample Kolmogorov-Smirnov test of IMT against a normal distribution with
# mean 0 and variance 1.
# NOTE(review): IMT ranges from 22 to 31 in this data, far away from N(0, 1),
# which is why the test returns D = 1. To assess normality of the data itself,
# the reference parameters would presumably be the sample mean and standard
# deviation (mean(data02$IMT), sd(data02$IMT)) - confirm the intent.
ks.test(data02$IMT, "pnorm", 0, 1)
## Warning in ks.test.default(data02$IMT, "pnorm", 0, 1): ties should not be
## present for the Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data02$IMT
## D = 1, p-value < 2.2e-16
## alternative hypothesis: two-sided
# One-sample Kolmogorov-Smirnov test of Usia against a uniform distribution.
# NOTE(review): no bounds are passed, so punif uses its defaults (min = 0,
# max = 1), while Usia ranges from 28 to 56 - hence D = 1. The bounds of the
# intended uniform distribution probably need to be supplied - confirm.
ks.test(data02$Usia, "punif")
## Warning in ks.test.default(data02$Usia, "punif"): ties should not be present
## for the Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data02$Usia
## D = 1, p-value < 2.2e-16
## alternative hypothesis: two-sided

D-tabel (apabila data lebih dari 35)

# Critical value D for the Kolmogorov-Smirnov test at alpha = 0.05,
# computed as 1.358 / sqrt(n).
# Example: a data set with 52 observations.
n <- 52

Dtabel <- 1.358 / sqrt(n)
Dtabel
## [1] 0.1883207

Shapiro-Wilk Test

Harus input sebagai vector, bukan string

H0: data menyebar normal. H1: data tidak menyebar normal.

Terima H0 berarti data menyebar normal

# One-column tibble holding only the Usia column.
data02usia <- select(data02, Usia)
data02usia
## # A tibble: 50 × 1
##     Usia
##    <dbl>
##  1    35
##  2    42
##  3    28
##  4    55
##  5    46
##  6    39
##  7    52
##  8    31
##  9    48
## 10    41
## # ℹ 40 more rows
is.numeric(data02usia$Usia) # The input is passed as a vector (the extracted column)
## [1] TRUE
shapiro.test(data02usia$Usia)
## 
##  Shapiro-Wilk normality test
## 
## data:  data02usia$Usia
## W = 0.96617, p-value = 0.161

Chi-Square

# Example data for the chi-square test: one entry per country, with the
# probability used as the expected proportion and the observed medal count.
country <- c("US", "Germany", "France", "Norway", "Spain", "China")
exp.percent <- c(0.25, 0.12, 0.18, 0.14, 0.11, 0.20)
medals <- c(33, 6, 18, 15, 12, 36)

# Combine the three vectors into one data frame, one row per country.
medal.data <- data.frame(
  country = country,
  exp.percent = exp.percent,
  medals = medals
)
medal.data
##   country exp.percent medals
## 1      US        0.25     33
## 2 Germany        0.12      6
## 3  France        0.18     18
## 4  Norway        0.14     15
## 5   Spain        0.11     12
## 6   China        0.20     36
# Chi-square goodness-of-fit test: observed medal counts against the given
# probabilities (both taken from the same data frame).
chisq.test(medal.data$medals, p = medal.data$exp.percent)
## 
##  Chi-squared test for given probabilities
## 
## data:  medal.data$medals
## X-squared = 12.102, df = 5, p-value = 0.03342

Deteksi Outlier

Contoh data yang digunakan

# Example data: the `mpg` data set that ships with ggplot2.
datacar <- ggplot2::mpg
# Preview the first six rows.
head(datacar, n = 6)
## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…

Melalui histogram, boxplot, dan adjusted boxplot

Histogram

# Histogram of hwy using 20 bins.
ggplot(datacar, aes(x = hwy)) +
  geom_histogram(fill = "coral", bins = 20) +
  theme_minimal()

Dari gambar di atas terlihat mungkin terdapat outlier.

# Single boxplot of hwy; the empty string on x puts all values in one group.
ggplot(datacar, aes(x = "", y = hwy)) +
  geom_boxplot(fill = "orange") +
  theme_minimal()

Dari gambar di atas terlihat mungkin terdapat outlier.

Adjusted Boxplot

library(robustbase)
## Warning: package 'robustbase' was built under R version 4.3.2
# Compute the adjusted-boxplot statistics without drawing anything; the result
# is kept so its `out` component can be used to label the plot.
adj_box <- adjbox(datacar$hwy, plot = FALSE)
## The default of 'doScale' is FALSE now for stability;
##   set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message
# Draw the adjusted boxplot, then write each value stored in `adj_box$out`
# to the right of its point (pos = 4) in small blue text.
adjbox(datacar$hwy, main = "Adjusted Boxplot")
text(
  x = 1,
  y = adj_box$out,
  labels = adj_box$out,
  pos = 4,
  cex = 0.8,
  col = "blue"
)

Melalui Interquartile Range (IQR)

Mencari nilai Q1, Q3, dan InterQuartile

summary(datacar$hwy)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.00   18.00   24.00   23.44   27.00   44.00
# Interquartile range (Q3 - Q1) of hwy.
IQRhwy <- IQR(datacar$hwy)
IQRhwy
## [1] 9
# Compute Q1 and Q3 from the data instead of copying them from the printed
# summary, so the fences stay correct if the data changes.
# names = FALSE keeps the bounds as plain (unnamed) numbers.
hwy_quartiles <- quantile(datacar$hwy, probs = c(0.25, 0.75), names = FALSE)

# Fences at 1.5 * IQR below Q1 and above Q3.
lower_bound <- hwy_quartiles[1] - 1.5 * IQRhwy
upper_bound <- hwy_quartiles[2] + 1.5 * IQRhwy

# Row indices (not values) of the observations outside the fences.
outlier_iqr <- which(datacar$hwy < lower_bound | datacar$hwy > upper_bound)
outlier_iqr
## [1] 213 222 223

Angka yang muncul bukan merupakan nilai outlier, melainkan indeks dari outlier. Sehingga perlu untuk akses data.

# Look up the hwy values of the flagged rows. The indices come from
# `outlier_iqr` and the column is addressed by name, so nothing has to be
# copied by hand from the printed output.
outlierhwy <- datacar[outlier_iqr, "hwy"]
outlierhwy
## # A tibble: 3 × 1
##     hwy
##   <int>
## 1    44
## 2    44
## 3    41

Analisis Regresi

Membuat dataframe dengan nama variabel baru

# Drop columns 1, 8 and 9 of data02 (No., Merokok, Riwayat Penyakit Jantung).
data02no <- select(data02, -c(1, 8, 9))

# Convert to a plain data.frame and rename the remaining columns, in order:
# Y = Usia, X1 = IMT, X2 = Konsumsi Kalori Harian, X3 = Jumlah Langkah,
# X4 = Kadar Kolesterol, X5 = Tekanan Darah.
dataframe02 <- setNames(
  as.data.frame(data02no),
  c("Y", "X1", "X2", "X3", "X4", "X5")
)
dataframe02
##     Y X1   X2   X3  X4  X5
## 1  35 25 2000 8000 180 120
## 2  42 29 1800 6000 220 130
## 3  28 22 2200 9000 160 110
## 4  55 31 2500 5000 240 140
## 5  46 26 1900 7000 200 120
## 6  39 24 2100 8000 180 130
## 7  52 28 1800 4000 220 140
## 8  31 23 2300 9000 170 110
## 9  48 27 2200 6000 200 120
## 10 41 25 1900 7000 190 130
## 11 37 28 2100 5000 180 120
## 12 50 29 2300 8000 200 130
## 13 45 24 2000 9000 190 120
## 14 32 26 1800 6000 170 110
## 15 38 27 2200 7000 200 130
## 16 43 23 1900 8000 210 140
## 17 56 30 2400 5000 220 120
## 18 47 25 2100 6000 180 110
## 19 40 26 1900 7000 190 130
## 20 34 24 2000 8000 170 120
## 21 51 27 1800 4000 200 140
## 22 30 23 2300 9000 180 110
## 23 49 28 2200 6000 210 120
## 24 42 25 1900 7000 200 130
## 25 36 26 2100 8000 190 120
## 26 53 29 1800 5000 230 140
## 27 44 24 2000 9000 180 110
## 28 33 27 2200 6000 170 120
## 29 39 23 1900 7000 200 130
## 30 54 25 2400 8000 220 120
## 31 47 26 2100 5000 190 130
## 32 37 28 1800 6000 180 110
## 33 44 24 2000 7000 190 120
## 34 35 25 1900 8000 170 130
## 35 52 27 2200 4000 200 140
## 36 31 23 2300 9000 180 110
## 37 46 28 2200 6000 210 120
## 38 41 25 1900 7000 200 130
## 39 38 26 2100 8000 190 120
## 40 55 30 1800 5000 230 140
## 41 45 24 2000 9000 180 110
## 42 32 27 2200 6000 170 120
## 43 39 23 1900 7000 200 130
## 44 50 29 2400 8000 220 120
## 45 47 26 2100 5000 190 130
## 46 36 26 1800 6000 180 110
## 47 53 27 2000 7000 190 120
## 48 44 25 2200 8000 210 130
## 49 33 24 1900 5000 200 140
## 50 51 28 2100 6000 180 120

Membentuk model awal

# Initial model: linear regression of Y on all five predictors.
model2 <- lm(Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02)
# Sequential analysis-of-variance table for the fitted model.
anova(model2)
## Analysis of Variance Table
## 
## Response: Y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## X1         1 1073.95 1073.95 40.6709 9.399e-08 ***
## X2         1    2.49    2.49  0.0943    0.7603    
## X3         1   28.64   28.64  1.0845    0.3034    
## X4         1  640.29  640.29 24.2478 1.238e-05 ***
## X5         1    5.26    5.26  0.1991    0.6576    
## Residuals 44 1161.86   26.41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.685 -3.394 -0.617  3.618 10.942 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.897e+01  2.109e+01  -0.900 0.373278    
## X1           6.815e-01  5.409e-01   1.260 0.214386    
## X2           1.994e-03  4.320e-03   0.462 0.646652    
## X3          -6.421e-04  7.699e-04  -0.834 0.408780    
## X4           2.586e-01  6.273e-02   4.123 0.000163 ***
## X5          -4.999e-02  1.120e-01  -0.446 0.657614    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.139 on 44 degrees of freedom
## Multiple R-squared:  0.6011, Adjusted R-squared:  0.5557 
## F-statistic: 13.26 on 5 and 44 DF,  p-value: 6.801e-08

Karena R-square rendah, dilakukan pengecekan pencilan dan amatan berpengaruh

Cek Multikolinearitas

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
car::vif(model2)
##       X1       X2       X3       X4       X5 
## 2.548581 1.258993 2.392554 2.446731 2.280875

Pendeteksian Outlier

# Diagnostic plot from the olsrr package for the fitted model.
# NOTE(review): judging by the function name this plots residuals against
# leverage to flag outliers and leverage points - confirm in the olsrr docs.
olsrr::ols_plot_resid_lev(model2)

# Per-observation influence measures (DFBETAS per coefficient, DFFIT,
# covariance ratio, Cook's distance and hat value); observations flagged as
# influential are marked with "*" in the `inf` column.
influence.measures(model2)
## Influence measures of
##   lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02) :
## 
##       dfb.1_    dfb.X1    dfb.X2    dfb.X3   dfb.X4    dfb.X5    dffit cov.r
## 1   1.70e-02 -0.041486  0.032782 -0.064452  0.04855 -0.026814 -0.11217 1.169
## 2   1.25e-01 -0.365971  0.587024 -0.276658 -0.28471  0.142844 -0.82747 0.817
## 3  -5.48e-02  0.065845 -0.069511  0.003321  0.05863  0.004225 -0.19972 1.270
## 4   3.43e-01 -0.104715 -0.334789  0.001956 -0.08508 -0.134829 -0.56344 1.413
## 5   4.39e-02 -0.004239 -0.079013  0.008512  0.06412 -0.067766  0.11858 1.195
## 6  -7.48e-02  0.012091  0.048724  0.069090 -0.10576  0.141005  0.18000 1.242
## 7   1.55e-02 -0.013024 -0.027551 -0.032034  0.02409 -0.000223  0.07830 1.314
## 8  -2.83e-02  0.059896 -0.107908 -0.009213  0.03123  0.022692 -0.21748 1.229
## 9   3.27e-02 -0.026988  0.045805 -0.061832  0.03743 -0.057776  0.11307 1.182
## 10 -4.21e-03  0.001923 -0.016616  0.012990 -0.01375  0.023760  0.04582 1.203
## 11 -6.31e-02 -0.093005 -0.066717  0.142799  0.17584  0.017357 -0.33741 1.138
## 12 -5.30e-01  0.461400  0.119331  0.437013 -0.27822  0.350469  0.59538 1.331
## 13 -5.25e-02  0.017770 -0.133535  0.248859  0.06205  0.009080  0.38062 0.997
## 14 -3.09e-01 -0.035246  0.236545  0.139767  0.05446  0.245490 -0.45604 1.172
## 15  2.55e-01 -0.140375 -0.142407 -0.146357  0.12017 -0.213748 -0.33773 0.961
## 16 -5.87e-03 -0.012862 -0.005083  0.009275  0.00902  0.014383  0.03428 1.409
## 17 -6.64e-04  0.002162  0.096496 -0.085210  0.08512 -0.101922  0.21875 1.390
## 18  5.33e-01 -0.334070  0.088553 -0.489390  0.15294 -0.465067  0.65416 0.939
## 19  1.22e-02 -0.015975  0.014692 -0.018275  0.01663 -0.020543 -0.03516 1.223
## 20  2.88e-05 -0.001556  0.000944 -0.003557  0.00689 -0.004086 -0.01089 1.234
## 21  9.37e-02 -0.059663 -0.087572 -0.218701 -0.10633  0.159326  0.48480 1.080
## 22 -1.13e-01  0.222644 -0.215735  0.015699 -0.10771  0.162379 -0.49648 1.004
## 23  3.53e-04 -0.000117  0.000532 -0.000865  0.00146 -0.001644  0.00273 1.245
## 24 -1.56e-03  0.005978  0.011822 -0.004576 -0.00643 -0.004765 -0.02611 1.205
## 25  8.93e-02 -0.109507  0.024068 -0.149765  0.03984 -0.026244 -0.21609 1.053
## 26  9.99e-03 -0.011903  0.030952 -0.006466 -0.02230 -0.002033 -0.05717 1.347
## 27  1.22e-01  0.015679 -0.205895  0.198114  0.09735 -0.216805  0.49762 0.924
## 28  3.62e-02 -0.136752 -0.169181  0.061596  0.31789 -0.118303 -0.40411 1.144
## 29 -7.13e-02  0.126319  0.025438  0.046399 -0.07655  0.010200 -0.16380 1.260
## 30  2.01e-02 -0.313437  0.262437 -0.080535  0.45504 -0.240008  0.61739 1.201
## 31  8.12e-02 -0.114766  0.141177 -0.211930 -0.08348  0.062755  0.30578 1.120
## 32 -1.59e-01 -0.169780  0.255821  0.011435  0.02742  0.214735 -0.41259 1.291
## 33  1.43e-01 -0.146641 -0.016976 -0.093268  0.08847 -0.093532  0.19991 1.118
## 34 -3.84e-02  0.044430 -0.016502  0.051887 -0.06738  0.064890  0.08798 1.428
## 35 -2.35e-02 -0.180481  0.400974 -0.378446 -0.19632  0.287147  0.67043 1.133
## 36 -9.54e-02  0.187553 -0.181734  0.013225 -0.09073  0.136786 -0.41823 1.081
## 37 -2.22e-02  0.007377 -0.033525  0.054550 -0.09230  0.103643 -0.17235 1.185
## 38 -4.25e-03  0.016259  0.032155 -0.012446 -0.01748 -0.012960 -0.07101 1.191
## 39  5.27e-02 -0.064652  0.014210 -0.088419  0.02352 -0.015494 -0.12757 1.148
## 40 -2.11e-02  0.029036 -0.036484  0.017368  0.01550  0.006956  0.06592 1.393
## 41  1.39e-01  0.017952 -0.235749  0.226839  0.11147 -0.248240  0.56977 0.840
## 42  4.37e-02 -0.165424 -0.204653  0.074511  0.38454 -0.143108 -0.48884 1.070
## 43 -7.13e-02  0.126319  0.025438  0.046399 -0.07655  0.010200 -0.16380 1.260
## 44  8.07e-02 -0.059536 -0.032044 -0.066711 -0.04215  0.017907 -0.13986 1.406
## 45  8.12e-02 -0.114766  0.141177 -0.211930 -0.08348  0.062755  0.30578 1.120
## 46 -2.48e-01  0.024011  0.183820  0.119999 -0.06085  0.237227 -0.32834 1.267
## 47 -6.48e-02  0.320632 -0.215571  0.207085 -0.11204 -0.050481  0.50229 0.606
## 48  2.95e-02  0.017113 -0.022196 -0.023022 -0.02546 -0.018692 -0.07018 1.249
## 49 -3.44e-01  0.677299 -0.118340  0.573058 -0.07512 -0.254952 -1.00247 0.751
## 50 -9.82e-02  0.417668  0.058359  0.019976 -0.45427  0.096203  0.67136 0.695
##      cook.d    hat inf
## 1  2.13e-03 0.0501    
## 2  1.07e-01 0.1531    
## 3  6.76e-03 0.1292    
## 4  5.31e-02 0.2761   *
## 5  2.39e-03 0.0660    
## 6  5.49e-03 0.1092    
## 7  1.04e-03 0.1316    
## 8  8.00e-03 0.1128    
## 9  2.17e-03 0.0571    
## 10 3.58e-04 0.0507    
## 11 1.90e-02 0.1108    
## 12 5.90e-02 0.2533    
## 13 2.38e-02 0.0810    
## 14 3.46e-02 0.1585    
## 15 1.87e-02 0.0610    
## 16 2.00e-04 0.1859    
## 17 8.12e-03 0.1964    
## 18 6.88e-02 0.1411    
## 19 2.11e-04 0.0638    
## 20 2.02e-05 0.0697    
## 21 3.87e-02 0.1351    
## 22 4.03e-02 0.1159    
## 23 1.27e-06 0.0777    
## 24 1.16e-04 0.0491    
## 25 7.79e-03 0.0461    
## 26 5.57e-04 0.1501    
## 27 4.01e-02 0.0959    
## 28 2.72e-02 0.1333    
## 29 4.55e-03 0.1148    
## 30 6.29e-02 0.2127    
## 31 1.56e-02 0.0943    
## 32 2.86e-02 0.1937    
## 33 6.72e-03 0.0593    
## 34 1.32e-03 0.1998   *
## 35 7.36e-02 0.2035    
## 36 2.89e-02 0.1159    
## 37 5.02e-03 0.0777    
## 38 8.58e-04 0.0491    
## 39 2.75e-03 0.0461    
## 40 7.41e-04 0.1782    
## 41 5.17e-02 0.0959    
## 42 3.93e-02 0.1333    
## 43 4.55e-03 0.1148    
## 44 3.33e-03 0.1928    
## 45 1.56e-02 0.0943    
## 46 1.81e-02 0.1618    
## 47 3.84e-02 0.0462    
## 48 8.39e-04 0.0875    
## 49 1.55e-01 0.1769    
## 50 6.96e-02 0.0910

Kombinasi model tanpa dan dengan amatan yang termasuk leverage dan pencilan

# Refit the initial model with selected observations left out. update() reuses
# the formula of `model2` and only swaps the data argument, so each refit is
# the same regression on a reduced data set.

# One observation removed at a time.
model2_tanpa47 <- update(model2, data = dataframe02[-47, ])
model2_tanpa50 <- update(model2, data = dataframe02[-50, ])
model2_tanpa49 <- update(model2, data = dataframe02[-49, ])
model2_tanpa12 <- update(model2, data = dataframe02[-12, ])
model2_tanpa4 <- update(model2, data = dataframe02[-4, ])

# Two observations removed together.
model2_tanpa4750 <- update(model2, data = dataframe02[-c(47, 50), ])
model2_tanpa4749 <- update(model2, data = dataframe02[-c(47, 49), ])

# Coefficient table and fit statistics for the model fitted without row 47.
summary(model2_tanpa47)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-47, 
##     ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0432  -3.0228  -0.2936   2.6243  10.5265 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.767e+01  2.016e+01  -0.876    0.386    
## X1           5.158e-01  5.219e-01   0.988    0.329    
## X2           2.883e-03  4.145e-03   0.696    0.490    
## X3          -7.944e-04  7.386e-04  -1.076    0.288    
## X4           2.653e-01  6.000e-02   4.422 6.55e-05 ***
## X5          -4.458e-02  1.070e-01  -0.417    0.679    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.909 on 43 degrees of freedom
## Multiple R-squared:  0.6299, Adjusted R-squared:  0.5869 
## F-statistic: 14.64 on 5 and 43 DF,  p-value: 2.216e-08
# Coefficient table and fit statistics for the model fitted without row 50.
summary(model2_tanpa50)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-50, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9578 -3.0717 -0.6841  2.7800 11.4422 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.698e+01  2.032e+01  -0.836    0.408    
## X1           4.640e-01  5.306e-01   0.875    0.387    
## X2           1.751e-03  4.159e-03   0.421    0.676    
## X3          -6.569e-04  7.410e-04  -0.887    0.380    
## X4           2.860e-01  6.174e-02   4.633 3.34e-05 ***
## X5          -6.036e-02  1.079e-01  -0.559    0.579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.946 on 43 degrees of freedom
## Multiple R-squared:  0.6296, Adjusted R-squared:  0.5865 
## F-statistic: 14.62 on 5 and 43 DF,  p-value: 2.264e-08
# Coefficient table and fit statistics for the model fitted without row 49.
summary(model2_tanpa49)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-49, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.5880 -3.5247 -0.8706  3.2973 11.3088 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.200e+01  2.052e+01  -0.585    0.562    
## X1           3.295e-01  5.445e-01   0.605    0.548    
## X2           2.485e-03  4.156e-03   0.598    0.553    
## X3          -1.066e-03  7.652e-04  -1.393    0.171    
## X4           2.631e-01  6.030e-02   4.364 7.87e-05 ***
## X5          -2.255e-02  1.084e-01  -0.208    0.836    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.937 on 43 degrees of freedom
## Multiple R-squared:  0.6284, Adjusted R-squared:  0.5852 
## F-statistic: 14.54 on 5 and 43 DF,  p-value: 2.413e-08
# Coefficient table and fit statistics for the model fitted without row 12.
summary(model2_tanpa12)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-12, 
##     ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2160  -3.1696  -0.4858   3.0525  11.2731 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.7901917 23.7510911  -0.328 0.744509    
## X1           0.4319961  0.5931766   0.728 0.470392    
## X2           0.0014786  0.0043466   0.340 0.735377    
## X3          -0.0009784  0.0008369  -1.169 0.248810    
## X4           0.2760616  0.0649803   4.248 0.000113 ***
## X5          -0.0892241  0.1183572  -0.754 0.455045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.136 on 43 degrees of freedom
## Multiple R-squared:  0.6028, Adjusted R-squared:  0.5566 
## F-statistic: 13.05 on 5 and 43 DF,  p-value: 9.566e-08
# Coefficient table and fit statistics for the model fitted without row 4.
summary(model2_tanpa4)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-4, 
##     ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7290 -3.0205 -0.5285  3.7852 10.9419 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.622e+01  2.258e+01  -1.161  0.25185    
## X1           7.382e-01  5.455e-01   1.353  0.18306    
## X2           3.443e-03  4.610e-03   0.747  0.45925    
## X3          -6.436e-04  7.714e-04  -0.834  0.40868    
## X4           2.640e-01  6.312e-02   4.182  0.00014 ***
## X5          -3.485e-02  1.134e-01  -0.307  0.76016    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.148 on 43 degrees of freedom
## Multiple R-squared:  0.5861, Adjusted R-squared:  0.5379 
## F-statistic: 12.18 on 5 and 43 DF,  p-value: 2.229e-07
# Coefficient table and fit statistics for the model fitted without rows 47 and 50.
summary(model2_tanpa4750)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-c(47, 
##     50), ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3479  -2.9873  -0.5536   2.7732   8.8884 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.550e+01  1.918e+01  -0.808    0.424    
## X1           2.784e-01  5.059e-01   0.550    0.585    
## X2           2.671e-03  3.940e-03   0.678    0.502    
## X3          -8.174e-04  7.019e-04  -1.164    0.251    
## X4           2.946e-01  5.834e-02   5.049 9.07e-06 ***
## X5          -5.526e-02  1.018e-01  -0.543    0.590    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.665 on 42 degrees of freedom
## Multiple R-squared:  0.6643, Adjusted R-squared:  0.6244 
## F-statistic: 16.62 on 5 and 42 DF,  p-value: 4.9e-09
# Coefficient table and fit statistics for the model fitted without rows 47 and 49.
summary(model2_tanpa4749)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = dataframe02[-c(47, 
##     49), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9216 -3.1845 -0.5937  2.8180 10.8472 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.039e+01  1.940e+01  -0.535   0.5952    
## X1           1.446e-01  5.198e-01   0.278   0.7823    
## X2           3.424e-03  3.945e-03   0.868   0.3903    
## X3          -1.240e-03  7.263e-04  -1.707   0.0952 .  
## X4           2.703e-01  5.704e-02   4.738 2.48e-05 ***
## X5          -1.591e-02  1.024e-01  -0.155   0.8773    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.664 on 42 degrees of freedom
## Multiple R-squared:  0.6631, Adjusted R-squared:  0.623 
## F-statistic: 16.53 on 5 and 42 DF,  p-value: 5.271e-09

Sejauh ini, model dengan R-squared tertinggi adalah model tanpa amatan ke-47 dan ke-50 (R-squared = 0,6643).

Uji Asumsi

1. Nilai Harapan Galat = 0 (T-test)

# One-sample t-test of H0: the mean of the model residuals equals 0
# (two-sided, 95% confidence level).
# NOTE(review): the model includes an intercept, so least-squares residuals
# average to zero by construction; this test is expected to return p = 1
# regardless of the data. Confirm whether it is informative here, or rely on
# the residuals-vs-fitted plot instead.
t.test(model2_tanpa4750$residuals, mu=0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  model2_tanpa4750$residuals
## t = 4.5504e-17, df = 47, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -1.28059  1.28059
## sample estimates:
##    mean of x 
## 2.896627e-17
# lm diagnostic plot number 1: residuals against fitted values.
plot(model2_tanpa4750, 1)

P-value > alpha (0,05) –> Terima H0 (Nilai Harapan Galat = 0)

2. Ragam Galat Homogen (Breusch-Pagan)

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Studentized Breusch-Pagan test (lmtest) for non-constant residual variance
# in the model fitted without rows 47 and 50.
bptest(model2_tanpa4750)
## 
##  studentized Breusch-Pagan test
## 
## data:  model2_tanpa4750
## BP = 2.2868, df = 5, p-value = 0.8082

P-value > alpha (0,05) –> Terima H0 (Ragam Galat Homogen)

3. Galat Saling Bebas (Durbin-Watson)

# Data actually used by model2_tanpa4750 (rows 47 and 50 removed).
tanpa4750 <- dataframe02[-c(47, 50), ]

# Plot the residuals in observation order to look for serial patterns.
# seq_len(nrow(.)) is used instead of 1:dim(.)[1] so an empty data frame
# would yield an empty index rather than c(1, 0).
plot(
  x = seq_len(nrow(tanpa4750)),
  y = model2_tanpa4750$residuals,
  type = "b",
  ylab = "Residuals",
  xlab = "Observation" # axis label typo ("Obeservation") corrected
)

# Durbin-Watson test (lmtest) for autocorrelation in the residuals.
# Called with default arguments, so the alternative is one-sided
# ("true autocorrelation is greater than 0"), as shown in the printed result.
dwtest(model2_tanpa4750)
## 
##  Durbin-Watson test
## 
## data:  model2_tanpa4750
## DW = 1.8807, p-value = 0.4112
## alternative hypothesis: true autocorrelation is greater than 0

P-value > alpha (0,05) –> Terima H0 (Galat Saling Bebas)

4. Galat Menyebar Normal (Shapiro-Wilk)

# lm diagnostic plot number 2: normal Q-Q plot of the residuals.
plot(model2_tanpa4750, 2)

# Shapiro-Wilk normality test applied to the raw residuals of the model.
shapiro.test(model2_tanpa4750$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  model2_tanpa4750$residuals
## W = 0.98142, p-value = 0.6387

P-value > alpha (0,05) –> Terima H0 (Galat Menyebar Normal)

library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
# Best-subset search (olsrr) over the predictors of the full model: prints the
# selected predictor set for each model size together with selection criteria
# (R-squared, adjusted R-squared, C(p), AIC, SBC, ...).
ols_step_best_subset(model2_tanpa4750)
##    Best Subsets Regression   
## -----------------------------
## Model Index    Predictors
## -----------------------------
##      1         X4             
##      2         X1 X4          
##      3         X2 X3 X4       
##      4         X1 X2 X3 X4    
##      5         X1 X2 X3 X4 X5 
## -----------------------------
## 
##                                                     Subsets Regression Summary                                                     
## -----------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                             
## Model    R-Square    R-Square    R-Square     C(p)       AIC         SBIC        SBC         MSEP         FPE       HSP       APC  
## -----------------------------------------------------------------------------------------------------------------------------------
##   1        0.6281      0.6200      0.6022    2.5358    288.5865    152.4923    294.2001    1056.9484    22.9364    0.4893    0.4043 
##   2        0.6519      0.6364      0.6121    1.5563    287.4105    151.7837    294.8953    1011.7606    22.3838    0.4788    0.3945 
##   3        0.6576      0.6343      0.5932    2.8404    288.6150    153.3313    297.9710    1018.2723    22.9577    0.4928    0.4047 
##   4        0.6620      0.6305      0.5677    4.2945    289.9994    155.0939    301.2266    1029.2339    23.6383    0.5097    0.4166 
##   5        0.6643      0.6244      0.5455    6.0000    291.6640    157.1194    304.7624    1046.9947    24.4861    0.5309    0.4316 
## -----------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria
# Final candidate model: keep only X1 and X4, fitted on the data without
# rows 47 and 50, then print its coefficient table and fit statistics.
modelfinal <- lm(
  Y ~ X1 + X4,
  data = dataframe02[-c(47, 50), ]
)
summary(modelfinal)
## 
## Call:
## lm(formula = Y ~ X1 + X4, data = dataframe02[-c(47, 50), ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.329 -2.658 -1.124  3.767  9.553 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -29.29859    8.45268  -3.466  0.00117 ** 
## X1            0.67103    0.38246   1.754  0.08616 .  
## X4            0.27761    0.04507   6.160 1.81e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.59 on 45 degrees of freedom
## Multiple R-squared:  0.6519, Adjusted R-squared:  0.6364 
## F-statistic: 42.13 on 2 and 45 DF,  p-value: 4.886e-11
# Stepwise selection (olsrr) that adds/removes predictors of the full model
# based on p-values; details = TRUE prints every step of the search.
ols_step_both_p(model2_tanpa4750, details = TRUE)
## Stepwise Selection Method 
## -------------------------
## 
## Candidate Terms: 
## 
## 1. X1 
## 2. X2 
## 3. X3 
## 4. X4 
## 5. X5 
## 
## 
## Step   => 0 
## Model  => Y ~ 1 
## R2     => 0 
## 
## Initiating stepwise selection... 
## 
## Step      => 1 
## Selected  => X4 
## Model     => Y ~ X4 
## R2        => 0.628 
## 
## Step      => 2 
## Selected  => X1 
## Model     => Y ~ X4 + X1 
## R2        => 0.652 
## 
## 
## No more variables to be added or removed.
## 
## 
##                              Stepwise Summary                              
## -------------------------------------------------------------------------
## Step    Variable        AIC        SBC       SBIC        R2       Adj. R2 
## -------------------------------------------------------------------------
##  0      Base Model    334.060    337.803    195.850    0.00000    0.00000 
##  1      X4 (+)        288.587    294.200    152.492    0.62807    0.61998 
##  2      X1 (+)        287.411    294.895    151.784    0.65188    0.63641 
## -------------------------------------------------------------------------
## 
## Final Model Output 
## ------------------
## 
##                          Model Summary                          
## ---------------------------------------------------------------
## R                       0.807       RMSE                 4.444 
## R-Squared               0.652       MSE                 21.067 
## Adj. R-Squared          0.636       Coef. Var           10.896 
## Pred R-Squared          0.612       AIC                287.411 
## MAE                     3.648       SBC                294.895 
## ---------------------------------------------------------------
##  RMSE: Root Mean Square Error 
##  MSE: Mean Square Error 
##  MAE: Mean Absolute Error 
##  AIC: Akaike Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
## 
##                                ANOVA                                 
## --------------------------------------------------------------------
##                 Sum of                                              
##                Squares        DF    Mean Square      F         Sig. 
## --------------------------------------------------------------------
## Regression    1775.229         2        887.615    42.133    0.0000 
## Residual       948.021        45         21.067                     
## Total         2723.250        47                                    
## --------------------------------------------------------------------
## 
##                                     Parameter Estimates                                     
## -------------------------------------------------------------------------------------------
##       model       Beta    Std. Error    Std. Beta      t        Sig       lower      upper 
## -------------------------------------------------------------------------------------------
## (Intercept)    -29.299         8.453                 -3.466    0.001    -46.323    -12.274 
##          X4      0.278         0.045        0.677     6.160    0.000      0.187      0.368 
##          X1      0.671         0.382        0.193     1.754    0.086     -0.099      1.441 
## -------------------------------------------------------------------------------------------