(Kelompok 9)

Khansa Mutiara Kheeda (162112133110)

Naila Shakira (162112133052)

Andi Syaiful Rahmat (502210010029)

Set Libray

melb <- read.csv("E:/UNHAS/SEMESTER 5 (PMM UNAIR)/5 (KAMIS & JUMAT) EKSPLORASI DAN VISUALISASI DATA/PEKAN 11/melb_data.csv")

Dataset yang digunakan dalam tugas ini berisi informasi tentang perumahan di Melbourne. Data ini dapat di download pada link berikut https://www.kaggle.com/datasets/dansbecker/melbourne-housing-snapshot. Data ini berisikan 13580 baris informasi sehingga dalam pengolahan data yang dilakukan hanya diambil 1000 baris data terakhir. Pengambilan data 1000 baris terakhir didasarkan pada letak missing value pada dataset yang dominan berada pada akhir data

Library

library(tidyverse)
library(gridExtra)
library(heatmaply)    
library(visdat)       
library(reshape2)   
library(tidyr)        
library(ggplot2)      
library(psych)        
library(DataExplorer)

Mengambil 1000 data

melb = melb%>% slice(12581:13580)

Membuat Data Frame

price = melb$Price
rooms = melb$Rooms
distance = melb$Distance
bathroom = melb$Bathroom
car = melb$Car
landsize = melb$Landsize
buildingarea = melb$BuildingArea
yearbuilt = melb$YearBuilt
lattitude = melb$Lattitude
longtitude = melb$Longtitude

melb = data.frame(price, rooms, distance, bathroom, car, landsize, buildingarea, yearbuilt, lattitude, longtitude)

Mengecek Data Hilang

sapply(melb, function(x)sum(is.na(x)))

##        price        rooms     distance     bathroom          car     landsize 
##            0            0            0            0           46            0 
## buildingarea    yearbuilt    lattitude   longtitude 
##          483          425            0            0

vis_miss(melb, cluster = TRUE)

Dari data di atas diketahui terdapat 3 variabel yang mempunyai data hilang (NA). Ketiga variabel tersebut ialah data pada kolom Car, Building Area, dan Year Built, yang masing-masing memiliki data hilang sebanyak 46, 483, dan 425 baris. Secara keselurahan, penyebaran data hilang (missing value) pada data tersebut ditunjukkan oleh grafik di atas dengan persentase data yang hilang sebesar 9,5% dan sebesar 90,5% data yang terisi.

Mengisi Missing Value

Untuk mengatasi hal ini, kita gantikan semua missing value pada ketiga atribut tersebut dengan nilai rata-rata dari masing-masing atribut dengan outlier.

rata.car = mean(car, na.rm = T)
car[which(is.na(car))] = rata.car

rata.buildingarea = mean(buildingarea, na.rm = T)
buildingarea[which(is.na(buildingarea))] = rata.buildingarea

rata.yearbuilt = mean(yearbuilt, na.rm = T)
yearbuilt[which(is.na(yearbuilt))] = rata.yearbuilt

Membuat Scatterplot Untuk Data Frame Dengan Outliers

Berikut adalah grafik scatterplot dari ketiga atribut yang masih memiliki outlier.

outs_plot = ggplot(data = melb, aes(x = car, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Plot (1)")

outs_plot1 = ggplot(data = melb, aes(x = buildingarea, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Plot (2)")

outs_plot2 = ggplot(data = melb, aes(x = yearbuilt, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Plot (3)")

outs_plot3 = ggplot(data = melb, aes(x = yearbuilt+buildingarea+car, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Plot (1, 2, & 3)")

gridExtra::grid.arrange(outs_plot, outs_plot1, outs_plot2, ncol=3)

gridExtra::grid.arrange(outs_plot3, ncol=1)

Menetapkan Model Regresi Linear Terhadap Data Frame Dengan Outliers

model   <- lm(price ~ car, data = melb)
summary(model)

## 
## Call:
## lm(formula = price ~ car, data = melb)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -886355 -431176 -156220  250052 3883824 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1026086      43063  23.828  < 2e-16 ***
## car            77545      19740   3.928 9.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 611700 on 952 degrees of freedom
##   (46 observations deleted due to missingness)
## Multiple R-squared:  0.01595,    Adjusted R-squared:  0.01492 
## F-statistic: 15.43 on 1 and 952 DF,  p-value: 9.173e-05

model1 <- lm(price ~ buildingarea, data = melb)
summary(model1)

## 
## Call:
## lm(formula = price ~ buildingarea, data = melb)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -904892 -453673 -159942  261009 2870631 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.188e+06  2.811e+04  42.251   <2e-16 ***
## buildingarea 8.586e+00  1.430e+01   0.601    0.548    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 634100 on 515 degrees of freedom
##   (483 observations deleted due to missingness)
## Multiple R-squared:  0.0006997,  Adjusted R-squared:  -0.001241 
## F-statistic: 0.3606 on 1 and 515 DF,  p-value: 0.5484

model2 <- lm(price ~ yearbuilt, data = melb)
summary(model2)

## 
## Call:
## lm(formula = price ~ yearbuilt, data = melb)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -966994 -401271 -176273  228791 3533288 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14531484.4  1395120.0  10.416   <2e-16 ***
## yearbuilt      -6781.3      709.7  -9.555   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 601800 on 573 degrees of freedom
##   (425 observations deleted due to missingness)
## Multiple R-squared:  0.1374, Adjusted R-squared:  0.1359 
## F-statistic:  91.3 on 1 and 573 DF,  p-value: < 2.2e-16

model3 <- lm(price ~ yearbuilt+buildingarea+car, data = melb)
summary(model3)

## 
## Call:
## lm(formula = price ~ yearbuilt + buildingarea + car, data = melb)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1699737  -322776  -107803   205303  3122997 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16963710.9  1395107.9  12.159   <2e-16 ***
## yearbuilt       -8347.2      713.4 -11.700   <2e-16 ***
## buildingarea     3349.4      306.5  10.926   <2e-16 ***
## car             53899.2    23384.5   2.305   0.0216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 512300 on 476 degrees of freedom
##   (520 observations deleted due to missingness)
## Multiple R-squared:  0.3414, Adjusted R-squared:  0.3373 
## F-statistic: 82.26 on 3 and 476 DF,  p-value: < 2.2e-16

Mencari Jarak Cook Untuk Setiap Pengamatan Dalam Data Frame

cooksD <- cooks.distance(model)
cooksD1 <- cooks.distance(model1)
cooksD2 <- cooks.distance(model2)
cooksD3 <- cooks.distance(model3)

Membuat Plot Jarak Cook Dengan Garis Horisontal pada 4/n Untuk Melihat Pengamatan Mana yang Melebihi Ambang Batas

n <- nrow(melb)
plot(cooksD, main = "Cooks Distance for Influential Obs (1)")
abline(h = 4/n, lty = 5, col = "red") #add cutoff line

plot(cooksD1, main = "Cooks Distance for Influential Obs (2)")
abline(h = 4/n, lty = 5, col = "red") #add cutoff line

plot(cooksD2, main = "Cooks Distance for Influential Obs (3)")
abline(h = 4/n, lty = 5, col = "red") #add cutoff line

plot(cooksD3, main = "Cooks Distance for Influential Obs (1, 2, & 3)")
abline(h = 4/n, lty = 5, col = "red") #add cutoff line

Mengidentifikasi Influential Points

influential_obs <- as.numeric(names(cooksD)[(cooksD > (4/n))])
influential_obs

##  [1]  21  37  67  83  87 112 124 125 130 157 174 183 192 193 208 237 303 335 339
## [20] 349 384 421 434 517 525 540 558 609 618 620 643 698 717 729 757 762 765 767
## [39] 787 834 848 887 889 891 895 900 904 907 911 939 948 976 999

influential_obs1 <- as.numeric(names(cooksD1)[(cooksD1 > (4/n))])
influential_obs1

##  [1]  21  37  64  67  96 192 193 208 309 396 505 517 522 559 618 620 645 666 717
## [20] 762 765 769 788 855 887 889 895 904 939 999

influential_obs2 <- as.numeric(names(cooksD2)[(cooksD2 > (4/n))])
influential_obs2

##  [1]   3   4  21  29  37  64  67  74  77  84  86  89  96  97  98 164 169 182 183
## [20] 192 208 214 264 277 307 309 314 321 323 352 359 384 396 505 508 517 522 559
## [39] 617 618 620 623 636 645 646 709 710 717 718 728 757 762 765 769 787 788 789
## [58] 798 808 834 855 887 889 895 904 929 939 942 974 978 999

influential_obs3 <- as.numeric(names(cooksD3)[(cooksD3 > (4/n))])
influential_obs3

##  [1]   4  21  37  64  67  82  86  96 128 165 189 192 208 296 314 321 323 336 348
## [20] 352 361 384 386 461 465 505 517 522 559 601 617 618 620 636 698 709 717 728
## [39] 731 757 762 765 769 786 788 791 855 887 889 895 900 904 911 920 939 942 948
## [58] 999

Mendefinisikan Data Frame Terbaru Dengan Influential Points Yang Telah Dihapus

define new data frame with influential points removed

outliers_removed <- melb[-influential_obs,]
outliers_removed1 <- melb[-influential_obs1,]
outliers_removed2 <- melb[-influential_obs2,]
outliers_removed3 <- melb[-influential_obs3,]

Membuat Scatterplot Dengan Outliers Present

outliers_present <- ggplot(data = melb, aes(x = car , y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Present (1)")

outliers_present1 <- ggplot(data = melb, aes(x = buildingarea, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Present (2)")

outliers_present2 <- ggplot(data = melb, aes(x = yearbuilt, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Present (3)")

outliers_present3 <- ggplot(data = melb, aes(x = yearbuilt+buildingarea+car, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Present (1, 2, & 3)")

Membuat Scatterplot Dengan Ooutliers Removed

outliers_removed <- ggplot(data = outliers_removed, aes(x = car, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Removed (1)")

outliers_removed1 <- ggplot(data = outliers_removed1, aes(x = buildingarea, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Removed (2)")

outliers_removed2 <- ggplot(data = outliers_removed2, aes(x = yearbuilt, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Removed (3)")

outliers_removed3 <- ggplot(data = outliers_removed3, aes(x = yearbuilt+buildingarea+car, y = price)) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Outliers Removed (1, 2, & 3)")

Menampilkan Scatterplot

plot both scatterplots side by side

gridExtra::grid.arrange(outliers_present, outliers_removed, ncol = 2)

gridExtra::grid.arrange(outliers_present1, outliers_removed1, ncol = 2)

gridExtra::grid.arrange(outliers_present2, outliers_removed2, ncol = 2)

gridExtra::grid.arrange(outliers_present3, outliers_removed3, ncol = 2)

Menghitung Nilai DFFITS

dffit  <- as.data.frame(dffits(model))
head(dffit)

dffit1 <- as.data.frame(dffits(model1))
head(dffit1)

dffit2 <- as.data.frame(dffits(model2))
head(dffit2)

dffit3 <- as.data.frame(dffits(model3))
head(dffit3)

#find number of predictors in model
p  <- length(model$coefficients)-1
p1 <- length(model1$coefficients)-1
p2 <- length(model2$coefficients)-1
p3 <- length(model3$coefficients)-1

#find number of observations
n <- nrow(melb)

#calculate DFFITS threshold value
threshdf  <- 2*sqrt(p/n)
threshdf1 <- 2*sqrt(p1/n)
threshdf2 <- 2*sqrt(p2/n)
threshdf3 <- 2*sqrt(p3/n)

threshdf

## [1] 0.06324555

threshdf1

## [1] 0.06324555

threshdf2

## [1] 0.06324555

threshdf3

## [1] 0.1095445

## sort observations by DFFITS, descending
sort_dffit <- dffit[order(-dffit),]
sort_dffit1 <- dffit1[order(-dffit1),]
sort_dffit2 <- dffit2[order(-dffit2),]
sort_dffit3 <- dffit2[order(-dffit3),]

head(sort_dffit)

## [1] 0.4068252 0.3431635 0.2995003 0.2171134 0.2154575 0.2140119

head(sort_dffit1)

## [1] 0.2034640 0.1836657 0.1716274 0.1699512 0.1603054 0.1553892

head(sort_dffit2)

## [1] 0.4304203 0.3264060 0.2708635 0.2678575 0.2274852 0.2170418

head(sort_dffit3)

## [1]  0.326405982 -0.017145393  0.000232594  0.047296398 -0.040001980
## [6]  0.013443488

Membuat Visualisasi Untuk Nilai DFFITS

# plot DFFITS values for each observation
plot(dffits(model), type = 'h')

#add horizontal lines at absolute values for threshold
abline(h = threshdf, lty = 2)
abline(h = -threshdf, lty = 2)

plot(dffits(model1), type = 'h')

#add horizontal lines at absolute values for threshold
abline(h = threshdf1, lty = 2)
abline(h = -threshdf1, lty = 2)

plot(dffits(model2), type = 'h')

#add horizontal lines at absolute values for threshold
abline(h = threshdf2, lty = 2)
abline(h = -threshdf2, lty = 2)

plot(dffits(model3), type = 'h')

#add horizontal lines at absolute values for threshold
abline(h = threshdf3, lty = 2)
abline(h = -threshdf3, lty = 2)

Menghitung Nilai DFBETAS

#calculate DFBETAS for each observation in the model
dfbeta <- as.data.frame(dfbetas(model))
dfbeta1 <- as.data.frame(dfbetas(model1))
dfbeta2 <- as.data.frame(dfbetas(model2))
dfbeta3 <- as.data.frame(dfbetas(model3))

#display DFBETAS for each observation
head(dfbeta)

head(dfbeta1)

head(dfbeta2)

head(dfbeta3)

#calculate DFBETAS threshold value
threshdb <- 2/sqrt(n)
threshdb

## [1] 0.06324555

Membuat Visualisasi Untuk Nilai DFBETAS

#specify 2 rows and 1 column in plotting region
par(mfrow=c(2,1))

#plot DFBETAS for disp with threshold lines
plotdb <- {plot(dfbeta, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

plotdb1 <- {plot(dfbeta1, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

plotdb2 <- {plot(dfbeta2, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

plotdb3_1 <- {plot(dfbeta3$car, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

plotdb3_2 <- {plot(dfbeta3$buildingarea, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

plotdb3_3 <- {plot(dfbeta3$yearbuilt, type='h')
  abline(h = threshdb, lty = 2)
  abline(h = -threshdb, lty = 2)}

Menghitung Nilai COVRATIO

summary(influence.measures(model))

## Potentially influential observations of
##   lm(formula = price ~ car, data = melb) :
## 
##     dfb.1_ dfb.car dffit   cov.r   cook.d hat    
## 4    0.00   0.00    0.00    1.01_*  0.00   0.00  
## 5   -0.01   0.02    0.02    1.01_*  0.00   0.01  
## 21   0.11  -0.08    0.12    0.99_*  0.01   0.00  
## 26   0.04  -0.03    0.04    1.01_*  0.00   0.00  
## 37   0.05   0.01    0.13    0.97_*  0.01   0.00  
## 40   0.03  -0.04   -0.05    1.01_*  0.00   0.01  
## 52   0.01  -0.01    0.01    1.01_*  0.00   0.00  
## 60  -0.02   0.02    0.03    1.01_*  0.00   0.01  
## 64   0.03   0.01    0.08    0.99_*  0.00   0.00  
## 67  -0.18   0.27    0.30_*  0.97_*  0.04   0.01  
## 96   0.04   0.01    0.09    0.99_*  0.00   0.00  
## 97   0.03   0.01    0.08    0.99_*  0.00   0.00  
## 112  0.04   0.01    0.09    0.99_*  0.00   0.00  
## 124  0.09  -0.12   -0.13    1.01_*  0.01   0.01_*
## 125  0.15  -0.19   -0.19_*  1.02_*  0.02   0.02_*
## 130  0.11  -0.14   -0.15_*  1.01_*  0.01   0.01_*
## 141  0.00   0.00    0.00    1.01_*  0.00   0.01  
## 157  0.15  -0.19   -0.20_*  1.02_*  0.02   0.02_*
## 162  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 164  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 183  0.08   0.01    0.21_*  0.92_*  0.02   0.00  
## 188  0.01  -0.01    0.01    1.01_*  0.00   0.00  
## 192 -0.04   0.08    0.12    0.99_*  0.01   0.00  
## 193  0.05   0.01    0.12    0.98_*  0.01   0.00  
## 213  0.00  -0.01   -0.01    1.01_*  0.00   0.01  
## 223  0.02  -0.02    0.02    1.01_*  0.00   0.00  
## 224  0.01  -0.02   -0.02    1.01_*  0.00   0.01  
## 228  0.03  -0.04   -0.05    1.01_*  0.00   0.01  
## 242 -0.02   0.01   -0.02    1.01_*  0.00   0.00  
## 253  0.04  -0.03    0.04    1.01_*  0.00   0.00  
## 264 -0.02   0.02   -0.02    1.01_*  0.00   0.00  
## 288  0.01  -0.01   -0.01    1.02_*  0.00   0.02_*
## 299  0.01  -0.01   -0.02    1.01_*  0.00   0.01  
## 318  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 324  0.03   0.00    0.08    0.99_*  0.00   0.00  
## 335 -0.13   0.19    0.22_*  0.99_*  0.02   0.01  
## 337  0.03   0.01    0.08    0.99_*  0.00   0.00  
## 339  0.09  -0.13   -0.13    1.01_*  0.01   0.01_*
## 349 -0.09   0.13    0.15_*  1.00    0.01   0.01  
## 361 -0.01   0.02    0.02    1.01_*  0.00   0.01  
## 386  0.02  -0.02   -0.03    1.01_*  0.00   0.01  
## 393 -0.03   0.03   -0.03    1.01_*  0.00   0.00  
## 394  0.03  -0.05   -0.06    1.01_*  0.00   0.01  
## 399  0.03  -0.03    0.03    1.01_*  0.00   0.00  
## 415  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 421  0.10  -0.07    0.11    0.99_*  0.01   0.00  
## 434 -0.07   0.16    0.21_*  0.96_*  0.02   0.00  
## 435 -0.03   0.05    0.05    1.01_*  0.00   0.01  
## 460  0.02  -0.02    0.02    1.01_*  0.00   0.00  
## 465  0.03  -0.05   -0.06    1.01_*  0.00   0.01  
## 481  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 505  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 516  0.05  -0.07   -0.08    1.01_*  0.00   0.01_*
## 517  0.04   0.01    0.10    0.98_*  0.00   0.00  
## 522  0.03   0.00    0.07    0.99_*  0.00   0.00  
## 525  0.07  -0.10   -0.11    1.01_*  0.01   0.01_*
## 529 -0.01   0.01    0.01    1.01_*  0.00   0.01  
## 540 -0.04   0.08    0.12    0.99_*  0.01   0.00  
## 558 -0.33   0.40    0.41_*  1.03_*  0.08   0.04_*
## 559  0.03   0.01    0.09    0.99_*  0.00   0.00  
## 609  0.17  -0.15    0.17_*  1.00    0.01   0.00  
## 618 -0.08   0.13    0.14_*  1.00    0.01   0.01  
## 620  0.05   0.01    0.13    0.97_*  0.01   0.00  
## 628  0.01  -0.01   -0.02    1.01_*  0.00   0.01  
## 637  0.03   0.00    0.08    0.99_*  0.00   0.00  
## 643  0.08  -0.11   -0.11    1.01_*  0.01   0.01_*
## 666  0.01  -0.01   -0.01    1.01_*  0.00   0.01_*
## 672  0.03  -0.03    0.03    1.01_*  0.00   0.00  
## 683 -0.02   0.02    0.02    1.01_*  0.00   0.01_*
## 698  0.08  -0.11   -0.11    1.02_*  0.01   0.02_*
## 700  0.02  -0.02   -0.02    1.02_*  0.00   0.02_*
## 709 -0.04   0.04   -0.04    1.01_*  0.00   0.00  
## 713  0.03  -0.05   -0.06    1.01_*  0.00   0.01  
## 717  0.11  -0.08    0.11    0.99_*  0.01   0.00  
## 729  0.15  -0.14    0.15_*  1.00    0.01   0.00  
## 741  0.01  -0.02   -0.02    1.01_*  0.00   0.01  
## 762  0.20  -0.15    0.22_*  0.96_*  0.02   0.00  
## 767  0.04   0.01    0.10    0.98_*  0.00   0.00  
## 769  0.03   0.01    0.09    0.99_*  0.00   0.00  
## 787  0.04   0.01    0.11    0.98_*  0.01   0.00  
## 788  0.04   0.01    0.09    0.99_*  0.00   0.00  
## 792 -0.03   0.04    0.04    1.01_*  0.00   0.01_*
## 829  0.02  -0.03   -0.03    1.01_*  0.00   0.01  
## 831 -0.01   0.01   -0.01    1.01_*  0.00   0.00  
## 834 -0.14   0.18    0.19_*  1.02_*  0.02   0.02_*
## 838  0.04  -0.04    0.04    1.01_*  0.00   0.00  
## 855  0.03   0.01    0.08    0.99_*  0.00   0.00  
## 868  0.04  -0.03    0.04    1.01_*  0.00   0.00  
## 877  0.02  -0.02   -0.03    1.01_*  0.00   0.01  
## 882  0.03  -0.05   -0.05    1.01_*  0.00   0.01  
## 884  0.01  -0.02   -0.02    1.01_*  0.00   0.01  
## 887  0.05   0.01    0.11    0.98_*  0.01   0.00  
## 889  0.05   0.01    0.12    0.97_*  0.01   0.00  
## 895 -0.05   0.10    0.14_*  0.99_*  0.01   0.00  
## 900  0.09  -0.11   -0.12    1.02_*  0.01   0.02_*
## 904 -0.24   0.33    0.34_*  0.99_*  0.06   0.01_*
## 933 -0.02   0.02   -0.02    1.01_*  0.00   0.00  
## 934  0.00   0.00    0.00    1.01_*  0.00   0.00  
## 940  0.01  -0.01   -0.01    1.01_*  0.00   0.01  
## 944  0.03   0.00    0.08    0.99_*  0.00   0.00  
## 948  0.26  -0.32   -0.32_*  1.07_*  0.05   0.07_*
## 952 -0.03   0.05    0.06    1.01_*  0.00   0.01  
## 956 -0.01   0.01    0.01    1.01_*  0.00   0.01  
## 974  0.03   0.01    0.08    0.99_*  0.00   0.00  
## 976 -0.05   0.10    0.14_*  0.99_*  0.01   0.00  
## 998  0.01  -0.02   -0.02    1.01_*  0.00   0.01  
## 999 -0.13   0.18    0.19_*  1.01    0.02   0.01_*

summary(influence.measures(model1))

## Potentially influential observations of
##   lm(formula = price ~ buildingarea, data = melb) :
## 
##     dfb.1_     dfb.bldn   dffit      cov.r      cook.d     hat       
## 21      0.10       0.00       0.11       0.98_*     0.01       0.00  
## 37      0.17       0.01       0.17       0.95_*     0.01       0.00  
## 64      0.11       0.00       0.11       0.98_*     0.01       0.00  
## 67      0.18       0.02       0.18       0.94_*     0.02       0.00  
## 96      0.11       0.00       0.11       0.98_*     0.01       0.00  
## 192     0.11       0.00       0.11       0.98_*     0.01       0.00  
## 193     0.15       0.00       0.16       0.96_*     0.01       0.00  
## 309     0.09       0.00       0.09       0.99_*     0.00       0.00  
## 505     0.09       0.00       0.09       0.99_*     0.00       0.00  
## 517     0.13       0.00       0.13       0.97_*     0.01       0.00  
## 522     0.09       0.02       0.10       0.99_*     0.00       0.00  
## 559     0.11       0.00       0.11       0.98_*     0.01       0.00  
## 620     0.17      -0.01       0.17       0.95_*     0.01       0.00  
## 666    16.70_*  -204.21_*  -204.41_*   433.13_* 18295.05_*     1.00_*
## 717     0.10       0.00       0.10       0.98_*     0.01       0.00  
## 762     0.20      -0.01       0.20_*     0.93_*     0.02       0.00  
## 765     0.09       0.00       0.09       0.99_*     0.00       0.00  
## 769     0.11      -0.01       0.11       0.98_*     0.01       0.00  
## 788     0.11       0.00       0.11       0.98_*     0.01       0.00  
## 855     0.11      -0.01       0.11       0.98_*     0.01       0.00  
## 887     0.15       0.00       0.15       0.96_*     0.01       0.00  
## 889     0.16      -0.02       0.16       0.96_*     0.01       0.00  
## 895     0.13       0.01       0.13       0.97_*     0.01       0.00  
## 904     0.15       0.00       0.16       0.96_*     0.01       0.00

summary(influence.measures(model2))

## Potentially influential observations of
##   lm(formula = price ~ yearbuilt, data = melb) :
## 
##     dfb.1_ dfb.yrbl dffit   cov.r   cook.d hat    
## 13  -0.03   0.03    -0.03    1.01_*  0.00   0.01  
## 31  -0.04   0.04    -0.04    1.01_*  0.00   0.01  
## 37   0.23  -0.23     0.27_*  0.97_*  0.04   0.01  
## 64   0.07  -0.07     0.12    0.99_*  0.01   0.00  
## 67  -0.07   0.08     0.20_*  0.94_*  0.02   0.00  
## 96  -0.01   0.01     0.12    0.98_*  0.01   0.00  
## 97  -0.09   0.09     0.15    0.98_*  0.01   0.00  
## 164 -0.08   0.08     0.14    0.98_*  0.01   0.00  
## 170 -0.04   0.04    -0.04    1.01_*  0.00   0.01  
## 183  0.35  -0.35     0.43_*  0.89_*  0.09   0.01  
## 192 -0.18   0.19     0.23_*  0.97_*  0.03   0.01  
## 214 -0.02   0.02     0.09    0.99_*  0.00   0.00  
## 240 -0.02   0.02    -0.02    1.01_*  0.00   0.01  
## 251 -0.08   0.08    -0.08    1.01_*  0.00   0.01  
## 303  0.04  -0.04     0.05    1.01_*  0.00   0.01  
## 350 -0.03   0.03    -0.03    1.01_*  0.00   0.01  
## 352 -0.16   0.16     0.20_*  0.98_*  0.02   0.01  
## 358 -0.06   0.06    -0.07    1.01_*  0.00   0.01  
## 361 -0.04   0.04    -0.04    1.01_*  0.00   0.01  
## 383  0.06  -0.06     0.06    1.01_*  0.00   0.01  
## 396  0.12  -0.12     0.13    1.01    0.01   0.01_*
## 416 -0.02   0.02    -0.02    1.01_*  0.00   0.01  
## 469 -0.06   0.06    -0.07    1.01_*  0.00   0.01_*
## 512  0.06  -0.06     0.06    1.01_*  0.00   0.01  
## 517  0.15  -0.15     0.18_*  0.99_*  0.02   0.01  
## 518 -0.02   0.02    -0.02    1.01_*  0.00   0.01  
## 522 -0.14   0.14     0.19_*  0.98_*  0.02   0.00  
## 546 -0.03   0.03    -0.03    1.02_*  0.00   0.01_*
## 559 -0.08   0.09     0.15    0.98_*  0.01   0.00  
## 618 -0.04   0.04     0.11    0.99_*  0.01   0.00  
## 620  0.16  -0.16     0.22_*  0.96_*  0.02   0.00  
## 645  0.12  -0.12     0.13    1.01_*  0.01   0.01_*
## 673 -0.01   0.01    -0.01    1.01_*  0.00   0.01  
## 686  0.00   0.00     0.00    1.01_*  0.00   0.01  
## 757  0.10  -0.09     0.10    1.01_*  0.01   0.01_*
## 762  0.28  -0.28     0.33_*  0.95_*  0.05   0.01  
## 769  0.02  -0.02     0.11    0.98_*  0.01   0.00  
## 787  0.13  -0.12     0.17    0.98_*  0.01   0.00  
## 788  0.10  -0.10     0.14    0.99_*  0.01   0.00  
## 810  0.00   0.00     0.00    1.02_*  0.00   0.01_*
## 836 -0.03   0.03    -0.03    1.01_*  0.00   0.01_*
## 838 -0.05   0.05    -0.05    1.01_*  0.00   0.01  
## 839 -0.05   0.05    -0.05    1.01_*  0.00   0.01  
## 840  0.02  -0.02     0.03    1.01_*  0.00   0.01  
## 887  0.16  -0.16     0.20_*  0.98_*  0.02   0.00  
## 889 -0.20   0.20     0.27_*  0.95_*  0.04   0.00  
## 895  0.12  -0.11     0.16    0.98_*  0.01   0.00  
## 904  0.01  -0.01     0.15    0.96_*  0.01   0.00  
## 939 -0.16   0.16     0.20_*  0.98_*  0.02   0.01  
## 942 -0.13   0.14     0.17    0.98_*  0.02   0.00  
## 945 -0.04   0.04    -0.04    1.01_*  0.00   0.01

summary(influence.measures(model3))

## Potentially influential observations of
##   lm(formula = price ~ yearbuilt + buildingarea + car, data = melb) :
## 
##     dfb.1_ dfb.yrbl dfb.bldn dfb.car dffit   cov.r   cook.d hat    
## 13  -0.01   0.01     0.00     0.01   -0.02    1.03_*  0.00   0.02  
## 21   0.09  -0.09     0.09    -0.04    0.13    1.03_*  0.00   0.03_*
## 37   0.25  -0.26     0.30    -0.04    0.40_*  0.96_*  0.04   0.02  
## 64   0.10  -0.10     0.06     0.00    0.15    0.97_*  0.01   0.00  
## 67   0.02  -0.04     0.49     0.14    0.59_*  0.96_*  0.08   0.03_*
## 96   0.01  -0.01     0.18    -0.04    0.21    0.96_*  0.01   0.01  
## 124  0.01  -0.01     0.05    -0.09   -0.10    1.03_*  0.00   0.03_*
## 128  0.00   0.01    -0.22     0.00   -0.24    1.02    0.01   0.03_*
## 165 -0.05   0.05    -0.15     0.03   -0.16    1.03_*  0.01   0.03_*
## 170 -0.02   0.02     0.00     0.01   -0.02    1.03_*  0.00   0.02  
## 192 -0.17   0.16     0.14     0.07    0.30_*  0.94_*  0.02   0.01  
## 214  0.00   0.00     0.06    -0.02    0.06    1.05_*  0.00   0.04_*
## 237  0.02  -0.02     0.01    -0.02    0.03    1.03_*  0.00   0.02  
## 277 -0.02   0.02     0.07    -0.02    0.08    1.03_*  0.00   0.02  
## 288  0.00   0.00     0.00    -0.04   -0.04    1.04_*  0.00   0.03_*
## 296 -0.30   0.32    -0.77     0.17   -0.82_*  0.96_*  0.16   0.05_*
## 361 -0.13   0.13     0.00    -0.11   -0.17    1.03_*  0.01   0.03_*
## 508 -0.02   0.02     0.04     0.02    0.06    1.03_*  0.00   0.02  
## 517  0.20  -0.20     0.05     0.02    0.23    0.96_*  0.01   0.01  
## 522 -0.03   0.02     0.18    -0.05    0.18    1.07_*  0.01   0.06_*
## 559 -0.07   0.07     0.15    -0.04    0.22    0.94_*  0.01   0.01  
## 601 -0.07   0.10    -0.93     0.24   -0.94_*  1.02    0.22   0.09_*
## 618 -0.01   0.01    -0.01     0.20    0.24    0.97_*  0.01   0.01  
## 620  0.23  -0.23     0.01     0.04    0.30_*  0.88_*  0.02   0.01  
## 628  0.01  -0.01    -0.09     0.09    0.12    1.03_*  0.00   0.03_*
## 643  0.00   0.00     0.04    -0.09   -0.10    1.03_*  0.00   0.02  
## 698  0.04  -0.03    -0.10    -0.23   -0.30_*  1.03_*  0.02   0.04_*
## 700 -0.04   0.04     0.04    -0.10   -0.11    1.06_*  0.00   0.05_*
## 762  0.38  -0.37     0.18    -0.18    0.49_*  0.85_*  0.06   0.01  
## 769  0.02   0.00    -0.41     0.13    0.45_*  0.88_*  0.05   0.01  
## 788  0.13  -0.13     0.07     0.01    0.18    0.97_*  0.01   0.01  
## 792 -0.02   0.02    -0.01    -0.06   -0.07    1.03_*  0.00   0.02  
## 834  0.06  -0.06     0.00     0.11    0.12    1.05_*  0.00   0.04_*
## 838 -0.01   0.01     0.00     0.01   -0.02    1.03_*  0.00   0.02  
## 855  0.23  -0.22    -0.28     0.12    0.40_*  0.95_*  0.04   0.02  
## 887  0.19  -0.20     0.20    -0.02    0.29_*  0.97_*  0.02   0.01  
## 889 -0.39   0.41    -0.67     0.15    0.80_*  0.74_*  0.15   0.02  
## 900  0.03  -0.03     0.00    -0.15   -0.17    1.04_*  0.01   0.03_*
## 904  0.08  -0.09    -0.09     0.53    0.57_*  0.91_*  0.08   0.02  
## 939 -0.06   0.06     0.20    -0.01    0.24    1.03    0.01   0.03_*
## 942 -0.29   0.30    -0.46     0.10    0.56_*  0.88_*  0.07   0.02  
## 948 -0.01   0.01     0.09    -0.32   -0.32_*  1.16_*  0.03   0.14_*
## 999  0.12  -0.13    -0.05     0.23    0.26    1.02    0.02   0.03_*

covra = as.data.frame(covratio(model))
head(covra)

covra1 = as.data.frame(covratio(model1))
head(covra1)

covra2 = as.data.frame(covratio(model2))
head(covra2)

covra3 = as.data.frame(covratio(model3))
head(covra3)

#find number of predictors in model
p  <- length(model$coefficients)-1
p1 <- length(model1$coefficients)-1
p2 <- length(model2$coefficients)-1
p3 <- length(model3$coefficients)-1

#calculate COVRATIO threshold value
threshcv  <- (3*p)/n
threshcv1 <- (3*p1)/n
threshcv2 <- (3*p2)/n
threshcv3 <- (3*p3)/n

threshcv

## [1] 0.003

threshcv1

## [1] 0.003

threshcv2

## [1] 0.003

threshcv3

## [1] 0.009

sort_covra <- covra[order(-covra),]
sort_covra1 <- covra1[order(-covra1),]
sort_covra2 <- covra2[order(-covra2),]
sort_covra3 <- covra3[order(-covra3),]

head(sort_covra)

## [1] 1.073004 1.034317 1.020706 1.020672 1.019312 1.019105

head(sort_covra1)

## [1] 433.125061   1.005869   1.005853   1.005852   1.005850   1.005848

head(sort_covra2)

## [1] 1.015580 1.015355 1.014225 1.013914 1.013289 1.013211

head(sort_covra3)

## [1] 1.159737 1.071505 1.055904 1.053008 1.051011 1.042829

Membuat Visualisasi Untuk Nilai COVRATIO

plot(covratio(model), type = 'l')
abline(l = threshcv, lty = 2)
abline(l = -threshcv, lty = 2)

plot(covratio(model1), type = 'l')
abline(l = threshcv1, lty = 2)
abline(l = -threshcv1, lty = 2)

plot(covratio(model2), type = 'l')
abline(l = threshcv2, lty = 2)
abline(l = -threshcv2, lty = 2)

plot(covratio(model3), type = 'l')
abline(l = threshcv3, lty = 2)
abline(l = -threshcv3, lty = 2)

TUGAS PRAKTIKUM EVD PEKAN 11