Hi Im guiding this project
library(MASS)
library(ggplot2)
library(pastecs)
## Loading required package: boot
## Warning: package 'boot' was built under R version 3.2.5
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:boot':
##
## logit
library(qpcR)
## Loading required package: minpack.lm
## Loading required package: rgl
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:boot':
##
## salinity
## Loading required package: Matrix
library(MPV)
##
## Attaching package: 'MPV'
## The following object is masked from 'package:qpcR':
##
## PRESS
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## stackloss
library(lattice)
##
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
##
## melanoma
library(stats)
library(foreign)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:robustbase':
##
## cushny
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:boot':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(Hmisc)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:robustbase':
##
## heart
## The following object is masked from 'package:boot':
##
## aml
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library(HSAUR)
## Loading required package: tools
##
## Attaching package: 'HSAUR'
## The following object is masked from 'package:robustbase':
##
## epilepsy
library(MVN)
## sROC 0.1-2 loaded
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
library(caTools) # for spliting the sample
## Warning: package 'caTools' was built under R version 3.2.5
library(boot)
library(DAAG)
## Warning: package 'DAAG' was built under R version 3.2.5
##
## Attaching package: 'DAAG'
## The following object is masked from 'package:survival':
##
## lung
## The following object is masked from 'package:psych':
##
## cities
## The following object is masked from 'package:robustbase':
##
## milk
## The following object is masked from 'package:car':
##
## vif
## The following object is masked from 'package:MASS':
##
## hills
#to remove all the files or data set from the r environment
#rm(list=ls())
#use boston 1 file
reg1<-read.csv(choose.files())
missing values
#missing values
#gives logical value for missing values
#is.na(reg1$egkm)
#find sum of missing values
#sum(is.na(reg1$egkm))
#perecntage of missingvlaues
sum(is.na(reg1$crim)*100)/length(reg1$crim)
## [1] 0
sum(is.na(reg1$zn)*100)/length(reg1$zn)
## [1] 0
sum(is.na(reg1$indus)*100)/length(reg1$indus)
## [1] 0
sum(is.na(reg1$chas)*100)/length(reg1$chas)
## [1] 0
sum(is.na(reg1$nox)*100)/length(reg1$nox)
## [1] 0
sum(is.na(reg1$rm)*100)/length(reg1$rm)
## [1] 0
sum(is.na(reg1$age)*100)/length(reg1$age)
## [1] 0
sum(is.na(reg1$dis)*100)/length(reg1$dis)
## [1] 0
sum(is.na(reg1$rad)*100)/length(reg1$rad)
## [1] 0
sum(is.na(reg1$tax)*100)/length(reg1$tax)
## [1] 0
sum(is.na(reg1$ptratio)*100)/length(reg1$ptratio)
## [1] 0
sum(is.na(reg1$black)*100)/length(reg1$black)
## [1] 0
sum(is.na(reg1$lstat)*100)/length(reg1$lstat)
## [1] 0
sum(is.na(reg1$medv)*100)/length(reg1$medv)
## [1] 0
# since sample size is very large so we can remove them
#descriptive study of the variables
#use SPSS for descriptive statistics
#gives the descriptive statisitics of all the variables, sample size small works better
stat.desc(reg1)
## X crim zn indus
## nbr.val 5.060000e+02 506.0000000 506.000000 506.0000000
## nbr.null 0.000000e+00 0.0000000 372.000000 0.0000000
## nbr.na 0.000000e+00 0.0000000 0.000000 0.0000000
## min 1.000000e+00 0.0063200 0.000000 0.4600000
## max 5.060000e+02 88.9762000 100.000000 27.7400000
## range 5.050000e+02 88.9698800 100.000000 27.2800000
## sum 1.282710e+05 1828.4429200 5750.000000 5635.2100000
## median 2.535000e+02 0.2565100 0.000000 9.6900000
## mean 2.535000e+02 3.6135236 11.363636 11.1367787
## SE.mean 6.500000e+00 0.3823853 1.036810 0.3049799
## CI.mean.0.95 1.277037e+01 0.7512620 2.036991 0.5991856
## var 2.137850e+04 73.9865782 543.936814 47.0644425
## std.dev 1.462139e+02 8.6015451 23.322453 6.8603529
## coef.var 5.767806e-01 2.3803761 2.052376 0.6160087
## chas nox rm age
## nbr.val 506.00000000 5.060000e+02 5.060000e+02 5.060000e+02
## nbr.null 471.00000000 0.000000e+00 0.000000e+00 0.000000e+00
## nbr.na 0.00000000 0.000000e+00 0.000000e+00 0.000000e+00
## min 0.00000000 3.850000e-01 3.561000e+00 2.900000e+00
## max 1.00000000 8.710000e-01 8.780000e+00 1.000000e+02
## range 1.00000000 4.860000e-01 5.219000e+00 9.710000e+01
## sum 35.00000000 2.806757e+02 3.180025e+03 3.469890e+04
## median 0.00000000 5.380000e-01 6.208500e+00 7.750000e+01
## mean 0.06916996 5.546951e-01 6.284634e+00 6.857490e+01
## SE.mean 0.01129141 5.151391e-03 3.123514e-02 1.251370e+00
## CI.mean.0.95 0.02218393 1.012080e-02 6.136683e-02 2.458531e+00
## var 0.06451297 1.342764e-02 4.936709e-01 7.923584e+02
## std.dev 0.25399404 1.158777e-01 7.026171e-01 2.814886e+01
## coef.var 3.67202814 2.089034e-01 1.117992e-01 4.104834e-01
## dis rad tax ptratio
## nbr.val 5.060000e+02 506.0000000 5.060000e+02 5.060000e+02
## nbr.null 0.000000e+00 0.0000000 0.000000e+00 0.000000e+00
## nbr.na 0.000000e+00 0.0000000 0.000000e+00 0.000000e+00
## min 1.129600e+00 1.0000000 1.870000e+02 1.260000e+01
## max 1.212650e+01 24.0000000 7.110000e+02 2.200000e+01
## range 1.099690e+01 23.0000000 5.240000e+02 9.400000e+00
## sum 1.920292e+03 4832.0000000 2.065680e+05 9.338500e+03
## median 3.207450e+00 5.0000000 3.300000e+02 1.905000e+01
## mean 3.795043e+00 9.5494071 4.082372e+02 1.845553e+01
## SE.mean 9.361023e-02 0.3870849 7.492389e+00 9.624357e-02
## CI.mean.0.95 1.839135e-01 0.7604951 1.472009e+01 1.890871e-01
## var 4.434015e+00 75.8163660 2.840476e+04 4.686989e+00
## std.dev 2.105710e+00 8.7072594 1.685371e+02 2.164946e+00
## coef.var 5.548581e-01 0.9118115 4.128412e-01 1.173060e-01
## black lstat medv
## nbr.val 5.060000e+02 506.0000000 5.060000e+02
## nbr.null 0.000000e+00 0.0000000 0.000000e+00
## nbr.na 0.000000e+00 0.0000000 0.000000e+00
## min 3.200000e-01 1.7300000 5.000000e+00
## max 3.969000e+02 37.9700000 5.000000e+01
## range 3.965800e+02 36.2400000 4.500000e+01
## sum 1.804771e+05 6402.4500000 1.140160e+04
## median 3.914400e+02 11.3600000 2.120000e+01
## mean 3.566740e+02 12.6530632 2.253281e+01
## SE.mean 4.058552e+00 0.3174589 4.088611e-01
## CI.mean.0.95 7.973726e+00 0.6237028 8.032783e-01
## var 8.334752e+03 50.9947595 8.458672e+01
## std.dev 9.129486e+01 7.1410615 9.197104e+00
## coef.var 2.559616e-01 0.5643741 4.081651e-01
summary(reg1)
## X crim zn indus
## Min. : 1.0 Min. : 0.00632 Min. : 0.00 Min. : 0.46
## 1st Qu.:127.2 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19
## Median :253.5 Median : 0.25651 Median : 0.00 Median : 9.69
## Mean :253.5 Mean : 3.61352 Mean : 11.36 Mean :11.14
## 3rd Qu.:379.8 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10
## Max. :506.0 Max. :88.97620 Max. :100.00 Max. :27.74
## chas nox rm age
## Min. :0.00000 Min. :0.3850 Min. :3.561 Min. : 2.90
## 1st Qu.:0.00000 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02
## Median :0.00000 Median :0.5380 Median :6.208 Median : 77.50
## Mean :0.06917 Mean :0.5547 Mean :6.285 Mean : 68.57
## 3rd Qu.:0.00000 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08
## Max. :1.00000 Max. :0.8710 Max. :8.780 Max. :100.00
## dis rad tax ptratio
## Min. : 1.130 Min. : 1.000 Min. :187.0 Min. :12.60
## 1st Qu.: 2.100 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40
## Median : 3.207 Median : 5.000 Median :330.0 Median :19.05
## Mean : 3.795 Mean : 9.549 Mean :408.2 Mean :18.46
## 3rd Qu.: 5.188 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20
## Max. :12.127 Max. :24.000 Max. :711.0 Max. :22.00
## black lstat medv
## Min. : 0.32 Min. : 1.73 Min. : 5.00
## 1st Qu.:375.38 1st Qu.: 6.95 1st Qu.:17.02
## Median :391.44 Median :11.36 Median :21.20
## Mean :356.67 Mean :12.65 Mean :22.53
## 3rd Qu.:396.23 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :396.90 Max. :37.97 Max. :50.00
str(reg1)
## 'data.frame': 506 obs. of 15 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : int 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
#-------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$crim)
#convert them into log to remove the outliers effect
boxplot(log(reg1$crim))
#since the outliers are removed if log is taken so log crim wil be considered for analysis
#---------------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$zn)
#convert them into log to remove the outliers effect
boxplot(log(reg1$zn)) # since in ZN 73 % of the observations are not alloted any land so it should be converted into categories
#to identify the outliers, use box plot
boxplot(reg1$indus) # no outliers
#to identify the outliers, use box plot
boxplot(reg1$chas) # categorical data
#to identify the outliers, use box plot
boxplot(reg1$nox) # no outliers
#to identify the outliers, use box plot
boxplot(reg1$rm)
#convert them into log to remove the outliers effect
boxplot(log(reg1$rm))
#outliers removal using IQR range for rm
#upper limit for rm using IQR
bu_rm<-quantile(reg1$rm, 0.75, na.rm = TRUE)+1.5*IQR(reg1$rm, na.rm = TRUE)
#upper limit for rm using IQR
bl_rm<-quantile(reg1$rm, 0.25, na.rm = TRUE)-1.5*IQR(reg1$rm, na.rm = TRUE)
# gives the observations which are outliers
rm_u<-reg1$rm[reg1$rm>bu_rm]
length(rm_u)
## [1] 22
rm_l<-reg1$rm[reg1$rm<bl_rm]
length(rm_l)
## [1] 8
#outliers are to be replaced by the highest and lowest values, since the sample size decreases by removing them
reg1$rm<-ifelse(reg1$rm<=bl_rm,reg1$rm==bl_rm,reg1$rm)
#if lower limt is negative dont replace the outliers
reg1$rm<-ifelse(reg1$rm>=bu_rm, reg1$rm==bu_rm, reg1$rm)
#to check the outliers again after winsorising
boxplot(reg1$rm)
summary(reg1$rm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.851 6.152 5.864 6.515 7.691
length(reg1$rm)
## [1] 506
#---------------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$age) #no outliers
#to identify the outliers, use box plot
boxplot(reg1$dis)
#convert them into log to remove the outliers effect
boxplot(log(reg1$dis)) # no outliers
#---------------------------------------------------------------#
#to identify the outliers, use box plot
boxplot(reg1$rad) # no outliers
#---------------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$tax) # no outliers
#outliers
#to identify the outliers, use box plot
boxplot(reg1$ptratio) # no outliers
#outliers
#to identify the outliers, use box plot
boxplot(reg1$black)
#convert them into log to remove the outliers effect
boxplot(log(reg1$black))
#outliers removal using IQR range for black
#upper limit for black using IQR
bu_black<-quantile(reg1$black, 0.75, na.rm = TRUE)+1.5*IQR(reg1$black, na.rm = TRUE)
#upper limit for black using IQR
bl_black<-quantile(reg1$black, 0.25, na.rm = TRUE)-1.5*IQR(reg1$black, na.rm = TRUE)
# gives the observations which are outliers
black_u<-reg1$black[reg1$black>bu_black]
length(black_u)
## [1] 0
black_l<-reg1$black[reg1$black<bl_black]
length(black_l)
## [1] 77
#outliers are to be replaced by the highest and lowest values, since the sample size decreases by removing them
reg1$black<-ifelse(reg1$black<=bl_black,reg1$black==bl_black,reg1$black)
#if lower limt is negative dont replace the outliers
reg1$black<-ifelse(reg1$black>=bu_black, reg1$black==bu_black, reg1$black)
#to check the outliers again after winsorising
boxplot(reg1$black)
summary(reg1$black)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 375.4 391.4 329.6 396.2 396.9
length(reg1$black)
## [1] 506
#---------------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$lstat)
#convert them into log to remove the outliers effect
boxplot(log(reg1$lstat))
#---------------------------------------------------------------------------------------#
#outliers
#to identify the outliers, use box plot
boxplot(reg1$medv)
#convert them into log to remove the outliers effect
boxplot(log(reg1$medv))
#outliers removal using IQR range for medv
#upper limit for medv using IQR
bu_medv<-quantile(reg1$medv, 0.75, na.rm = TRUE)+1.5*IQR(reg1$medv, na.rm = TRUE)
#upper limit for medv using IQR
bl_medv<-quantile(reg1$medv, 0.25, na.rm = TRUE)-1.5*IQR(reg1$medv, na.rm = TRUE)
# gives the observations which are outliers
medv_u<-reg1$medv[reg1$medv>bu_medv]
length(medv_u)
## [1] 38
medv_l<-reg1$medv[reg1$medv<bl_medv]
length(medv_l)
## [1] 2
#outliers are to be replaced by the highest and lowest values, since the sample size decreases by removing them
reg1$medv<-ifelse(reg1$medv<=bl_medv,reg1$medv==bl_medv,reg1$medv)
#if lower limt is negative dont replace the outliers
reg1$medv<-ifelse(reg1$medv>=bu_medv, reg1$medv==bu_medv, reg1$medv)
#to check the outliers again after winsorising
boxplot(reg1$medv)
summary(reg1$medv)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 14.60 20.05 19.08 23.80 36.50
#Linear relation ship between Dependent and independent variables
#---------------------------------------#
#---------------------------------------#
#linear relation between medv and log(crim)
plot(reg1$medv, log(reg1$crim))
qplot(medv, log(crim), data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,log(reg1$crim))
##
## Pearson's product-moment correlation
##
## data: reg1$medv and log(reg1$crim)
## t = -10.6, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4956852 -0.3529316
## sample estimates:
## cor
## -0.4269649
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$indus)
qplot(medv, indus, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$indus)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$indus
## t = -8.3757, df = 504, p-value = 5.48e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4238029 -0.2706245
## sample estimates:
## cor
## -0.3495473
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$nox)
qplot(medv, nox, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$nox)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$nox
## t = -8.8922, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4412575 -0.2904067
## sample estimates:
## cor
## -0.3682533
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$rm)
qplot(medv, rm, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$rm)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$rm
## t = 13.259, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4409076 0.5704150
## sample estimates:
## cor
## 0.5085317
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$age)
qplot(medv, age, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$age)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$age
## t = -9.4416, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4593212 -0.3110159
## sample estimates:
## cor
## -0.3876744
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, log(reg1$dis))
qplot(medv, log(dis), data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,log(reg1$dis))
##
## Pearson's product-moment correlation
##
## data: reg1$medv and log(reg1$dis)
## t = 10.005, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3316809 0.4773115
## sample estimates:
## cor
## 0.40708
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$rad)
qplot(medv, rad, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$rad)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$rad
## t = -7.8726, df = 504, p-value = 2.143e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4063647 -0.2509892
## sample estimates:
## cor
## -0.3309179
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$tax)
qplot(medv, tax, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$tax)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$tax
## t = -8.4977, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4279683 -0.2753336
## sample estimates:
## cor
## -0.354006
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$ptratio)
qplot(medv, ptratio, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$ptratio)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$ptratio
## t = -3.6457, df = 504, p-value = 0.0002944
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24404986 -0.07415862
## sample estimates:
## cor
## -0.1602911
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, reg1$black)
qplot(medv, black, data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,reg1$black)
##
## Pearson's product-moment correlation
##
## data: reg1$medv and reg1$black
## t = 5.0064, df = 504, p-value = 7.68e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1330110 0.2991491
## sample estimates:
## cor
## 0.2176561
#---------------------------------------#
#linear relation between medv and indus
plot(reg1$medv, log(reg1$lstat))
qplot(medv, log(lstat), data = reg1, geom = "smooth", method="lm")
cor.test(reg1$medv,(reg1$lstat))
##
## Pearson's product-moment correlation
##
## data: reg1$medv and (reg1$lstat)
## t = -7.9568, df = 504, p-value = 1.174e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4093140 -0.2543011
## sample estimates:
## cor
## -0.3340645
#---------------------------------------#
#---------------------------------------#
set.seed(88)
split1 = sample.split(reg1, SplitRatio = 0.75)
summary(split1)
## Mode FALSE TRUE NA's
## logical 4 11 0
length(reg1$rm)
## [1] 506
# create a sub sample for the same
reg1train<-subset(reg1,split1==TRUE)
reg1Val<-subset(reg1,split1==FALSE)
summary(reg1train)
## X crim zn indus
## Min. : 1.0 Min. : 0.00632 Min. : 0.000 Min. : 0.460
## 1st Qu.:127.5 1st Qu.: 0.08827 1st Qu.: 0.000 1st Qu.: 5.190
## Median :253.5 Median : 0.26888 Median : 0.000 Median : 9.795
## Mean :253.0 Mean : 3.42444 Mean : 10.907 Mean :11.257
## 3rd Qu.:377.8 3rd Qu.: 3.75547 3rd Qu.: 9.375 3rd Qu.:18.100
## Max. :506.0 Max. :73.53410 Max. :100.000 Max. :27.740
## chas nox rm age
## Min. :0.00000 Min. :0.3850 Min. :0.000 Min. : 2.90
## 1st Qu.:0.00000 1st Qu.:0.4530 1st Qu.:5.852 1st Qu.: 45.65
## Median :0.00000 Median :0.5380 Median :6.162 Median : 78.60
## Mean :0.07027 Mean :0.5554 Mean :5.896 Mean : 68.75
## 3rd Qu.:0.00000 3rd Qu.:0.6240 3rd Qu.:6.495 3rd Qu.: 94.08
## Max. :1.00000 Max. :0.8710 Max. :7.691 Max. :100.00
## dis rad tax ptratio
## Min. : 1.130 Min. : 1.000 Min. :187.0 Min. :12.60
## 1st Qu.: 2.113 1st Qu.: 4.000 1st Qu.:284.0 1st Qu.:16.93
## Median : 3.106 Median : 5.000 Median :330.0 Median :18.80
## Mean : 3.795 Mean : 9.576 Mean :410.1 Mean :18.41
## 3rd Qu.: 5.227 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20
## Max. :12.127 Max. :24.000 Max. :711.0 Max. :22.00
## black lstat medv
## Min. : 0.0 Min. : 1.73 Min. : 0.00
## 1st Qu.:374.8 1st Qu.: 7.15 1st Qu.:14.53
## Median :390.8 Median :11.57 Median :19.95
## Mean :329.4 Mean :12.68 Mean :18.72
## 3rd Qu.:395.6 3rd Qu.:16.95 3rd Qu.:23.40
## Max. :396.9 Max. :37.97 Max. :36.40
summary(reg1Val)
## X crim zn indus
## Min. : 5.0 Min. : 0.01360 Min. : 0.00 Min. : 1.250
## 1st Qu.:127.5 1st Qu.: 0.06894 1st Qu.: 0.00 1st Qu.: 4.928
## Median :255.0 Median : 0.21508 Median : 0.00 Median : 8.140
## Mean :254.8 Mean : 4.12793 Mean :12.61 Mean :10.811
## 3rd Qu.:381.5 3rd Qu.: 3.48946 3rd Qu.:20.00 3rd Qu.:18.100
## Max. :505.0 Max. :88.97620 Max. :95.00 Max. :27.740
## chas nox rm age
## Min. :0.00000 Min. :0.3980 Min. :0.000 Min. : 9.80
## 1st Qu.:0.00000 1st Qu.:0.4470 1st Qu.:5.851 1st Qu.: 42.70
## Median :0.00000 Median :0.5380 Median :6.136 Median : 76.25
## Mean :0.06618 Mean :0.5527 Mean :5.777 Mean : 68.11
## 3rd Qu.:0.00000 3rd Qu.:0.6240 3rd Qu.:6.555 3rd Qu.: 93.95
## Max. :1.00000 Max. :0.8710 Max. :7.610 Max. :100.00
## dis rad tax ptratio
## Min. : 1.322 Min. : 1.000 Min. :188.0 Min. :13.00
## 1st Qu.: 1.990 1st Qu.: 4.000 1st Qu.:276.0 1st Qu.:17.55
## Median : 3.367 Median : 5.000 Median :332.0 Median :19.10
## Mean : 3.795 Mean : 9.478 Mean :403.2 Mean :18.59
## 3rd Qu.: 4.926 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20
## Max. :10.710 Max. :24.000 Max. :711.0 Max. :22.00
## black lstat medv
## Min. : 0.0 Min. : 2.470 Min. : 0.00
## 1st Qu.:376.5 1st Qu.: 6.862 1st Qu.:14.97
## Median :393.4 Median :10.570 Median :20.45
## Mean :329.9 Mean :12.566 Mean :20.07
## 3rd Qu.:396.9 3rd Qu.:16.980 3rd Qu.:25.00
## Max. :396.9 Max. :36.980 Max. :36.50
#fitting the model method 1
fit1 <-lm(medv ~log(crim)+indus+nox+rm+age+log(dis)+rad+tax+ptratio+black+log(lstat), data=reg1train)
summary(fit1)
##
## Call:
## lm(formula = medv ~ log(crim) + indus + nox + rm + age + log(dis) +
## rad + tax + ptratio + black + log(lstat), data = reg1train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.2452 -2.2053 0.0314 2.8149 16.9850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.408354 5.963202 0.404 0.6865
## log(crim) -0.801404 0.411658 -1.947 0.0523 .
## indus 0.094067 0.094433 0.996 0.3199
## nox 2.287610 6.262192 0.365 0.7151
## rm 2.798462 0.225566 12.406 <2e-16 ***
## age -0.041353 0.019953 -2.072 0.0389 *
## log(dis) 1.745463 1.325149 1.317 0.1886
## rad 0.029199 0.115455 0.253 0.8005
## tax -0.005255 0.005681 -0.925 0.3556
## ptratio -0.251536 0.189386 -1.328 0.1850
## black 0.003424 0.002660 1.287 0.1989
## log(lstat) 1.269495 0.769730 1.649 0.1000 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.219 on 358 degrees of freedom
## Multiple R-squared: 0.4548, Adjusted R-squared: 0.438
## F-statistic: 27.14 on 11 and 358 DF, p-value: < 2.2e-16
coefficients(fit1) # model coefficients
## (Intercept) log(crim) indus nox rm
## 2.408354341 -0.801404389 0.094067269 2.287609679 2.798461581
## age log(dis) rad tax ptratio
## -0.041353130 1.745463431 0.029198719 -0.005254872 -0.251536400
## black log(lstat)
## 0.003423819 1.269495276
confint(fit1, level=0.95) # CIs for model parameters
## 2.5 % 97.5 %
## (Intercept) -9.318954121 14.135662802
## log(crim) -1.610975917 0.008167139
## indus -0.091644879 0.279779417
## nox -10.027694552 14.602913909
## rm 2.354860564 3.242062598
## age -0.080593880 -0.002112380
## log(dis) -0.860590359 4.351517221
## rad -0.197856235 0.256253672
## tax -0.016426624 0.005916880
## ptratio -0.623984316 0.120911516
## black -0.001807556 0.008655194
## log(lstat) -0.244265288 2.783255840
fit_val<-fitted(fit1) # predicted values
#residuals(fit1) # residuals
anova(fit1) # anova table
## Analysis of Variance Table
##
## Response: medv
## Df Sum Sq Mean Sq F value Pr(>F)
## log(crim) 1 4204.3 4204.3 108.7122 < 2e-16 ***
## indus 1 0.2 0.2 0.0054 0.94169
## nox 1 33.6 33.6 0.8701 0.35156
## rm 1 6742.1 6742.1 174.3326 < 2e-16 ***
## age 1 230.2 230.2 5.9519 0.01519 *
## log(dis) 1 73.7 73.7 1.9049 0.16839
## rad 1 44.9 44.9 1.1622 0.28174
## tax 1 34.3 34.3 0.8871 0.34689
## ptratio 1 30.5 30.5 0.7890 0.37498
## black 1 48.6 48.6 1.2579 0.26279
## log(lstat) 1 105.2 105.2 2.7201 0.09997 .
## Residuals 358 13845.1 38.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vcov(fit1) # covariance matrix for model parameters
## (Intercept) log(crim) indus nox
## (Intercept) 35.559782676 0.6658641716 5.807653e-02 -27.322722681
## log(crim) 0.665864172 0.1694622051 -5.245035e-03 -0.621321043
## indus 0.058076533 -0.0052450347 8.917498e-03 -0.115437721
## nox -27.322722681 -0.6213210426 -1.154377e-01 39.215042582
## rm -0.023247740 0.0119588619 -2.068253e-04 -0.129412505
## age -0.018808389 -0.0009654415 7.190623e-05 -0.018328464
## log(dis) -4.315262214 0.0710888617 3.226833e-02 3.040987950
## rad 0.058624813 -0.0263517713 3.639519e-03 0.009319399
## tax -0.004506153 0.0002782160 -2.256866e-04 -0.001953321
## ptratio -0.633337821 -0.0019225713 -3.667932e-03 0.344010077
## black -0.003325295 0.0001505703 6.761737e-06 0.001277735
## log(lstat) 0.074196263 -0.0409166999 -7.351305e-03 -0.534028756
## rm age log(dis) rad
## (Intercept) -2.324774e-02 -1.880839e-02 -4.315262e+00 5.862481e-02
## log(crim) 1.195886e-02 -9.654415e-04 7.108886e-02 -2.635177e-02
## indus -2.068253e-04 7.190623e-05 3.226833e-02 3.639519e-03
## nox -1.294125e-01 -1.832846e-02 3.040988e+00 9.319399e-03
## rm 5.088004e-02 9.119014e-05 -4.479403e-02 -6.515114e-04
## age 9.119014e-05 3.981411e-04 1.061157e-02 1.819075e-04
## log(dis) -4.479403e-02 1.061157e-02 1.756019e+00 -6.488587e-03
## rad -6.515114e-04 1.819075e-04 -6.488587e-03 1.332982e-02
## tax -6.259735e-05 2.847461e-06 7.319694e-05 -4.916486e-04
## ptratio -4.366831e-03 -2.968289e-04 1.143656e-03 -3.735353e-03
## black 3.102445e-05 -2.677091e-06 1.777077e-04 -1.388101e-05
## log(lstat) -1.867652e-02 -4.065278e-03 -1.196343e-01 9.055132e-03
## tax ptratio black log(lstat)
## (Intercept) -4.506153e-03 -6.333378e-01 -3.325295e-03 0.0741962628
## log(crim) 2.782160e-04 -1.922571e-03 1.505703e-04 -0.0409166999
## indus -2.256866e-04 -3.667932e-03 6.761737e-06 -0.0073513053
## nox -1.953321e-03 3.440101e-01 1.277735e-03 -0.5340287563
## rm -6.259735e-05 -4.366831e-03 3.102445e-05 -0.0186765214
## age 2.847461e-06 -2.968289e-04 -2.677091e-06 -0.0040652784
## log(dis) 7.319694e-05 1.143656e-03 1.777077e-04 -0.1196343040
## rad -4.916486e-04 -3.735353e-03 -1.388101e-05 0.0090551321
## tax 3.227045e-05 3.198650e-06 1.163035e-06 -0.0002267425
## ptratio 3.198650e-06 3.586687e-02 -4.002219e-05 -0.0360927462
## black 1.163035e-06 -4.002219e-05 7.076101e-06 0.0002130596
## log(lstat) -2.267425e-04 -3.609275e-02 2.130596e-04 0.5924842022
#influence(fit1) # regression diagnostics
# diagnostic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(fit1)
#k fold analysis is used for model building method 2
#cv.lm(reg1, fit1, m=3) # k(3) fold cross-validation for simple linear regression
CVlm(reg1train, fit1, m=3, plotit= "Residual")
## Analysis of Variance Table
##
## Response: medv
## Df Sum Sq Mean Sq F value Pr(>F)
## log(crim) 1 4204 4204 108.71 <2e-16 ***
## indus 1 0 0 0.01 0.942
## nox 1 34 34 0.87 0.352
## rm 1 6742 6742 174.33 <2e-16 ***
## age 1 230 230 5.95 0.015 *
## log(dis) 1 74 74 1.90 0.168
## rad 1 45 45 1.16 0.282
## tax 1 34 34 0.89 0.347
## ptratio 1 31 31 0.79 0.375
## black 1 49 49 1.26 0.263
## log(lstat) 1 105 105 2.72 0.100 .
## Residuals 358 13845 39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in CVlm(reg1train, fit1, m = 3, plotit = "Residual"):
##
## As there is >1 explanatory variable, cross-validation
## predicted values for a fold are not a linear function
## of corresponding overall predicted values. Lines that
## are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 123
## 4 12 15 17 19 22 27 29 30 31
## Predicted 24.18 21.97 18.419 19.32 17.0 17.86 17.88 19.28 19.7 17.33
## cvpred 27.53 22.99 18.857 18.96 16.2 17.52 17.61 19.67 20.0 16.03
## medv 33.40 18.90 18.200 23.10 20.2 19.60 16.60 18.40 21.0 12.70
## CV residual 5.87 -4.09 -0.657 4.14 4.0 2.08 -1.01 -1.27 1.0 -3.33
## 37 39 44 59 62 63 72 74 75
## Predicted 20.142 21.44 23.971 22.570 19.99 21.97 22.592 23.3458 23.615
## cvpred 19.865 20.59 24.034 23.655 21.06 24.31 22.224 23.3361 23.501
## medv 20.000 24.70 24.700 23.300 16.00 22.20 21.700 23.4000 24.100
## CV residual 0.135 4.11 0.666 -0.355 -5.06 -2.11 -0.524 0.0639 0.599
## 78 84 90 92 93 94 97 99 102 105
## Predicted 21.95 22.544 23.27 21.4605 23.6 23.978 20.49 3.41 20.81 18.47
## cvpred 21.96 23.401 24.73 22.0634 25.6 25.786 19.97 1.34 21.33 17.96
## medv 20.80 22.900 28.70 22.0000 22.9 25.000 21.40 0.00 26.50 20.10
## CV residual -1.16 -0.501 3.97 -0.0634 -2.7 -0.786 1.43 -1.34 5.17 2.14
## 106 107 112 114 118 119 122 123 132 138
## Predicted 17.47 17.67 21.30 18.933 19.05 18.12 21.89 21.40 17.88 19.0
## cvpred 16.23 16.02 21.63 17.995 18.97 17.72 24.21 23.34 18.29 19.2
## medv 19.50 19.50 22.80 18.700 19.20 20.40 20.30 20.50 19.60 17.1
## CV residual 3.27 3.48 1.17 0.705 0.23 2.68 -3.91 -2.84 1.31 -2.1
## 147 148 149 153 159 162 163 168 179 183
## Predicted 15.87 16.01 16.74 14.8634 17.13 20.0 -1.175 17.4 22.33 21.6
## cvpred 16.17 13.59 14.63 15.3929 19.21 25.4 0.336 18.6 23.71 24.4
## medv 15.60 14.60 17.80 15.3000 24.30 0.0 0.000 23.8 29.90 0.0
## CV residual -0.57 1.01 3.17 -0.0929 5.09 -25.4 -0.336 5.2 6.19 -24.4
## 189 191 192 195 196 197 208 210 214 223
## Predicted 22.66 25.1 24.25 25.77 6.43 26.81 20.36 17.70 23.14 21.13
## cvpred 23.25 25.9 25.43 27.38 5.91 28.99 19.99 17.08 23.36 22.01
## medv 29.80 0.0 30.50 29.10 0.00 33.30 22.50 20.00 28.10 27.50
## CV residual 6.55 -25.9 5.07 1.72 -5.91 4.31 2.51 2.92 4.74 5.49
## 226 227 239 240 242 255 257 266 267 270
## Predicted 0.578 0.396 24.48 23.96 22.21 23.71 27.2 18.45 22.13 21.351
## cvpred -0.375 0.547 25.37 25.22 22.72 24.41 30.9 17.44 21.83 21.227
## medv 0.000 0.000 23.70 23.30 20.10 21.90 0.0 22.80 30.70 20.700
## CV residual 0.375 -0.547 -1.67 -1.92 -2.62 -2.51 -30.9 5.36 8.87 -0.527
## 272 281 283 287 288 292 297 298 309 315
## Predicted 22.96 4.71 26.1 24.0 24.31 24.5 24.853 22.75 19.571 20.46
## cvpred 23.28 4.91 30.4 24.6 25.59 27.0 27.417 23.35 22.088 21.72
## medv 25.20 0.00 0.0 20.1 23.20 0.0 27.100 20.30 22.800 23.80
## CV residual 1.92 -4.91 -30.4 -4.5 -2.39 -27.0 -0.317 -3.05 0.712 2.08
## 316 317 318 322 324 328 331 332 337 339
## Predicted 19.18 19.95 20.125 21.35 19.023 21.84 22.87 23.20 22.32 22.93
## cvpred 19.45 19.47 19.603 22.48 19.196 21.34 21.89 22.18 22.83 23.49
## medv 16.20 17.80 19.800 23.10 18.500 22.20 19.80 17.10 19.50 20.60
## CV residual -3.25 -1.67 0.197 0.62 -0.696 0.86 -2.09 -5.08 -3.33 -2.89
## 341 345 352 354 363 374 375 388 390 401
## Predicted 21.4 25.04 23.387 26.601 13.52 11.57 -2.51 12.11 13.314 14.19
## cvpred 22.2 26.46 24.218 30.201 13.85 8.27 -8.65 8.87 11.813 12.17
## medv 18.7 31.20 24.100 30.100 20.80 13.80 13.80 7.40 11.500 5.60
## CV residual -3.5 4.74 -0.118 -0.101 6.95 5.53 22.45 -1.47 -0.313 -6.57
## 403 404 407 412 417 418 420 421 427 444
## Predicted 15.76 12.32 -3.27 14.6 16.27 11.372 16.81 14.3 14.13 16.337
## cvpred 14.83 10.58 -8.29 14.5 15.78 9.712 16.16 15.0 13.47 15.756
## medv 12.10 8.30 11.90 17.2 7.50 10.400 8.40 16.7 10.20 15.400
## CV residual -2.73 -2.28 20.19 2.7 -8.28 0.688 -7.76 1.7 -3.27 -0.356
## 449 454 457 462 466 468 469 477 481 483
## Predicted 15.76 19.18 15.11 17.396 16.25 14.78 15.81 17.1720 17.35 18.59
## cvpred 15.23 19.62 15.33 17.479 16.19 15.02 14.68 16.7968 17.67 20.58
## medv 14.10 17.80 12.70 17.700 19.90 19.10 19.10 16.7000 23.00 25.00
## CV residual -1.13 -1.82 -2.63 0.221 3.71 4.08 4.42 -0.0968 5.33 4.42
## 484 489 497 498
## Predicted 17.86 16.753 18.04 18.870
## cvpred 17.68 14.881 15.83 17.714
## medv 21.80 15.200 19.70 18.300
## CV residual 4.12 0.319 3.87 0.586
##
## Sum of squares = 6721 Mean square = 54.6 n = 123
##
## fold 2
## Observations in test set: 124
## 1 9 14 16 18 24 33 42 46 47
## Predicted 24.10 20.69 18.83 18.68 18.468 17.37 17.26 25.29 21.51 22.16
## cvpred 25.62 20.14 18.37 18.39 18.275 17.22 16.45 24.26 21.02 21.88
## medv 24.00 16.50 20.40 19.90 17.500 14.50 13.20 26.60 19.30 20.00
## CV residual -1.62 -3.64 2.03 1.51 -0.775 -2.72 -3.25 2.34 -1.72 -1.88
## 52 54 57 58 61 77 82 86 91 108
## Predicted 23.13 24.276 24.433 26.07 20.6 21.163 22.61 22.8 21.763 18.524
## cvpred 22.41 23.864 24.223 25.05 20.1 20.964 22.36 22.8 22.942 20.507
## medv 20.50 23.400 24.700 31.60 18.7 20.000 23.90 26.6 22.600 20.400
## CV residual -1.91 -0.464 0.477 6.55 -1.4 -0.964 1.54 3.8 -0.342 -0.107
## 109 117 120 121 127 129 131 133 135
## Predicted 19.111 20.198 19.35 22.241 19.34 19.06 19.109 18.44 15.552
## cvpred 20.597 21.455 20.79 22.441 19.17 20.26 19.852 18.61 14.967
## medv 19.800 21.200 19.30 22.000 15.70 18.00 19.200 23.00 15.600
## CV residual -0.797 -0.255 -1.49 -0.441 -3.47 -2.26 -0.652 4.39 0.633
## 136 139 142 146 161 164 165 166 167 172
## Predicted 18.77 17.94 14.39 17.73 16.6 -0.0541 17.92 16.7 -0.398 17.59
## cvpred 19.51 19.81 15.94 18.19 14.2 -1.2597 16.34 13.9 -1.658 15.87
## medv 18.10 13.30 14.40 13.80 27.0 0.0000 22.70 25.0 0.000 19.10
## CV residual -1.41 -6.51 -1.54 -4.39 12.8 1.2597 6.36 11.1 1.658 3.23
## 176 180 181 187 198 206 211 213 217 219
## Predicted 22.9 23.0 2.77 3.71 26.89 22.449 20.10 20.88 22.811 20.957
## cvpred 23.7 23.5 4.56 5.22 25.83 22.794 19.94 21.06 23.627 21.198
## medv 29.4 0.0 0.00 0.00 30.30 22.600 21.70 22.40 23.300 21.500
## CV residual 5.7 -23.5 -4.56 -5.22 4.47 -0.194 1.76 1.34 -0.327 0.302
## 222 224 228 232 234 236 237 238 243 244
## Predicted 19.78 20.0 21.5 22.2 1.67 20.35 20.99 22.4 23.453 24.271
## cvpred 20.02 19.0 20.5 20.8 1.62 20.14 19.96 20.7 22.486 23.015
## medv 21.70 30.1 31.6 31.7 0.00 24.00 25.10 31.5 22.200 23.700
## CV residual 1.68 11.1 11.1 10.9 -1.62 3.86 5.14 10.8 -0.286 0.685
## 249 253 256 262 271 273 274 277 285 286
## Predicted 22.77 25.54 24.28 22.8 21.160 22.30 25.3 24.86 28.5 25.97
## cvpred 21.68 24.21 23.91 22.5 20.801 21.96 24.1 23.89 28.7 26.19
## medv 24.50 29.60 20.90 0.0 21.100 24.40 35.2 33.20 32.2 22.00
## CV residual 2.82 5.39 -3.01 -22.5 0.299 2.44 11.1 9.31 3.5 -4.19
## 301 304 312 327 330 333 334 342 344 346
## Predicted 25.064 25.37 18.94 21.98 23.24 23.93 23.24 26.82 23.904 22.89
## cvpred 23.958 24.34 18.48 21.23 24.01 23.88 23.27 26.68 24.567 23.28
## medv 24.800 33.10 22.10 23.00 22.60 19.40 22.20 32.70 23.900 17.50
## CV residual 0.842 8.76 3.62 1.77 -1.41 -4.48 -1.07 6.02 -0.667 -5.78
## 348 349 358 364 367 369 371 373 377 379
## Predicted 25.26 25.98 17.27 15.138 11.3 9.46 14.8 13.0 16.08 15.05
## cvpred 25.34 25.94 17.93 16.305 11.6 9.10 14.1 13.5 16.69 15.47
## medv 23.10 24.50 21.70 16.800 21.9 0.00 0.0 0.0 13.90 13.10
## CV residual -2.24 -1.44 3.77 0.495 10.3 -9.10 -14.1 -13.5 -2.79 -2.37
## 382 389 391 392 399 405 408 414 422 423
## Predicted 15.73 11.6 14.353 16.4 12.4 11.81 10.9 9.82 13.771 12.35
## cvpred 16.07 12.7 14.959 17.1 12.9 11.23 10.4 8.55 13.462 11.11
## medv 10.90 10.2 15.100 23.2 0.0 8.50 27.9 16.30 14.200 20.80
## CV residual -5.17 -2.5 0.141 6.1 -12.9 -2.73 17.5 7.75 0.738 9.69
## 424 429 433 434 435 436 438 447 450 453
## Predicted 14.855 15.33 15.49 15.86 14.04 15.94 14.12 15.13490 15.3 16.82
## cvpred 14.239 15.14 14.11 15.35 12.97 15.52 13.82 14.89402 14.7 17.29
## medv 13.400 11.00 16.10 14.30 11.70 13.40 8.70 14.90000 13.0 16.10
## CV residual -0.839 -4.14 1.99 -1.05 -1.27 -2.12 -5.12 0.00598 -1.7 -1.19
## 461 463 464 472 476 478 479 482 487 491
## Predicted 16.852 17.05 17.07 16.58 14.717 12.86 15.406 17.89 16.97 14.55
## cvpred 16.184 17.08 16.75 15.49 13.725 12.65 15.014 16.12 16.03 16.09
## medv 16.400 19.50 20.20 19.60 13.300 12.00 14.600 23.70 19.10 8.10
## CV residual 0.216 2.42 3.45 4.11 -0.425 -0.65 -0.414 7.58 3.07 -7.99
## 492 493 496 504 506
## Predicted 18.29 18.737 20.7878 21.06 19.71
## cvpred 20.54 20.686 23.0461 22.15 21.28
## medv 13.60 20.100 23.1000 23.90 11.90
## CV residual -6.94 -0.586 0.0539 1.75 -9.38
##
## Sum of squares = 4418 Mean square = 35.6 n = 124
##
## fold 3
## Observations in test set: 123
## 2 3 7 11 13 26 28 32 34 41
## Predicted 23.0129 24.8 22.599 22.58 23.59 16.1 17.14 17.28 16.78 24.7
## cvpred 21.6136 22.7 22.396 23.15 23.98 17.2 18.23 18.03 17.78 22.8
## medv 21.6000 34.7 22.900 15.00 21.70 13.9 14.80 14.50 13.10 34.9
## CV residual -0.0136 12.0 0.504 -8.15 -2.28 -3.3 -3.43 -3.53 -4.68 12.1
## 43 45 48 49 56 60 64 67 69 71
## Predicted 23.59 22.70 21.07 19.51 27.88 21.73 24.336 22.96 21.09 24.324
## cvpred 23.98 23.05 21.89 20.95 26.31 21.63 24.352 23.44 22.46 24.705
## medv 25.30 21.20 16.60 14.40 35.40 19.60 25.000 19.40 17.40 24.200
## CV residual 1.32 -1.85 -5.29 -6.55 9.09 -2.03 0.648 -4.04 -5.06 -0.505
## 73 76 79 87 88 89 101 103 104 116
## Predicted 23.034 22.3 22.82 22.528 21.26 22.02 20.30 17.59 18.47 18.640
## cvpred 23.362 22.5 22.99 22.159 20.37 19.83 19.07 16.89 17.85 18.027
## medv 22.800 21.4 21.20 22.500 22.20 23.60 27.50 18.60 19.30 18.300
## CV residual -0.562 -1.1 -1.79 0.341 1.83 3.77 8.43 1.71 1.45 0.273
## 124 134 137 144 150 151 152 154 157 169
## Predicted 20.85 17.969 18.12 16.73 17.34 18.75 15.33 16.21 15.22 17.36
## cvpred 20.17 17.761 17.72 16.82 17.11 17.69 14.45 15.66 15.04 18.02
## medv 17.30 18.400 17.40 15.60 15.40 21.50 19.60 19.40 13.10 23.80
## CV residual -2.87 0.639 -0.32 -1.22 -1.71 3.81 5.15 3.74 -1.94 5.78
## 174 177 178 182 184 193 194 199 202 204
## Predicted 20.61 21.89 21.13 21.0 20.0 24.9 26.59 27.25 23.535 6.06
## cvpred 18.82 20.93 19.09 19.3 17.7 24.2 25.93 27.03 23.458 6.63
## medv 23.60 23.20 24.60 36.2 32.5 36.4 31.10 34.60 24.100 0.00
## CV residual 4.78 2.27 5.51 16.9 14.8 12.2 5.17 7.57 0.642 -6.63
## 207 209 212 221 225 229 241 247 251 252
## Predicted 22.18 21.9 18.4029 21.07 1.05 25.3 24.72 21.92 23.9043 22.90
## cvpred 22.43 22.0 19.3256 19.93 1.39 24.1 24.85 23.25 24.3101 23.23
## medv 24.40 24.4 19.3000 26.70 0.00 0.0 22.00 24.30 24.4000 24.80
## CV residual 1.97 2.4 -0.0256 6.77 -1.39 -24.1 -2.85 1.05 0.0899 1.57
## 254 258 259 264 268 269 279 282 284 289
## Predicted 4.84 0.986 21.5 22.2 2.66 23.4 23.57 25.8 7.26 23.96
## cvpred 7.73 0.829 19.4 20.7 3.73 21.8 22.94 24.1 7.64 23.69
## medv 0.00 0.000 36.0 31.0 0.00 0.0 29.10 35.4 0.00 22.30
## CV residual -7.73 -0.829 16.6 10.3 -3.73 -21.8 6.16 11.3 -7.64 -1.39
## 294 296 299 300 302 303 307 311 313 314 319
## Predicted 24.74 25.14 24.08 26.52 25.03 24.7 23.7 16.09 19.0 19.67 20.81
## cvpred 25.32 25.52 24.32 26.52 24.33 24.9 21.1 17.85 18.3 18.86 20.74
## medv 23.90 28.60 22.50 29.00 22.00 26.4 33.4 16.10 19.4 21.60 23.10
## CV residual -1.42 3.08 -1.82 2.48 -2.33 1.5 12.3 -1.75 1.1 2.74 2.36
## 326 329 343 347 356 357 359 360 361 362
## Predicted 23.01 22.01 23.65 22.02 21.5 15.84 16.57 16.74 16.55 16.66
## cvpred 22.92 22.43 22.86 22.72 22.4 15.55 16.02 16.08 15.35 15.78
## medv 24.60 19.30 16.50 17.20 20.6 17.80 22.70 22.60 25.00 19.90
## CV residual 1.68 -3.13 -6.36 -5.52 -1.8 2.25 6.68 6.52 9.65 4.12
## 372 376 378 384 386 387 393 394 397 402
## Predicted 13.5 16.93 16.63 13.65 12.61 -2.5921 12.43 15.42 16.38 15.28
## cvpred 12.7 16.52 16.34 13.72 13.39 0.0085 13.14 15.22 15.95 15.38
## medv 0.0 15.00 13.30 12.30 7.20 10.5000 9.70 13.80 12.50 7.20
## CV residual -12.7 -1.52 -3.04 -1.42 -6.19 10.4915 -3.44 -1.42 -3.45 -8.18
## 406 409 416 419 431 432 437 439 442 446
## Predicted 12.1 12.47 14.7 11.75 14.96 16.01 14.89 14.32 16.37 15.41
## cvpred 13.5 13.14 15.6 13.44 15.74 16.77 15.12 15.18 16.28 15.66
## medv 0.0 17.20 7.2 8.80 14.50 14.10 9.60 8.40 17.10 11.80
## CV residual -13.5 4.06 -8.4 -4.64 -1.24 -2.67 -5.52 -6.78 0.82 -3.86
## 448 451 452 459 465 467 474 480 494 495
## Predicted 15.84 16.49 17.43 15.72 17.28 15.27 19.2 15.0 19.12 19.98
## cvpred 15.75 16.41 17.08 16.12 17.99 15.66 19.2 15.6 18.33 19.69
## medv 12.60 13.40 15.20 14.90 21.40 19.00 29.8 21.4 21.80 24.50
## CV residual -3.15 -3.01 -1.88 -1.22 3.41 3.34 10.6 5.8 3.47 4.81
## 499 502
## Predicted 19.38 21.77
## cvpred 18.66 19.94
## medv 21.20 22.40
## CV residual 2.54 2.46
##
## Sum of squares = 5008 Mean square = 40.7 n = 123
##
## Overall (Sum over all 123 folds)
## ms
## 43.6
CVlm(reg1train, fit1, m=3, plotit= "Observed")
## Analysis of Variance Table
##
## Response: medv
## Df Sum Sq Mean Sq F value Pr(>F)
## log(crim) 1 4204 4204 108.71 <2e-16 ***
## indus 1 0 0 0.01 0.942
## nox 1 34 34 0.87 0.352
## rm 1 6742 6742 174.33 <2e-16 ***
## age 1 230 230 5.95 0.015 *
## log(dis) 1 74 74 1.90 0.168
## rad 1 45 45 1.16 0.282
## tax 1 34 34 0.89 0.347
## ptratio 1 31 31 0.79 0.375
## black 1 49 49 1.26 0.263
## log(lstat) 1 105 105 2.72 0.100 .
## Residuals 358 13845 39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in CVlm(reg1train, fit1, m = 3, plotit = "Observed"):
##
## As there is >1 explanatory variable, cross-validation
## predicted values for a fold are not a linear function
## of corresponding overall predicted values. Lines that
## are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 123
## 4 12 15 17 19 22 27 29 30 31
## Predicted 24.18 21.97 18.419 19.32 17.0 17.86 17.88 19.28 19.7 17.33
## cvpred 27.53 22.99 18.857 18.96 16.2 17.52 17.61 19.67 20.0 16.03
## medv 33.40 18.90 18.200 23.10 20.2 19.60 16.60 18.40 21.0 12.70
## CV residual 5.87 -4.09 -0.657 4.14 4.0 2.08 -1.01 -1.27 1.0 -3.33
## 37 39 44 59 62 63 72 74 75
## Predicted 20.142 21.44 23.971 22.570 19.99 21.97 22.592 23.3458 23.615
## cvpred 19.865 20.59 24.034 23.655 21.06 24.31 22.224 23.3361 23.501
## medv 20.000 24.70 24.700 23.300 16.00 22.20 21.700 23.4000 24.100
## CV residual 0.135 4.11 0.666 -0.355 -5.06 -2.11 -0.524 0.0639 0.599
## 78 84 90 92 93 94 97 99 102 105
## Predicted 21.95 22.544 23.27 21.4605 23.6 23.978 20.49 3.41 20.81 18.47
## cvpred 21.96 23.401 24.73 22.0634 25.6 25.786 19.97 1.34 21.33 17.96
## medv 20.80 22.900 28.70 22.0000 22.9 25.000 21.40 0.00 26.50 20.10
## CV residual -1.16 -0.501 3.97 -0.0634 -2.7 -0.786 1.43 -1.34 5.17 2.14
## 106 107 112 114 118 119 122 123 132 138
## Predicted 17.47 17.67 21.30 18.933 19.05 18.12 21.89 21.40 17.88 19.0
## cvpred 16.23 16.02 21.63 17.995 18.97 17.72 24.21 23.34 18.29 19.2
## medv 19.50 19.50 22.80 18.700 19.20 20.40 20.30 20.50 19.60 17.1
## CV residual 3.27 3.48 1.17 0.705 0.23 2.68 -3.91 -2.84 1.31 -2.1
## 147 148 149 153 159 162 163 168 179 183
## Predicted 15.87 16.01 16.74 14.8634 17.13 20.0 -1.175 17.4 22.33 21.6
## cvpred 16.17 13.59 14.63 15.3929 19.21 25.4 0.336 18.6 23.71 24.4
## medv 15.60 14.60 17.80 15.3000 24.30 0.0 0.000 23.8 29.90 0.0
## CV residual -0.57 1.01 3.17 -0.0929 5.09 -25.4 -0.336 5.2 6.19 -24.4
## 189 191 192 195 196 197 208 210 214 223
## Predicted 22.66 25.1 24.25 25.77 6.43 26.81 20.36 17.70 23.14 21.13
## cvpred 23.25 25.9 25.43 27.38 5.91 28.99 19.99 17.08 23.36 22.01
## medv 29.80 0.0 30.50 29.10 0.00 33.30 22.50 20.00 28.10 27.50
## CV residual 6.55 -25.9 5.07 1.72 -5.91 4.31 2.51 2.92 4.74 5.49
## 226 227 239 240 242 255 257 266 267 270
## Predicted 0.578 0.396 24.48 23.96 22.21 23.71 27.2 18.45 22.13 21.351
## cvpred -0.375 0.547 25.37 25.22 22.72 24.41 30.9 17.44 21.83 21.227
## medv 0.000 0.000 23.70 23.30 20.10 21.90 0.0 22.80 30.70 20.700
## CV residual 0.375 -0.547 -1.67 -1.92 -2.62 -2.51 -30.9 5.36 8.87 -0.527
## 272 281 283 287 288 292 297 298 309 315
## Predicted 22.96 4.71 26.1 24.0 24.31 24.5 24.853 22.75 19.571 20.46
## cvpred 23.28 4.91 30.4 24.6 25.59 27.0 27.417 23.35 22.088 21.72
## medv 25.20 0.00 0.0 20.1 23.20 0.0 27.100 20.30 22.800 23.80
## CV residual 1.92 -4.91 -30.4 -4.5 -2.39 -27.0 -0.317 -3.05 0.712 2.08
## 316 317 318 322 324 328 331 332 337 339
## Predicted 19.18 19.95 20.125 21.35 19.023 21.84 22.87 23.20 22.32 22.93
## cvpred 19.45 19.47 19.603 22.48 19.196 21.34 21.89 22.18 22.83 23.49
## medv 16.20 17.80 19.800 23.10 18.500 22.20 19.80 17.10 19.50 20.60
## CV residual -3.25 -1.67 0.197 0.62 -0.696 0.86 -2.09 -5.08 -3.33 -2.89
## 341 345 352 354 363 374 375 388 390 401
## Predicted 21.4 25.04 23.387 26.601 13.52 11.57 -2.51 12.11 13.314 14.19
## cvpred 22.2 26.46 24.218 30.201 13.85 8.27 -8.65 8.87 11.813 12.17
## medv 18.7 31.20 24.100 30.100 20.80 13.80 13.80 7.40 11.500 5.60
## CV residual -3.5 4.74 -0.118 -0.101 6.95 5.53 22.45 -1.47 -0.313 -6.57
## 403 404 407 412 417 418 420 421 427 444
## Predicted 15.76 12.32 -3.27 14.6 16.27 11.372 16.81 14.3 14.13 16.337
## cvpred 14.83 10.58 -8.29 14.5 15.78 9.712 16.16 15.0 13.47 15.756
## medv 12.10 8.30 11.90 17.2 7.50 10.400 8.40 16.7 10.20 15.400
## CV residual -2.73 -2.28 20.19 2.7 -8.28 0.688 -7.76 1.7 -3.27 -0.356
## 449 454 457 462 466 468 469 477 481 483
## Predicted 15.76 19.18 15.11 17.396 16.25 14.78 15.81 17.1720 17.35 18.59
## cvpred 15.23 19.62 15.33 17.479 16.19 15.02 14.68 16.7968 17.67 20.58
## medv 14.10 17.80 12.70 17.700 19.90 19.10 19.10 16.7000 23.00 25.00
## CV residual -1.13 -1.82 -2.63 0.221 3.71 4.08 4.42 -0.0968 5.33 4.42
## 484 489 497 498
## Predicted 17.86 16.753 18.04 18.870
## cvpred 17.68 14.881 15.83 17.714
## medv 21.80 15.200 19.70 18.300
## CV residual 4.12 0.319 3.87 0.586
##
## Sum of squares = 6721 Mean square = 54.6 n = 123
##
## fold 2
## Observations in test set: 124
## 1 9 14 16 18 24 33 42 46 47
## Predicted 24.10 20.69 18.83 18.68 18.468 17.37 17.26 25.29 21.51 22.16
## cvpred 25.62 20.14 18.37 18.39 18.275 17.22 16.45 24.26 21.02 21.88
## medv 24.00 16.50 20.40 19.90 17.500 14.50 13.20 26.60 19.30 20.00
## CV residual -1.62 -3.64 2.03 1.51 -0.775 -2.72 -3.25 2.34 -1.72 -1.88
## 52 54 57 58 61 77 82 86 91 108
## Predicted 23.13 24.276 24.433 26.07 20.6 21.163 22.61 22.8 21.763 18.524
## cvpred 22.41 23.864 24.223 25.05 20.1 20.964 22.36 22.8 22.942 20.507
## medv 20.50 23.400 24.700 31.60 18.7 20.000 23.90 26.6 22.600 20.400
## CV residual -1.91 -0.464 0.477 6.55 -1.4 -0.964 1.54 3.8 -0.342 -0.107
## 109 117 120 121 127 129 131 133 135
## Predicted 19.111 20.198 19.35 22.241 19.34 19.06 19.109 18.44 15.552
## cvpred 20.597 21.455 20.79 22.441 19.17 20.26 19.852 18.61 14.967
## medv 19.800 21.200 19.30 22.000 15.70 18.00 19.200 23.00 15.600
## CV residual -0.797 -0.255 -1.49 -0.441 -3.47 -2.26 -0.652 4.39 0.633
## 136 139 142 146 161 164 165 166 167 172
## Predicted 18.77 17.94 14.39 17.73 16.6 -0.0541 17.92 16.7 -0.398 17.59
## cvpred 19.51 19.81 15.94 18.19 14.2 -1.2597 16.34 13.9 -1.658 15.87
## medv 18.10 13.30 14.40 13.80 27.0 0.0000 22.70 25.0 0.000 19.10
## CV residual -1.41 -6.51 -1.54 -4.39 12.8 1.2597 6.36 11.1 1.658 3.23
## 176 180 181 187 198 206 211 213 217 219
## Predicted 22.9 23.0 2.77 3.71 26.89 22.449 20.10 20.88 22.811 20.957
## cvpred 23.7 23.5 4.56 5.22 25.83 22.794 19.94 21.06 23.627 21.198
## medv 29.4 0.0 0.00 0.00 30.30 22.600 21.70 22.40 23.300 21.500
## CV residual 5.7 -23.5 -4.56 -5.22 4.47 -0.194 1.76 1.34 -0.327 0.302
## 222 224 228 232 234 236 237 238 243 244
## Predicted 19.78 20.0 21.5 22.2 1.67 20.35 20.99 22.4 23.453 24.271
## cvpred 20.02 19.0 20.5 20.8 1.62 20.14 19.96 20.7 22.486 23.015
## medv 21.70 30.1 31.6 31.7 0.00 24.00 25.10 31.5 22.200 23.700
## CV residual 1.68 11.1 11.1 10.9 -1.62 3.86 5.14 10.8 -0.286 0.685
## 249 253 256 262 271 273 274 277 285 286
## Predicted 22.77 25.54 24.28 22.8 21.160 22.30 25.3 24.86 28.5 25.97
## cvpred 21.68 24.21 23.91 22.5 20.801 21.96 24.1 23.89 28.7 26.19
## medv 24.50 29.60 20.90 0.0 21.100 24.40 35.2 33.20 32.2 22.00
## CV residual 2.82 5.39 -3.01 -22.5 0.299 2.44 11.1 9.31 3.5 -4.19
## 301 304 312 327 330 333 334 342 344 346
## Predicted 25.064 25.37 18.94 21.98 23.24 23.93 23.24 26.82 23.904 22.89
## cvpred 23.958 24.34 18.48 21.23 24.01 23.88 23.27 26.68 24.567 23.28
## medv 24.800 33.10 22.10 23.00 22.60 19.40 22.20 32.70 23.900 17.50
## CV residual 0.842 8.76 3.62 1.77 -1.41 -4.48 -1.07 6.02 -0.667 -5.78
## 348 349 358 364 367 369 371 373 377 379
## Predicted 25.26 25.98 17.27 15.138 11.3 9.46 14.8 13.0 16.08 15.05
## cvpred 25.34 25.94 17.93 16.305 11.6 9.10 14.1 13.5 16.69 15.47
## medv 23.10 24.50 21.70 16.800 21.9 0.00 0.0 0.0 13.90 13.10
## CV residual -2.24 -1.44 3.77 0.495 10.3 -9.10 -14.1 -13.5 -2.79 -2.37
## 382 389 391 392 399 405 408 414 422 423
## Predicted 15.73 11.6 14.353 16.4 12.4 11.81 10.9 9.82 13.771 12.35
## cvpred 16.07 12.7 14.959 17.1 12.9 11.23 10.4 8.55 13.462 11.11
## medv 10.90 10.2 15.100 23.2 0.0 8.50 27.9 16.30 14.200 20.80
## CV residual -5.17 -2.5 0.141 6.1 -12.9 -2.73 17.5 7.75 0.738 9.69
## 424 429 433 434 435 436 438 447 450 453
## Predicted 14.855 15.33 15.49 15.86 14.04 15.94 14.12 15.13490 15.3 16.82
## cvpred 14.239 15.14 14.11 15.35 12.97 15.52 13.82 14.89402 14.7 17.29
## medv 13.400 11.00 16.10 14.30 11.70 13.40 8.70 14.90000 13.0 16.10
## CV residual -0.839 -4.14 1.99 -1.05 -1.27 -2.12 -5.12 0.00598 -1.7 -1.19
## 461 463 464 472 476 478 479 482 487 491
## Predicted 16.852 17.05 17.07 16.58 14.717 12.86 15.406 17.89 16.97 14.55
## cvpred 16.184 17.08 16.75 15.49 13.725 12.65 15.014 16.12 16.03 16.09
## medv 16.400 19.50 20.20 19.60 13.300 12.00 14.600 23.70 19.10 8.10
## CV residual 0.216 2.42 3.45 4.11 -0.425 -0.65 -0.414 7.58 3.07 -7.99
## 492 493 496 504 506
## Predicted 18.29 18.737 20.7878 21.06 19.71
## cvpred 20.54 20.686 23.0461 22.15 21.28
## medv 13.60 20.100 23.1000 23.90 11.90
## CV residual -6.94 -0.586 0.0539 1.75 -9.38
##
## Sum of squares = 4418 Mean square = 35.6 n = 124
##
## fold 3
## Observations in test set: 123
## 2 3 7 11 13 26 28 32 34 41
## Predicted 23.0129 24.8 22.599 22.58 23.59 16.1 17.14 17.28 16.78 24.7
## cvpred 21.6136 22.7 22.396 23.15 23.98 17.2 18.23 18.03 17.78 22.8
## medv 21.6000 34.7 22.900 15.00 21.70 13.9 14.80 14.50 13.10 34.9
## CV residual -0.0136 12.0 0.504 -8.15 -2.28 -3.3 -3.43 -3.53 -4.68 12.1
## 43 45 48 49 56 60 64 67 69 71
## Predicted 23.59 22.70 21.07 19.51 27.88 21.73 24.336 22.96 21.09 24.324
## cvpred 23.98 23.05 21.89 20.95 26.31 21.63 24.352 23.44 22.46 24.705
## medv 25.30 21.20 16.60 14.40 35.40 19.60 25.000 19.40 17.40 24.200
## CV residual 1.32 -1.85 -5.29 -6.55 9.09 -2.03 0.648 -4.04 -5.06 -0.505
## 73 76 79 87 88 89 101 103 104 116
## Predicted 23.034 22.3 22.82 22.528 21.26 22.02 20.30 17.59 18.47 18.640
## cvpred 23.362 22.5 22.99 22.159 20.37 19.83 19.07 16.89 17.85 18.027
## medv 22.800 21.4 21.20 22.500 22.20 23.60 27.50 18.60 19.30 18.300
## CV residual -0.562 -1.1 -1.79 0.341 1.83 3.77 8.43 1.71 1.45 0.273
## 124 134 137 144 150 151 152 154 157 169
## Predicted 20.85 17.969 18.12 16.73 17.34 18.75 15.33 16.21 15.22 17.36
## cvpred 20.17 17.761 17.72 16.82 17.11 17.69 14.45 15.66 15.04 18.02
## medv 17.30 18.400 17.40 15.60 15.40 21.50 19.60 19.40 13.10 23.80
## CV residual -2.87 0.639 -0.32 -1.22 -1.71 3.81 5.15 3.74 -1.94 5.78
## 174 177 178 182 184 193 194 199 202 204
## Predicted 20.61 21.89 21.13 21.0 20.0 24.9 26.59 27.25 23.535 6.06
## cvpred 18.82 20.93 19.09 19.3 17.7 24.2 25.93 27.03 23.458 6.63
## medv 23.60 23.20 24.60 36.2 32.5 36.4 31.10 34.60 24.100 0.00
## CV residual 4.78 2.27 5.51 16.9 14.8 12.2 5.17 7.57 0.642 -6.63
## 207 209 212 221 225 229 241 247 251 252
## Predicted 22.18 21.9 18.4029 21.07 1.05 25.3 24.72 21.92 23.9043 22.90
## cvpred 22.43 22.0 19.3256 19.93 1.39 24.1 24.85 23.25 24.3101 23.23
## medv 24.40 24.4 19.3000 26.70 0.00 0.0 22.00 24.30 24.4000 24.80
## CV residual 1.97 2.4 -0.0256 6.77 -1.39 -24.1 -2.85 1.05 0.0899 1.57
## 254 258 259 264 268 269 279 282 284 289
## Predicted 4.84 0.986 21.5 22.2 2.66 23.4 23.57 25.8 7.26 23.96
## cvpred 7.73 0.829 19.4 20.7 3.73 21.8 22.94 24.1 7.64 23.69
## medv 0.00 0.000 36.0 31.0 0.00 0.0 29.10 35.4 0.00 22.30
## CV residual -7.73 -0.829 16.6 10.3 -3.73 -21.8 6.16 11.3 -7.64 -1.39
## 294 296 299 300 302 303 307 311 313 314 319
## Predicted 24.74 25.14 24.08 26.52 25.03 24.7 23.7 16.09 19.0 19.67 20.81
## cvpred 25.32 25.52 24.32 26.52 24.33 24.9 21.1 17.85 18.3 18.86 20.74
## medv 23.90 28.60 22.50 29.00 22.00 26.4 33.4 16.10 19.4 21.60 23.10
## CV residual -1.42 3.08 -1.82 2.48 -2.33 1.5 12.3 -1.75 1.1 2.74 2.36
## 326 329 343 347 356 357 359 360 361 362
## Predicted 23.01 22.01 23.65 22.02 21.5 15.84 16.57 16.74 16.55 16.66
## cvpred 22.92 22.43 22.86 22.72 22.4 15.55 16.02 16.08 15.35 15.78
## medv 24.60 19.30 16.50 17.20 20.6 17.80 22.70 22.60 25.00 19.90
## CV residual 1.68 -3.13 -6.36 -5.52 -1.8 2.25 6.68 6.52 9.65 4.12
## 372 376 378 384 386 387 393 394 397 402
## Predicted 13.5 16.93 16.63 13.65 12.61 -2.5921 12.43 15.42 16.38 15.28
## cvpred 12.7 16.52 16.34 13.72 13.39 0.0085 13.14 15.22 15.95 15.38
## medv 0.0 15.00 13.30 12.30 7.20 10.5000 9.70 13.80 12.50 7.20
## CV residual -12.7 -1.52 -3.04 -1.42 -6.19 10.4915 -3.44 -1.42 -3.45 -8.18
## 406 409 416 419 431 432 437 439 442 446
## Predicted 12.1 12.47 14.7 11.75 14.96 16.01 14.89 14.32 16.37 15.41
## cvpred 13.5 13.14 15.6 13.44 15.74 16.77 15.12 15.18 16.28 15.66
## medv 0.0 17.20 7.2 8.80 14.50 14.10 9.60 8.40 17.10 11.80
## CV residual -13.5 4.06 -8.4 -4.64 -1.24 -2.67 -5.52 -6.78 0.82 -3.86
## 448 451 452 459 465 467 474 480 494 495
## Predicted 15.84 16.49 17.43 15.72 17.28 15.27 19.2 15.0 19.12 19.98
## cvpred 15.75 16.41 17.08 16.12 17.99 15.66 19.2 15.6 18.33 19.69
## medv 12.60 13.40 15.20 14.90 21.40 19.00 29.8 21.4 21.80 24.50
## CV residual -3.15 -3.01 -1.88 -1.22 3.41 3.34 10.6 5.8 3.47 4.81
## 499 502
## Predicted 19.38 21.77
## cvpred 18.66 19.94
## medv 21.20 22.40
## CV residual 2.54 2.46
##
## Sum of squares = 5008 Mean square = 40.7 n = 123
##
## Overall (Sum over all 123 folds)
## ms
## 43.6
#-------------------------------------------------------------------------------------#
# All Subsets Regression method 3
library(leaps)
fit3<-regsubsets(medv ~log(crim)+indus+nox+rm+age+log(dis)+rad+tax+ptratio+black+log(lstat), data=reg1,nbest=10)
# view results
summary(fit3)
## Subset selection object
## Call: regsubsets.formula(medv ~ log(crim) + indus + nox + rm + age +
## log(dis) + rad + tax + ptratio + black + log(lstat), data = reg1,
## nbest = 10)
## 11 Variables (and intercept)
## Forced in Forced out
## log(crim) FALSE FALSE
## indus FALSE FALSE
## nox FALSE FALSE
## rm FALSE FALSE
## age FALSE FALSE
## log(dis) FALSE FALSE
## rad FALSE FALSE
## tax FALSE FALSE
## ptratio FALSE FALSE
## black FALSE FALSE
## log(lstat) FALSE FALSE
## 10 subsets of each size up to 8
## Selection Algorithm: exhaustive
## log(crim) indus nox rm age log(dis) rad tax ptratio black
## 1 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
## 1 ( 2 ) "*" " " " " " " " " " " " " " " " " " "
## 1 ( 3 ) " " " " " " " " " " "*" " " " " " " " "
## 1 ( 4 ) " " " " " " " " "*" " " " " " " " " " "
## 1 ( 5 ) " " " " "*" " " " " " " " " " " " " " "
## 1 ( 6 ) " " " " " " " " " " " " " " "*" " " " "
## 1 ( 7 ) " " "*" " " " " " " " " " " " " " " " "
## 1 ( 8 ) " " " " " " " " " " " " "*" " " " " " "
## 1 ( 9 ) " " " " " " " " " " " " " " " " " " " "
## 1 ( 10 ) " " " " " " " " " " " " " " " " " " "*"
## 2 ( 1 ) "*" " " " " "*" " " " " " " " " " " " "
## 2 ( 2 ) " " " " " " "*" " " " " " " "*" " " " "
## 2 ( 3 ) " " " " " " "*" "*" " " " " " " " " " "
## 2 ( 4 ) " " " " " " "*" " " "*" " " " " " " " "
## 2 ( 5 ) " " " " "*" "*" " " " " " " " " " " " "
## 2 ( 6 ) " " "*" " " "*" " " " " " " " " " " " "
## 2 ( 7 ) " " " " " " "*" " " " " "*" " " " " " "
## 2 ( 8 ) " " " " " " "*" " " " " " " " " " " " "
## 2 ( 9 ) " " " " " " "*" " " " " " " " " " " "*"
## 2 ( 10 ) " " " " " " "*" " " " " " " " " "*" " "
## 3 ( 1 ) "*" " " " " "*" "*" " " " " " " " " " "
## 3 ( 2 ) " " " " " " "*" "*" " " " " "*" " " " "
## 3 ( 3 ) "*" " " " " "*" " " "*" " " " " " " " "
## 3 ( 4 ) "*" "*" " " "*" " " " " " " " " " " " "
## 3 ( 5 ) "*" " " "*" "*" " " " " " " " " " " " "
## 3 ( 6 ) "*" " " " " "*" " " " " " " "*" " " " "
## 3 ( 7 ) "*" " " " " "*" " " " " " " " " "*" " "
## 3 ( 8 ) " " " " " " "*" "*" " " "*" " " " " " "
## 3 ( 9 ) "*" " " " " "*" " " " " " " " " " " "*"
## 3 ( 10 ) "*" " " " " "*" " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " "*" " " " "
## 4 ( 2 ) "*" " " " " "*" "*" " " " " " " "*" " "
## 4 ( 3 ) "*" "*" " " "*" "*" " " " " " " " " " "
## 4 ( 4 ) "*" " " " " "*" "*" " " " " " " " " "*"
## 4 ( 5 ) "*" " " " " "*" "*" " " " " " " " " " "
## 4 ( 6 ) "*" " " " " "*" "*" "*" " " " " " " " "
## 4 ( 7 ) "*" " " " " "*" "*" " " "*" " " " " " "
## 4 ( 8 ) "*" " " "*" "*" "*" " " " " " " " " " "
## 4 ( 9 ) " " " " " " "*" "*" " " " " "*" " " "*"
## 4 ( 10 ) " " " " " " "*" "*" "*" " " "*" " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " "*" "*" " " " "
## 5 ( 2 ) "*" " " " " "*" "*" " " " " "*" " " " "
## 5 ( 3 ) "*" " " " " "*" "*" " " " " "*" "*" " "
## 5 ( 4 ) "*" " " " " "*" "*" " " " " "*" " " "*"
## 5 ( 5 ) "*" "*" " " "*" "*" " " " " "*" " " " "
## 5 ( 6 ) "*" " " " " "*" "*" "*" " " "*" " " " "
## 5 ( 7 ) "*" " " "*" "*" "*" " " " " "*" " " " "
## 5 ( 8 ) "*" " " " " "*" "*" " " " " " " "*" " "
## 5 ( 9 ) "*" "*" " " "*" "*" " " " " " " "*" " "
## 5 ( 10 ) "*" " " " " "*" "*" " " " " " " "*" "*"
## 6 ( 1 ) "*" " " " " "*" "*" " " "*" "*" "*" " "
## 6 ( 2 ) "*" " " " " "*" "*" " " "*" "*" " " " "
## 6 ( 3 ) "*" " " " " "*" "*" " " "*" "*" " " "*"
## 6 ( 4 ) "*" " " "*" "*" "*" " " "*" "*" " " " "
## 6 ( 5 ) "*" " " " " "*" "*" " " " " "*" "*" " "
## 6 ( 6 ) "*" " " " " "*" "*" "*" "*" "*" " " " "
## 6 ( 7 ) "*" "*" " " "*" "*" " " "*" "*" " " " "
## 6 ( 8 ) "*" " " " " "*" "*" " " " " "*" " " "*"
## 6 ( 9 ) "*" "*" " " "*" "*" " " " " "*" " " " "
## 6 ( 10 ) "*" " " " " "*" "*" " " " " "*" "*" "*"
## 7 ( 1 ) "*" " " " " "*" "*" " " "*" "*" "*" " "
## 7 ( 2 ) "*" " " " " "*" "*" " " "*" "*" "*" "*"
## 7 ( 3 ) "*" " " " " "*" "*" " " "*" "*" " " "*"
## 7 ( 4 ) "*" " " "*" "*" "*" " " "*" "*" " " " "
## 7 ( 5 ) "*" " " " " "*" "*" "*" "*" "*" "*" " "
## 7 ( 6 ) "*" "*" " " "*" "*" " " "*" "*" "*" " "
## 7 ( 7 ) "*" " " "*" "*" "*" " " "*" "*" "*" " "
## 7 ( 8 ) "*" "*" " " "*" "*" " " "*" "*" " " " "
## 7 ( 9 ) "*" " " " " "*" "*" "*" "*" "*" " " " "
## 7 ( 10 ) "*" " " " " "*" "*" " " " " "*" "*" "*"
## 8 ( 1 ) "*" " " " " "*" "*" " " "*" "*" "*" "*"
## 8 ( 2 ) "*" " " " " "*" "*" "*" "*" "*" "*" " "
## 8 ( 3 ) "*" " " "*" "*" "*" " " "*" "*" "*" " "
## 8 ( 4 ) "*" "*" " " "*" "*" " " "*" "*" "*" " "
## 8 ( 5 ) "*" " " "*" "*" "*" " " "*" "*" " " "*"
## 8 ( 6 ) "*" " " " " "*" "*" "*" "*" "*" "*" "*"
## 8 ( 7 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*"
## 8 ( 8 ) "*" " " "*" "*" "*" " " "*" "*" "*" "*"
## 8 ( 9 ) "*" "*" " " "*" "*" " " "*" "*" " " "*"
## 8 ( 10 ) "*" " " " " "*" "*" "*" "*" "*" " " "*"
## log(lstat)
## 1 ( 1 ) " "
## 1 ( 2 ) " "
## 1 ( 3 ) " "
## 1 ( 4 ) " "
## 1 ( 5 ) " "
## 1 ( 6 ) " "
## 1 ( 7 ) " "
## 1 ( 8 ) " "
## 1 ( 9 ) "*"
## 1 ( 10 ) " "
## 2 ( 1 ) " "
## 2 ( 2 ) " "
## 2 ( 3 ) " "
## 2 ( 4 ) " "
## 2 ( 5 ) " "
## 2 ( 6 ) " "
## 2 ( 7 ) " "
## 2 ( 8 ) "*"
## 2 ( 9 ) " "
## 2 ( 10 ) " "
## 3 ( 1 ) " "
## 3 ( 2 ) " "
## 3 ( 3 ) " "
## 3 ( 4 ) " "
## 3 ( 5 ) " "
## 3 ( 6 ) " "
## 3 ( 7 ) " "
## 3 ( 8 ) " "
## 3 ( 9 ) " "
## 3 ( 10 ) " "
## 4 ( 1 ) " "
## 4 ( 2 ) " "
## 4 ( 3 ) " "
## 4 ( 4 ) " "
## 4 ( 5 ) "*"
## 4 ( 6 ) " "
## 4 ( 7 ) " "
## 4 ( 8 ) " "
## 4 ( 9 ) " "
## 4 ( 10 ) " "
## 5 ( 1 ) " "
## 5 ( 2 ) "*"
## 5 ( 3 ) " "
## 5 ( 4 ) " "
## 5 ( 5 ) " "
## 5 ( 6 ) " "
## 5 ( 7 ) " "
## 5 ( 8 ) "*"
## 5 ( 9 ) " "
## 5 ( 10 ) " "
## 6 ( 1 ) " "
## 6 ( 2 ) "*"
## 6 ( 3 ) " "
## 6 ( 4 ) " "
## 6 ( 5 ) "*"
## 6 ( 6 ) " "
## 6 ( 7 ) " "
## 6 ( 8 ) "*"
## 6 ( 9 ) "*"
## 6 ( 10 ) " "
## 7 ( 1 ) "*"
## 7 ( 2 ) " "
## 7 ( 3 ) "*"
## 7 ( 4 ) "*"
## 7 ( 5 ) " "
## 7 ( 6 ) " "
## 7 ( 7 ) " "
## 7 ( 8 ) "*"
## 7 ( 9 ) "*"
## 7 ( 10 ) "*"
## 8 ( 1 ) "*"
## 8 ( 2 ) "*"
## 8 ( 3 ) "*"
## 8 ( 4 ) "*"
## 8 ( 5 ) "*"
## 8 ( 6 ) " "
## 8 ( 7 ) " "
## 8 ( 8 ) " "
## 8 ( 9 ) "*"
## 8 ( 10 ) "*"
# plot a table of models showing variables in each model.
# models are ordered by the selection statistic.
plot(fit3,scale="r2")
#------------------------------------------------------------------#
#to save any file in the R environment into csv to the working directory
#getwd()
#write.table(reg1, file = "boston1.csv", sep = ",", col.names = NA)
#save the data package file in txt format
#write.table(reg1, "reg2.txt", sep="\t")