Dr. Manohar Kapse

PhD Statistics

MPhil Statistics

Hi, I'm guiding this project.

library(MASS)
library(ggplot2)
library(pastecs)
## Loading required package: boot
## Warning: package 'boot' was built under R version 3.2.5
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:boot':
## 
##     logit
library(qpcR)
## Loading required package: minpack.lm
## Loading required package: rgl
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:boot':
## 
##     salinity
## Loading required package: Matrix
library(MPV)
## 
## Attaching package: 'MPV'
## The following object is masked from 'package:qpcR':
## 
##     PRESS
## The following object is masked from 'package:MASS':
## 
##     cement
## The following object is masked from 'package:datasets':
## 
##     stackloss
library(lattice)
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
library(stats)
library(foreign)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:robustbase':
## 
##     cushny
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:boot':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(Hmisc)
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:robustbase':
## 
##     heart
## The following object is masked from 'package:boot':
## 
##     aml
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(HSAUR)
## Loading required package: tools
## 
## Attaching package: 'HSAUR'
## The following object is masked from 'package:robustbase':
## 
##     epilepsy
library(MVN)
## sROC 0.1-2 loaded
## 
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
## 
##     mardia
library(caTools) # for spliting the sample
## Warning: package 'caTools' was built under R version 3.2.5
library(boot)
library(DAAG)
## Warning: package 'DAAG' was built under R version 3.2.5
## 
## Attaching package: 'DAAG'
## The following object is masked from 'package:survival':
## 
##     lung
## The following object is masked from 'package:psych':
## 
##     cities
## The following object is masked from 'package:robustbase':
## 
##     milk
## The following object is masked from 'package:car':
## 
##     vif
## The following object is masked from 'package:MASS':
## 
##     hills
# To clear every object from the R environment, uncomment the next line.
# (Left disabled on purpose: wiping the workspace from a script is intrusive.)
#rm(list=ls())
# Load the data: pick the "boston 1" CSV file in the dialog that opens.
# file.choose() is used instead of choose.files() because choose.files()
# exists only on Windows, whereas file.choose() is available on every
# platform and returns exactly one path, which is what read.csv() needs.
reg1 <- read.csv(file.choose())

missing values

# ---- Missing values ----
# Handy one-liners for a single column (kept for reference):
#   is.na(reg1$<col>)        logical flag per observation
#   sum(is.na(reg1$<col>))   number of missing observations
#
# Percentage of missing values, printed once per analysis column in the
# order listed below (crim first, medv last).
invisible(lapply(
  c("crim", "zn", "indus", "chas", "nox", "rm", "age",
    "dis", "rad", "tax", "ptratio", "black", "lstat", "medv"),
  function(col_name) {
    column <- reg1[[col_name]]
    print(sum(is.na(column) * 100) / length(column))
  }
))
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 0
# Every column above reports 0% missing values, so no rows need to be removed.
# (Original note: with a sample this large, rows with missing values could
# simply be dropped if there were any.)
# ---- Descriptive study of the variables ----
# SPSS can be used as an alternative for descriptive statistics.
# stat.desc() (pastecs) tabulates descriptive statistics for every column of
# reg1; per the original note it works better when the sample size is small.

stat.desc(reg1)
##                         X         crim          zn        indus
## nbr.val      5.060000e+02  506.0000000  506.000000  506.0000000
## nbr.null     0.000000e+00    0.0000000  372.000000    0.0000000
## nbr.na       0.000000e+00    0.0000000    0.000000    0.0000000
## min          1.000000e+00    0.0063200    0.000000    0.4600000
## max          5.060000e+02   88.9762000  100.000000   27.7400000
## range        5.050000e+02   88.9698800  100.000000   27.2800000
## sum          1.282710e+05 1828.4429200 5750.000000 5635.2100000
## median       2.535000e+02    0.2565100    0.000000    9.6900000
## mean         2.535000e+02    3.6135236   11.363636   11.1367787
## SE.mean      6.500000e+00    0.3823853    1.036810    0.3049799
## CI.mean.0.95 1.277037e+01    0.7512620    2.036991    0.5991856
## var          2.137850e+04   73.9865782  543.936814   47.0644425
## std.dev      1.462139e+02    8.6015451   23.322453    6.8603529
## coef.var     5.767806e-01    2.3803761    2.052376    0.6160087
##                      chas          nox           rm          age
## nbr.val      506.00000000 5.060000e+02 5.060000e+02 5.060000e+02
## nbr.null     471.00000000 0.000000e+00 0.000000e+00 0.000000e+00
## nbr.na         0.00000000 0.000000e+00 0.000000e+00 0.000000e+00
## min            0.00000000 3.850000e-01 3.561000e+00 2.900000e+00
## max            1.00000000 8.710000e-01 8.780000e+00 1.000000e+02
## range          1.00000000 4.860000e-01 5.219000e+00 9.710000e+01
## sum           35.00000000 2.806757e+02 3.180025e+03 3.469890e+04
## median         0.00000000 5.380000e-01 6.208500e+00 7.750000e+01
## mean           0.06916996 5.546951e-01 6.284634e+00 6.857490e+01
## SE.mean        0.01129141 5.151391e-03 3.123514e-02 1.251370e+00
## CI.mean.0.95   0.02218393 1.012080e-02 6.136683e-02 2.458531e+00
## var            0.06451297 1.342764e-02 4.936709e-01 7.923584e+02
## std.dev        0.25399404 1.158777e-01 7.026171e-01 2.814886e+01
## coef.var       3.67202814 2.089034e-01 1.117992e-01 4.104834e-01
##                       dis          rad          tax      ptratio
## nbr.val      5.060000e+02  506.0000000 5.060000e+02 5.060000e+02
## nbr.null     0.000000e+00    0.0000000 0.000000e+00 0.000000e+00
## nbr.na       0.000000e+00    0.0000000 0.000000e+00 0.000000e+00
## min          1.129600e+00    1.0000000 1.870000e+02 1.260000e+01
## max          1.212650e+01   24.0000000 7.110000e+02 2.200000e+01
## range        1.099690e+01   23.0000000 5.240000e+02 9.400000e+00
## sum          1.920292e+03 4832.0000000 2.065680e+05 9.338500e+03
## median       3.207450e+00    5.0000000 3.300000e+02 1.905000e+01
## mean         3.795043e+00    9.5494071 4.082372e+02 1.845553e+01
## SE.mean      9.361023e-02    0.3870849 7.492389e+00 9.624357e-02
## CI.mean.0.95 1.839135e-01    0.7604951 1.472009e+01 1.890871e-01
## var          4.434015e+00   75.8163660 2.840476e+04 4.686989e+00
## std.dev      2.105710e+00    8.7072594 1.685371e+02 2.164946e+00
## coef.var     5.548581e-01    0.9118115 4.128412e-01 1.173060e-01
##                     black        lstat         medv
## nbr.val      5.060000e+02  506.0000000 5.060000e+02
## nbr.null     0.000000e+00    0.0000000 0.000000e+00
## nbr.na       0.000000e+00    0.0000000 0.000000e+00
## min          3.200000e-01    1.7300000 5.000000e+00
## max          3.969000e+02   37.9700000 5.000000e+01
## range        3.965800e+02   36.2400000 4.500000e+01
## sum          1.804771e+05 6402.4500000 1.140160e+04
## median       3.914400e+02   11.3600000 2.120000e+01
## mean         3.566740e+02   12.6530632 2.253281e+01
## SE.mean      4.058552e+00    0.3174589 4.088611e-01
## CI.mean.0.95 7.973726e+00    0.6237028 8.032783e-01
## var          8.334752e+03   50.9947595 8.458672e+01
## std.dev      9.129486e+01    7.1410615 9.197104e+00
## coef.var     2.559616e-01    0.5643741 4.081651e-01
# Per-column summary of reg1: minimum, quartiles, median, mean and maximum.
summary(reg1)
##        X              crim                zn             indus      
##  Min.   :  1.0   Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46  
##  1st Qu.:127.2   1st Qu.: 0.08204   1st Qu.:  0.00   1st Qu.: 5.19  
##  Median :253.5   Median : 0.25651   Median :  0.00   Median : 9.69  
##  Mean   :253.5   Mean   : 3.61352   Mean   : 11.36   Mean   :11.14  
##  3rd Qu.:379.8   3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10  
##  Max.   :506.0   Max.   :88.97620   Max.   :100.00   Max.   :27.74  
##       chas              nox               rm             age        
##  Min.   :0.00000   Min.   :0.3850   Min.   :3.561   Min.   :  2.90  
##  1st Qu.:0.00000   1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02  
##  Median :0.00000   Median :0.5380   Median :6.208   Median : 77.50  
##  Mean   :0.06917   Mean   :0.5547   Mean   :6.285   Mean   : 68.57  
##  3rd Qu.:0.00000   3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08  
##  Max.   :1.00000   Max.   :0.8710   Max.   :8.780   Max.   :100.00  
##       dis              rad              tax           ptratio     
##  Min.   : 1.130   Min.   : 1.000   Min.   :187.0   Min.   :12.60  
##  1st Qu.: 2.100   1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40  
##  Median : 3.207   Median : 5.000   Median :330.0   Median :19.05  
##  Mean   : 3.795   Mean   : 9.549   Mean   :408.2   Mean   :18.46  
##  3rd Qu.: 5.188   3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20  
##  Max.   :12.127   Max.   :24.000   Max.   :711.0   Max.   :22.00  
##      black            lstat            medv      
##  Min.   :  0.32   Min.   : 1.73   Min.   : 5.00  
##  1st Qu.:375.38   1st Qu.: 6.95   1st Qu.:17.02  
##  Median :391.44   Median :11.36   Median :21.20  
##  Mean   :356.67   Mean   :12.65   Mean   :22.53  
##  3rd Qu.:396.23   3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :396.90   Max.   :37.97   Max.   :50.00
# Compact structure listing: number of observations, column names and types.
str(reg1)
## 'data.frame':    506 obs. of  15 variables:
##  $ X      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : int  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
#-------------------------------------------------------------------------------#

# ---- Outlier screening: crim ----
# Box plot of the raw values to spot outliers.
boxplot(reg1[["crim"]])

# Same variable on the log scale, to damp the effect of the outliers.
boxplot(log(reg1[["crim"]]))

# The outliers disappear once the log is taken, so log(crim) is the form
# that will be used in the analysis.

#---------------------------------------------------------------------------------------#
# ---- Outlier screening: zn ----
# Box plot of the raw values to spot outliers.
boxplot(reg1[["zn"]])

# Log-scale view. In zn about 73% of the observations have no land allotted,
# so this variable should be converted into categories instead.
boxplot(log(reg1[["zn"]]))

# indus: box plot shows no outliers.
boxplot(reg1[["indus"]])

# chas: categorical data.
boxplot(reg1[["chas"]])

# nox: box plot shows no outliers.
boxplot(reg1[["nox"]])

# ---- Outlier screening: rm ----
# Box plot of the raw values to spot outliers.
boxplot(reg1[["rm"]])

# Same variable on the log scale, to damp the effect of the outliers.
boxplot(log(reg1[["rm"]]))

# ---- Winsorise rm using the 1.5 * IQR fences ----
# Upper fence for rm: Q3 + 1.5 * IQR
bu_rm <- quantile(reg1$rm, 0.75, na.rm = TRUE) + 1.5 * IQR(reg1$rm, na.rm = TRUE)
# Lower fence for rm: Q1 - 1.5 * IQR
bl_rm <- quantile(reg1$rm, 0.25, na.rm = TRUE) - 1.5 * IQR(reg1$rm, na.rm = TRUE)
# Observations lying outside the fences (the outliers)
rm_u <- reg1$rm[reg1$rm > bu_rm]
length(rm_u)
## [1] 22
rm_l <- reg1$rm[reg1$rm < bl_rm]
length(rm_l)
## [1] 8
# Outliers are pulled back to the nearest fence instead of being dropped, so
# the sample size is preserved.
# FIX: the earlier code used ifelse(cond, reg1$rm == bl_rm, reg1$rm) (and the
# same with bu_rm). The `==` comparison evaluates to FALSE, which is coerced
# to 0, so every outlier was overwritten with 0 rather than the fence value.
# pmax()/pmin() clamp to the fences directly; NA values stay NA.
# NOTE: if the lower fence is negative for a variable that cannot be
# negative, do not apply the lower clamp.
reg1$rm <- pmin(pmax(reg1$rm, bl_rm), bu_rm)
# Check the outliers again after winsorising
boxplot(reg1$rm)

summary(reg1$rm)
# NOTE: the output recorded below was produced by the pre-fix code. Its
# 0.000 minimum and shifted quartiles/mean are artefacts of the outliers
# having been set to 0. Re-run to regenerate it.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   5.851   6.152   5.864   6.515   7.691
length(reg1$rm)
## [1] 506
#---------------------------------------------------------------------------------------#
# ---- Outlier screening: age, dis, rad, tax, ptratio, black ----
# age: box plot shows no outliers.
boxplot(reg1[["age"]])

# dis: box plot of the raw values to spot outliers.
boxplot(reg1[["dis"]])

# dis on the log scale: no outliers remain.
boxplot(log(reg1[["dis"]]))

#---------------------------------------------------------------#
# rad: box plot shows no outliers.
boxplot(reg1[["rad"]])

#---------------------------------------------------------------------------------------#

# tax: box plot shows no outliers.
boxplot(reg1[["tax"]])

# ptratio: box plot shows no outliers.
boxplot(reg1[["ptratio"]])

# black: box plot of the raw values to spot outliers.
boxplot(reg1[["black"]])

# black on the log scale, to damp the effect of the outliers.
boxplot(log(reg1[["black"]]))

# ---- Winsorise black using the 1.5 * IQR fences ----
# Upper fence for black: Q3 + 1.5 * IQR
bu_black <- quantile(reg1$black, 0.75, na.rm = TRUE) + 1.5 * IQR(reg1$black, na.rm = TRUE)
# Lower fence for black: Q1 - 1.5 * IQR
bl_black <- quantile(reg1$black, 0.25, na.rm = TRUE) - 1.5 * IQR(reg1$black, na.rm = TRUE)
# Observations lying outside the fences (the outliers)
black_u <- reg1$black[reg1$black > bu_black]
length(black_u)
## [1] 0
black_l <- reg1$black[reg1$black < bl_black]
length(black_l)
## [1] 77
# Outliers are pulled back to the nearest fence instead of being dropped, so
# the sample size is preserved.
# FIX: the earlier code used ifelse(cond, reg1$black == bl_black, reg1$black)
# (and the same with bu_black). The `==` comparison evaluates to FALSE, which
# is coerced to 0, so every outlier was overwritten with 0 rather than the
# fence value. pmax()/pmin() clamp to the fences directly; NA values stay NA.
# NOTE: if the lower fence is negative for a variable that cannot be
# negative, do not apply the lower clamp.
reg1$black <- pmin(pmax(reg1$black, bl_black), bu_black)
# Check the outliers again after winsorising
boxplot(reg1$black)

summary(reg1$black)
# NOTE: the output recorded below was produced by the pre-fix code. Its 0.0
# minimum and lowered mean are artefacts of the outliers having been set
# to 0. Re-run to regenerate it.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   375.4   391.4   329.6   396.2   396.9
length(reg1$black)
## [1] 506
#---------------------------------------------------------------------------------------#

# ---- Outlier screening: lstat ----
# Box plot of the raw values to spot outliers.
boxplot(reg1[["lstat"]])

# Same variable on the log scale, to damp the effect of the outliers.
boxplot(log(reg1[["lstat"]]))

#---------------------------------------------------------------------------------------#

# ---- Outlier screening: medv ----
# Box plot of the raw values to spot outliers.
boxplot(reg1[["medv"]])

# Same variable on the log scale, to damp the effect of the outliers.
boxplot(log(reg1[["medv"]]))

# ---- Winsorise medv using the 1.5 * IQR fences ----
# Upper fence for medv: Q3 + 1.5 * IQR
bu_medv <- quantile(reg1$medv, 0.75, na.rm = TRUE) + 1.5 * IQR(reg1$medv, na.rm = TRUE)
# Lower fence for medv: Q1 - 1.5 * IQR
bl_medv <- quantile(reg1$medv, 0.25, na.rm = TRUE) - 1.5 * IQR(reg1$medv, na.rm = TRUE)
# Observations lying outside the fences (the outliers)
medv_u <- reg1$medv[reg1$medv > bu_medv]
length(medv_u)
## [1] 38
medv_l <- reg1$medv[reg1$medv < bl_medv]
length(medv_l)
## [1] 2
# Outliers are pulled back to the nearest fence instead of being dropped, so
# the sample size is preserved.
# FIX: the earlier code used ifelse(cond, reg1$medv == bl_medv, reg1$medv)
# (and the same with bu_medv). The `==` comparison evaluates to FALSE, which
# is coerced to 0, so every outlier of the response variable was overwritten
# with 0 rather than the fence value. pmax()/pmin() clamp to the fences
# directly; NA values stay NA.
# NOTE: if the lower fence is negative for a variable that cannot be
# negative, do not apply the lower clamp.
reg1$medv <- pmin(pmax(reg1$medv, bl_medv), bu_medv)
# Check the outliers again after winsorising
boxplot(reg1$medv)

summary(reg1$medv)
# NOTE: the output recorded below was produced by the pre-fix code. Its 0.00
# minimum and lowered quartiles/mean are artefacts of the outliers having
# been set to 0. Any recorded correlation or regression output in this file
# that involves medv was computed from those zeroed values as well. Re-run
# to regenerate it.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   14.60   20.05   19.08   23.80   36.50
#Linear relationship between the dependent and independent variables

#---------------------------------------#
#---------------------------------------#
# Linear relation between medv and log(crim)
# Scatter plot of the raw pairs
plot(reg1$medv, log(reg1$crim))

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = log(crim))) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and log(crim)
cor.test(reg1$medv, log(reg1$crim))
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and log(reg1$crim)
## t = -10.6, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4956852 -0.3529316
## sample estimates:
##        cor 
## -0.4269649
#---------------------------------------#

# Linear relation between medv and indus
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$indus)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = indus)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and indus
cor.test(reg1$medv, reg1$indus)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$indus
## t = -8.3757, df = 504, p-value = 5.48e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4238029 -0.2706245
## sample estimates:
##        cor 
## -0.3495473
#---------------------------------------#

# Linear relation between medv and nox
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$nox)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = nox)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and nox
cor.test(reg1$medv, reg1$nox)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$nox
## t = -8.8922, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4412575 -0.2904067
## sample estimates:
##        cor 
## -0.3682533
#---------------------------------------#

# Linear relation between medv and rm
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$rm)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = rm)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and rm
cor.test(reg1$medv, reg1$rm)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$rm
## t = 13.259, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4409076 0.5704150
## sample estimates:
##       cor 
## 0.5085317
#---------------------------------------#

# Linear relation between medv and age
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$age)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = age)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and age
cor.test(reg1$medv, reg1$age)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$age
## t = -9.4416, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4593212 -0.3110159
## sample estimates:
##        cor 
## -0.3876744
#---------------------------------------#

# Linear relation between medv and log(dis)
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, log(reg1$dis))

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = log(dis))) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and log(dis)
cor.test(reg1$medv, log(reg1$dis))
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and log(reg1$dis)
## t = 10.005, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3316809 0.4773115
## sample estimates:
##     cor 
## 0.40708
#---------------------------------------#

# Linear relation between medv and rad
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$rad)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = rad)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and rad
cor.test(reg1$medv, reg1$rad)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$rad
## t = -7.8726, df = 504, p-value = 2.143e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4063647 -0.2509892
## sample estimates:
##        cor 
## -0.3309179
#---------------------------------------#

# Linear relation between medv and tax
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$tax)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = tax)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and tax
cor.test(reg1$medv, reg1$tax)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$tax
## t = -8.4977, df = 504, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4279683 -0.2753336
## sample estimates:
##       cor 
## -0.354006
#---------------------------------------#

# Linear relation between medv and ptratio
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$ptratio)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = ptratio)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and ptratio
cor.test(reg1$medv, reg1$ptratio)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$ptratio
## t = -3.6457, df = 504, p-value = 0.0002944
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.24404986 -0.07415862
## sample estimates:
##        cor 
## -0.1602911
#---------------------------------------#

# Linear relation between medv and black
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, reg1$black)

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = black)) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and black
cor.test(reg1$medv, reg1$black)
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and reg1$black
## t = 5.0064, df = 504, p-value = 7.68e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1330110 0.2991491
## sample estimates:
##       cor 
## 0.2176561
#---------------------------------------#

# Linear relation between medv and log(lstat)
# (the earlier comment said "indus", a copy-paste slip)
# Scatter plot of the raw pairs
plot(reg1$medv, log(reg1$lstat))

# Fitted straight line with its confidence band.
# ggplot() + geom_smooth() replaces qplot(..., geom = "smooth", method = "lm"):
# qplot() is deprecated in current ggplot2, and the explicit form passes
# `method` straight to the smoother.
ggplot(reg1, aes(x = medv, y = log(lstat))) +
  geom_smooth(method = "lm", formula = y ~ x)

# Pearson correlation test between medv and log(lstat).
# FIX: the test previously used raw lstat, while both plots above and the
# regression model use log(lstat); the test now matches them.
# NOTE: the output recorded below this call was produced with raw lstat;
# re-run to regenerate it.
cor.test(reg1$medv, log(reg1$lstat))
## 
##  Pearson's product-moment correlation
## 
## data:  reg1$medv and (reg1$lstat)
## t = -7.9568, df = 504, p-value = 1.174e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4093140 -0.2543011
## sample estimates:
##        cor 
## -0.3340645
#---------------------------------------#

#---------------------------------------#

# ---- Train / validation split (75% / 25%) ----
set.seed(88)  # fixed seed so the random split is reproducible
# FIX: sample.split() expects a vector with one entry per observation (the
# outcome). The earlier call passed the whole data frame, whose length is its
# number of columns, so the mask had only 15 entries (4 FALSE / 11 TRUE) and
# was recycled over the rows as a fixed repeating pattern, not a random split.
split1 <- sample.split(reg1$medv, SplitRatio = 0.75)
summary(split1)
# NOTE: the output recorded below was produced by the pre-fix call and shows
# the 15-entry mask; re-run to regenerate it.
##    Mode   FALSE    TRUE    NA's 
## logical       4      11       0
length(reg1$rm)
## [1] 506
# Create the training and validation sub-samples from the row mask

reg1train <- subset(reg1, split1)
reg1Val <- subset(reg1, !split1)

# Per-column summary of the training subset.
summary(reg1train)
##        X              crim                zn              indus       
##  Min.   :  1.0   Min.   : 0.00632   Min.   :  0.000   Min.   : 0.460  
##  1st Qu.:127.5   1st Qu.: 0.08827   1st Qu.:  0.000   1st Qu.: 5.190  
##  Median :253.5   Median : 0.26888   Median :  0.000   Median : 9.795  
##  Mean   :253.0   Mean   : 3.42444   Mean   : 10.907   Mean   :11.257  
##  3rd Qu.:377.8   3rd Qu.: 3.75547   3rd Qu.:  9.375   3rd Qu.:18.100  
##  Max.   :506.0   Max.   :73.53410   Max.   :100.000   Max.   :27.740  
##       chas              nox               rm             age        
##  Min.   :0.00000   Min.   :0.3850   Min.   :0.000   Min.   :  2.90  
##  1st Qu.:0.00000   1st Qu.:0.4530   1st Qu.:5.852   1st Qu.: 45.65  
##  Median :0.00000   Median :0.5380   Median :6.162   Median : 78.60  
##  Mean   :0.07027   Mean   :0.5554   Mean   :5.896   Mean   : 68.75  
##  3rd Qu.:0.00000   3rd Qu.:0.6240   3rd Qu.:6.495   3rd Qu.: 94.08  
##  Max.   :1.00000   Max.   :0.8710   Max.   :7.691   Max.   :100.00  
##       dis              rad              tax           ptratio     
##  Min.   : 1.130   Min.   : 1.000   Min.   :187.0   Min.   :12.60  
##  1st Qu.: 2.113   1st Qu.: 4.000   1st Qu.:284.0   1st Qu.:16.93  
##  Median : 3.106   Median : 5.000   Median :330.0   Median :18.80  
##  Mean   : 3.795   Mean   : 9.576   Mean   :410.1   Mean   :18.41  
##  3rd Qu.: 5.227   3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20  
##  Max.   :12.127   Max.   :24.000   Max.   :711.0   Max.   :22.00  
##      black           lstat            medv      
##  Min.   :  0.0   Min.   : 1.73   Min.   : 0.00  
##  1st Qu.:374.8   1st Qu.: 7.15   1st Qu.:14.53  
##  Median :390.8   Median :11.57   Median :19.95  
##  Mean   :329.4   Mean   :12.68   Mean   :18.72  
##  3rd Qu.:395.6   3rd Qu.:16.95   3rd Qu.:23.40  
##  Max.   :396.9   Max.   :37.97   Max.   :36.40
# Per-column summary of the validation subset, for comparison with training.
summary(reg1Val)
##        X              crim                zn            indus       
##  Min.   :  5.0   Min.   : 0.01360   Min.   : 0.00   Min.   : 1.250  
##  1st Qu.:127.5   1st Qu.: 0.06894   1st Qu.: 0.00   1st Qu.: 4.928  
##  Median :255.0   Median : 0.21508   Median : 0.00   Median : 8.140  
##  Mean   :254.8   Mean   : 4.12793   Mean   :12.61   Mean   :10.811  
##  3rd Qu.:381.5   3rd Qu.: 3.48946   3rd Qu.:20.00   3rd Qu.:18.100  
##  Max.   :505.0   Max.   :88.97620   Max.   :95.00   Max.   :27.740  
##       chas              nox               rm             age        
##  Min.   :0.00000   Min.   :0.3980   Min.   :0.000   Min.   :  9.80  
##  1st Qu.:0.00000   1st Qu.:0.4470   1st Qu.:5.851   1st Qu.: 42.70  
##  Median :0.00000   Median :0.5380   Median :6.136   Median : 76.25  
##  Mean   :0.06618   Mean   :0.5527   Mean   :5.777   Mean   : 68.11  
##  3rd Qu.:0.00000   3rd Qu.:0.6240   3rd Qu.:6.555   3rd Qu.: 93.95  
##  Max.   :1.00000   Max.   :0.8710   Max.   :7.610   Max.   :100.00  
##       dis              rad              tax           ptratio     
##  Min.   : 1.322   Min.   : 1.000   Min.   :188.0   Min.   :13.00  
##  1st Qu.: 1.990   1st Qu.: 4.000   1st Qu.:276.0   1st Qu.:17.55  
##  Median : 3.367   Median : 5.000   Median :332.0   Median :19.10  
##  Mean   : 3.795   Mean   : 9.478   Mean   :403.2   Mean   :18.59  
##  3rd Qu.: 4.926   3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20  
##  Max.   :10.710   Max.   :24.000   Max.   :711.0   Max.   :22.00  
##      black           lstat             medv      
##  Min.   :  0.0   Min.   : 2.470   Min.   : 0.00  
##  1st Qu.:376.5   1st Qu.: 6.862   1st Qu.:14.97  
##  Median :393.4   Median :10.570   Median :20.45  
##  Mean   :329.9   Mean   :12.566   Mean   :20.07  
##  3rd Qu.:396.9   3rd Qu.:16.980   3rd Qu.:25.00  
##  Max.   :396.9   Max.   :36.980   Max.   :36.50
# Method 1: fit a linear model of medv on the training subset (reg1train).
# crim, dis and lstat enter the formula on the log scale; zn and chas are
# not part of the formula.


fit1 <-lm(medv ~log(crim)+indus+nox+rm+age+log(dis)+rad+tax+ptratio+black+log(lstat), data=reg1train)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(fit1)
## 
## Call:
## lm(formula = medv ~ log(crim) + indus + nox + rm + age + log(dis) + 
##     rad + tax + ptratio + black + log(lstat), data = reg1train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.2452  -2.2053   0.0314   2.8149  16.9850 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.408354   5.963202   0.404   0.6865    
## log(crim)   -0.801404   0.411658  -1.947   0.0523 .  
## indus        0.094067   0.094433   0.996   0.3199    
## nox          2.287610   6.262192   0.365   0.7151    
## rm           2.798462   0.225566  12.406   <2e-16 ***
## age         -0.041353   0.019953  -2.072   0.0389 *  
## log(dis)     1.745463   1.325149   1.317   0.1886    
## rad          0.029199   0.115455   0.253   0.8005    
## tax         -0.005255   0.005681  -0.925   0.3556    
## ptratio     -0.251536   0.189386  -1.328   0.1850    
## black        0.003424   0.002660   1.287   0.1989    
## log(lstat)   1.269495   0.769730   1.649   0.1000 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.219 on 358 degrees of freedom
## Multiple R-squared:  0.4548, Adjusted R-squared:  0.438 
## F-statistic: 27.14 on 11 and 358 DF,  p-value: < 2.2e-16
# Estimated model coefficients
coef(fit1)
##  (Intercept)    log(crim)        indus          nox           rm 
##  2.408354341 -0.801404389  0.094067269  2.287609679  2.798461581 
##          age     log(dis)          rad          tax      ptratio 
## -0.041353130  1.745463431  0.029198719 -0.005254872 -0.251536400 
##        black   log(lstat) 
##  0.003423819  1.269495276
# 95% confidence intervals for the model parameters
confint(object = fit1, level = 0.95)
##                     2.5 %       97.5 %
## (Intercept)  -9.318954121 14.135662802
## log(crim)    -1.610975917  0.008167139
## indus        -0.091644879  0.279779417
## nox         -10.027694552 14.602913909
## rm            2.354860564  3.242062598
## age          -0.080593880 -0.002112380
## log(dis)     -0.860590359  4.351517221
## rad          -0.197856235  0.256253672
## tax          -0.016426624  0.005916880
## ptratio      -0.623984316  0.120911516
## black        -0.001807556  0.008655194
## log(lstat)   -0.244265288  2.783255840
# Predicted (fitted) values of the model, stored in fit_val
fit_val <- fitted.values(fit1)
# The residuals can be listed with: residuals(fit1)
# Analysis-of-variance table for the fitted model
anova(object = fit1)
## Analysis of Variance Table
## 
## Response: medv
##             Df  Sum Sq Mean Sq  F value  Pr(>F)    
## log(crim)    1  4204.3  4204.3 108.7122 < 2e-16 ***
## indus        1     0.2     0.2   0.0054 0.94169    
## nox          1    33.6    33.6   0.8701 0.35156    
## rm           1  6742.1  6742.1 174.3326 < 2e-16 ***
## age          1   230.2   230.2   5.9519 0.01519 *  
## log(dis)     1    73.7    73.7   1.9049 0.16839    
## rad          1    44.9    44.9   1.1622 0.28174    
## tax          1    34.3    34.3   0.8871 0.34689    
## ptratio      1    30.5    30.5   0.7890 0.37498    
## black        1    48.6    48.6   1.2579 0.26279    
## log(lstat)   1   105.2   105.2   2.7201 0.09997 .  
## Residuals  358 13845.1    38.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Variance-covariance matrix of the estimated model parameters
vcov(object = fit1)
##               (Intercept)     log(crim)         indus           nox
## (Intercept)  35.559782676  0.6658641716  5.807653e-02 -27.322722681
## log(crim)     0.665864172  0.1694622051 -5.245035e-03  -0.621321043
## indus         0.058076533 -0.0052450347  8.917498e-03  -0.115437721
## nox         -27.322722681 -0.6213210426 -1.154377e-01  39.215042582
## rm           -0.023247740  0.0119588619 -2.068253e-04  -0.129412505
## age          -0.018808389 -0.0009654415  7.190623e-05  -0.018328464
## log(dis)     -4.315262214  0.0710888617  3.226833e-02   3.040987950
## rad           0.058624813 -0.0263517713  3.639519e-03   0.009319399
## tax          -0.004506153  0.0002782160 -2.256866e-04  -0.001953321
## ptratio      -0.633337821 -0.0019225713 -3.667932e-03   0.344010077
## black        -0.003325295  0.0001505703  6.761737e-06   0.001277735
## log(lstat)    0.074196263 -0.0409166999 -7.351305e-03  -0.534028756
##                        rm           age      log(dis)           rad
## (Intercept) -2.324774e-02 -1.880839e-02 -4.315262e+00  5.862481e-02
## log(crim)    1.195886e-02 -9.654415e-04  7.108886e-02 -2.635177e-02
## indus       -2.068253e-04  7.190623e-05  3.226833e-02  3.639519e-03
## nox         -1.294125e-01 -1.832846e-02  3.040988e+00  9.319399e-03
## rm           5.088004e-02  9.119014e-05 -4.479403e-02 -6.515114e-04
## age          9.119014e-05  3.981411e-04  1.061157e-02  1.819075e-04
## log(dis)    -4.479403e-02  1.061157e-02  1.756019e+00 -6.488587e-03
## rad         -6.515114e-04  1.819075e-04 -6.488587e-03  1.332982e-02
## tax         -6.259735e-05  2.847461e-06  7.319694e-05 -4.916486e-04
## ptratio     -4.366831e-03 -2.968289e-04  1.143656e-03 -3.735353e-03
## black        3.102445e-05 -2.677091e-06  1.777077e-04 -1.388101e-05
## log(lstat)  -1.867652e-02 -4.065278e-03 -1.196343e-01  9.055132e-03
##                       tax       ptratio         black    log(lstat)
## (Intercept) -4.506153e-03 -6.333378e-01 -3.325295e-03  0.0741962628
## log(crim)    2.782160e-04 -1.922571e-03  1.505703e-04 -0.0409166999
## indus       -2.256866e-04 -3.667932e-03  6.761737e-06 -0.0073513053
## nox         -1.953321e-03  3.440101e-01  1.277735e-03 -0.5340287563
## rm          -6.259735e-05 -4.366831e-03  3.102445e-05 -0.0186765214
## age          2.847461e-06 -2.968289e-04 -2.677091e-06 -0.0040652784
## log(dis)     7.319694e-05  1.143656e-03  1.777077e-04 -0.1196343040
## rad         -4.916486e-04 -3.735353e-03 -1.388101e-05  0.0090551321
## tax          3.227045e-05  3.198650e-06  1.163035e-06 -0.0002267425
## ptratio      3.198650e-06  3.586687e-02 -4.002219e-05 -0.0360927462
## black        1.163035e-06 -4.002219e-05  7.076101e-06  0.0002130596
## log(lstat)  -2.267425e-04 -3.609275e-02  2.130596e-04  0.5924842022
#influence(fit1) # regression diagnostics

# Diagnostic plots for fit1, drawn four to a page.
# NOTE(review): fit1 is created earlier in the file; presumably an lm fit -- confirm.
layout(matrix(c(1, 2, 3, 4), 2, 2)) # 2 x 2 grid of panels (matrix() fills column-wise)
plot(fit1)
# Return the device to a single panel. Without this the 2 x 2 layout stays
# active and every later plot in the script (e.g. the CVlm plots that follow)
# is squeezed into one cell of the grid.
layout(1)

# Method 2: k-fold cross-validation as a model-building check.
# Disabled earlier variant of the call:
# cv.lm(reg1, fit1, m = 3) # k(3) fold cross-validation for simple linear regression

# 3-fold cross-validation (m = 3) of fit1 on reg1train, with the
# residual-based plot selected through plotit = "Residual".
# The first two arguments are kept positional so that the call echoed in the
# CVlm warning message stays exactly as before.
CVlm(reg1train, fit1, m = 3, plotit = "Residual")
## Analysis of Variance Table
## 
## Response: medv
##             Df Sum Sq Mean Sq F value Pr(>F)    
## log(crim)    1   4204    4204  108.71 <2e-16 ***
## indus        1      0       0    0.01  0.942    
## nox          1     34      34    0.87  0.352    
## rm           1   6742    6742  174.33 <2e-16 ***
## age          1    230     230    5.95  0.015 *  
## log(dis)     1     74      74    1.90  0.168    
## rad          1     45      45    1.16  0.282    
## tax          1     34      34    0.89  0.347    
## ptratio      1     31      31    0.79  0.375    
## black        1     49      49    1.26  0.263    
## log(lstat)   1    105     105    2.72  0.100 .  
## Residuals  358  13845      39                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in CVlm(reg1train, fit1, m = 3, plotit = "Residual"): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
## 
## fold 1 
## Observations in test set: 123 
##                 4    12     15    17   19    22    27    29   30    31
## Predicted   24.18 21.97 18.419 19.32 17.0 17.86 17.88 19.28 19.7 17.33
## cvpred      27.53 22.99 18.857 18.96 16.2 17.52 17.61 19.67 20.0 16.03
## medv        33.40 18.90 18.200 23.10 20.2 19.60 16.60 18.40 21.0 12.70
## CV residual  5.87 -4.09 -0.657  4.14  4.0  2.08 -1.01 -1.27  1.0 -3.33
##                 37    39     44     59    62    63     72      74     75
## Predicted   20.142 21.44 23.971 22.570 19.99 21.97 22.592 23.3458 23.615
## cvpred      19.865 20.59 24.034 23.655 21.06 24.31 22.224 23.3361 23.501
## medv        20.000 24.70 24.700 23.300 16.00 22.20 21.700 23.4000 24.100
## CV residual  0.135  4.11  0.666 -0.355 -5.06 -2.11 -0.524  0.0639  0.599
##                78     84    90      92   93     94    97    99   102   105
## Predicted   21.95 22.544 23.27 21.4605 23.6 23.978 20.49  3.41 20.81 18.47
## cvpred      21.96 23.401 24.73 22.0634 25.6 25.786 19.97  1.34 21.33 17.96
## medv        20.80 22.900 28.70 22.0000 22.9 25.000 21.40  0.00 26.50 20.10
## CV residual -1.16 -0.501  3.97 -0.0634 -2.7 -0.786  1.43 -1.34  5.17  2.14
##               106   107   112    114   118   119   122   123   132  138
## Predicted   17.47 17.67 21.30 18.933 19.05 18.12 21.89 21.40 17.88 19.0
## cvpred      16.23 16.02 21.63 17.995 18.97 17.72 24.21 23.34 18.29 19.2
## medv        19.50 19.50 22.80 18.700 19.20 20.40 20.30 20.50 19.60 17.1
## CV residual  3.27  3.48  1.17  0.705  0.23  2.68 -3.91 -2.84  1.31 -2.1
##               147   148   149     153   159   162    163  168   179   183
## Predicted   15.87 16.01 16.74 14.8634 17.13  20.0 -1.175 17.4 22.33  21.6
## cvpred      16.17 13.59 14.63 15.3929 19.21  25.4  0.336 18.6 23.71  24.4
## medv        15.60 14.60 17.80 15.3000 24.30   0.0  0.000 23.8 29.90   0.0
## CV residual -0.57  1.01  3.17 -0.0929  5.09 -25.4 -0.336  5.2  6.19 -24.4
##               189   191   192   195   196   197   208   210   214   223
## Predicted   22.66  25.1 24.25 25.77  6.43 26.81 20.36 17.70 23.14 21.13
## cvpred      23.25  25.9 25.43 27.38  5.91 28.99 19.99 17.08 23.36 22.01
## medv        29.80   0.0 30.50 29.10  0.00 33.30 22.50 20.00 28.10 27.50
## CV residual  6.55 -25.9  5.07  1.72 -5.91  4.31  2.51  2.92  4.74  5.49
##                226    227   239   240   242   255   257   266   267    270
## Predicted    0.578  0.396 24.48 23.96 22.21 23.71  27.2 18.45 22.13 21.351
## cvpred      -0.375  0.547 25.37 25.22 22.72 24.41  30.9 17.44 21.83 21.227
## medv         0.000  0.000 23.70 23.30 20.10 21.90   0.0 22.80 30.70 20.700
## CV residual  0.375 -0.547 -1.67 -1.92 -2.62 -2.51 -30.9  5.36  8.87 -0.527
##               272   281   283  287   288   292    297   298    309   315
## Predicted   22.96  4.71  26.1 24.0 24.31  24.5 24.853 22.75 19.571 20.46
## cvpred      23.28  4.91  30.4 24.6 25.59  27.0 27.417 23.35 22.088 21.72
## medv        25.20  0.00   0.0 20.1 23.20   0.0 27.100 20.30 22.800 23.80
## CV residual  1.92 -4.91 -30.4 -4.5 -2.39 -27.0 -0.317 -3.05  0.712  2.08
##               316   317    318   322    324   328   331   332   337   339
## Predicted   19.18 19.95 20.125 21.35 19.023 21.84 22.87 23.20 22.32 22.93
## cvpred      19.45 19.47 19.603 22.48 19.196 21.34 21.89 22.18 22.83 23.49
## medv        16.20 17.80 19.800 23.10 18.500 22.20 19.80 17.10 19.50 20.60
## CV residual -3.25 -1.67  0.197  0.62 -0.696  0.86 -2.09 -5.08 -3.33 -2.89
##              341   345    352    354   363   374   375   388    390   401
## Predicted   21.4 25.04 23.387 26.601 13.52 11.57 -2.51 12.11 13.314 14.19
## cvpred      22.2 26.46 24.218 30.201 13.85  8.27 -8.65  8.87 11.813 12.17
## medv        18.7 31.20 24.100 30.100 20.80 13.80 13.80  7.40 11.500  5.60
## CV residual -3.5  4.74 -0.118 -0.101  6.95  5.53 22.45 -1.47 -0.313 -6.57
##               403   404   407  412   417    418   420  421   427    444
## Predicted   15.76 12.32 -3.27 14.6 16.27 11.372 16.81 14.3 14.13 16.337
## cvpred      14.83 10.58 -8.29 14.5 15.78  9.712 16.16 15.0 13.47 15.756
## medv        12.10  8.30 11.90 17.2  7.50 10.400  8.40 16.7 10.20 15.400
## CV residual -2.73 -2.28 20.19  2.7 -8.28  0.688 -7.76  1.7 -3.27 -0.356
##               449   454   457    462   466   468   469     477   481   483
## Predicted   15.76 19.18 15.11 17.396 16.25 14.78 15.81 17.1720 17.35 18.59
## cvpred      15.23 19.62 15.33 17.479 16.19 15.02 14.68 16.7968 17.67 20.58
## medv        14.10 17.80 12.70 17.700 19.90 19.10 19.10 16.7000 23.00 25.00
## CV residual -1.13 -1.82 -2.63  0.221  3.71  4.08  4.42 -0.0968  5.33  4.42
##               484    489   497    498
## Predicted   17.86 16.753 18.04 18.870
## cvpred      17.68 14.881 15.83 17.714
## medv        21.80 15.200 19.70 18.300
## CV residual  4.12  0.319  3.87  0.586
## 
## Sum of squares = 6721    Mean square = 54.6    n = 123 
## 
## fold 2 
## Observations in test set: 124 
##                 1     9    14    16     18    24    33    42    46    47
## Predicted   24.10 20.69 18.83 18.68 18.468 17.37 17.26 25.29 21.51 22.16
## cvpred      25.62 20.14 18.37 18.39 18.275 17.22 16.45 24.26 21.02 21.88
## medv        24.00 16.50 20.40 19.90 17.500 14.50 13.20 26.60 19.30 20.00
## CV residual -1.62 -3.64  2.03  1.51 -0.775 -2.72 -3.25  2.34 -1.72 -1.88
##                52     54     57    58   61     77    82   86     91    108
## Predicted   23.13 24.276 24.433 26.07 20.6 21.163 22.61 22.8 21.763 18.524
## cvpred      22.41 23.864 24.223 25.05 20.1 20.964 22.36 22.8 22.942 20.507
## medv        20.50 23.400 24.700 31.60 18.7 20.000 23.90 26.6 22.600 20.400
## CV residual -1.91 -0.464  0.477  6.55 -1.4 -0.964  1.54  3.8 -0.342 -0.107
##                109    117   120    121   127   129    131   133    135
## Predicted   19.111 20.198 19.35 22.241 19.34 19.06 19.109 18.44 15.552
## cvpred      20.597 21.455 20.79 22.441 19.17 20.26 19.852 18.61 14.967
## medv        19.800 21.200 19.30 22.000 15.70 18.00 19.200 23.00 15.600
## CV residual -0.797 -0.255 -1.49 -0.441 -3.47 -2.26 -0.652  4.39  0.633
##               136   139   142   146  161     164   165  166    167   172
## Predicted   18.77 17.94 14.39 17.73 16.6 -0.0541 17.92 16.7 -0.398 17.59
## cvpred      19.51 19.81 15.94 18.19 14.2 -1.2597 16.34 13.9 -1.658 15.87
## medv        18.10 13.30 14.40 13.80 27.0  0.0000 22.70 25.0  0.000 19.10
## CV residual -1.41 -6.51 -1.54 -4.39 12.8  1.2597  6.36 11.1  1.658  3.23
##              176   180   181   187   198    206   211   213    217    219
## Predicted   22.9  23.0  2.77  3.71 26.89 22.449 20.10 20.88 22.811 20.957
## cvpred      23.7  23.5  4.56  5.22 25.83 22.794 19.94 21.06 23.627 21.198
## medv        29.4   0.0  0.00  0.00 30.30 22.600 21.70 22.40 23.300 21.500
## CV residual  5.7 -23.5 -4.56 -5.22  4.47 -0.194  1.76  1.34 -0.327  0.302
##               222  224  228  232   234   236   237  238    243    244
## Predicted   19.78 20.0 21.5 22.2  1.67 20.35 20.99 22.4 23.453 24.271
## cvpred      20.02 19.0 20.5 20.8  1.62 20.14 19.96 20.7 22.486 23.015
## medv        21.70 30.1 31.6 31.7  0.00 24.00 25.10 31.5 22.200 23.700
## CV residual  1.68 11.1 11.1 10.9 -1.62  3.86  5.14 10.8 -0.286  0.685
##               249   253   256   262    271   273  274   277  285   286
## Predicted   22.77 25.54 24.28  22.8 21.160 22.30 25.3 24.86 28.5 25.97
## cvpred      21.68 24.21 23.91  22.5 20.801 21.96 24.1 23.89 28.7 26.19
## medv        24.50 29.60 20.90   0.0 21.100 24.40 35.2 33.20 32.2 22.00
## CV residual  2.82  5.39 -3.01 -22.5  0.299  2.44 11.1  9.31  3.5 -4.19
##                301   304   312   327   330   333   334   342    344   346
## Predicted   25.064 25.37 18.94 21.98 23.24 23.93 23.24 26.82 23.904 22.89
## cvpred      23.958 24.34 18.48 21.23 24.01 23.88 23.27 26.68 24.567 23.28
## medv        24.800 33.10 22.10 23.00 22.60 19.40 22.20 32.70 23.900 17.50
## CV residual  0.842  8.76  3.62  1.77 -1.41 -4.48 -1.07  6.02 -0.667 -5.78
##               348   349   358    364  367   369   371   373   377   379
## Predicted   25.26 25.98 17.27 15.138 11.3  9.46  14.8  13.0 16.08 15.05
## cvpred      25.34 25.94 17.93 16.305 11.6  9.10  14.1  13.5 16.69 15.47
## medv        23.10 24.50 21.70 16.800 21.9  0.00   0.0   0.0 13.90 13.10
## CV residual -2.24 -1.44  3.77  0.495 10.3 -9.10 -14.1 -13.5 -2.79 -2.37
##               382  389    391  392   399   405  408   414    422   423
## Predicted   15.73 11.6 14.353 16.4  12.4 11.81 10.9  9.82 13.771 12.35
## cvpred      16.07 12.7 14.959 17.1  12.9 11.23 10.4  8.55 13.462 11.11
## medv        10.90 10.2 15.100 23.2   0.0  8.50 27.9 16.30 14.200 20.80
## CV residual -5.17 -2.5  0.141  6.1 -12.9 -2.73 17.5  7.75  0.738  9.69
##                424   429   433   434   435   436   438      447  450   453
## Predicted   14.855 15.33 15.49 15.86 14.04 15.94 14.12 15.13490 15.3 16.82
## cvpred      14.239 15.14 14.11 15.35 12.97 15.52 13.82 14.89402 14.7 17.29
## medv        13.400 11.00 16.10 14.30 11.70 13.40  8.70 14.90000 13.0 16.10
## CV residual -0.839 -4.14  1.99 -1.05 -1.27 -2.12 -5.12  0.00598 -1.7 -1.19
##                461   463   464   472    476   478    479   482   487   491
## Predicted   16.852 17.05 17.07 16.58 14.717 12.86 15.406 17.89 16.97 14.55
## cvpred      16.184 17.08 16.75 15.49 13.725 12.65 15.014 16.12 16.03 16.09
## medv        16.400 19.50 20.20 19.60 13.300 12.00 14.600 23.70 19.10  8.10
## CV residual  0.216  2.42  3.45  4.11 -0.425 -0.65 -0.414  7.58  3.07 -7.99
##               492    493     496   504   506
## Predicted   18.29 18.737 20.7878 21.06 19.71
## cvpred      20.54 20.686 23.0461 22.15 21.28
## medv        13.60 20.100 23.1000 23.90 11.90
## CV residual -6.94 -0.586  0.0539  1.75 -9.38
## 
## Sum of squares = 4418    Mean square = 35.6    n = 124 
## 
## fold 3 
## Observations in test set: 123 
##                   2    3      7    11    13   26    28    32    34   41
## Predicted   23.0129 24.8 22.599 22.58 23.59 16.1 17.14 17.28 16.78 24.7
## cvpred      21.6136 22.7 22.396 23.15 23.98 17.2 18.23 18.03 17.78 22.8
## medv        21.6000 34.7 22.900 15.00 21.70 13.9 14.80 14.50 13.10 34.9
## CV residual -0.0136 12.0  0.504 -8.15 -2.28 -3.3 -3.43 -3.53 -4.68 12.1
##                43    45    48    49    56    60     64    67    69     71
## Predicted   23.59 22.70 21.07 19.51 27.88 21.73 24.336 22.96 21.09 24.324
## cvpred      23.98 23.05 21.89 20.95 26.31 21.63 24.352 23.44 22.46 24.705
## medv        25.30 21.20 16.60 14.40 35.40 19.60 25.000 19.40 17.40 24.200
## CV residual  1.32 -1.85 -5.29 -6.55  9.09 -2.03  0.648 -4.04 -5.06 -0.505
##                 73   76    79     87    88    89   101   103   104    116
## Predicted   23.034 22.3 22.82 22.528 21.26 22.02 20.30 17.59 18.47 18.640
## cvpred      23.362 22.5 22.99 22.159 20.37 19.83 19.07 16.89 17.85 18.027
## medv        22.800 21.4 21.20 22.500 22.20 23.60 27.50 18.60 19.30 18.300
## CV residual -0.562 -1.1 -1.79  0.341  1.83  3.77  8.43  1.71  1.45  0.273
##               124    134   137   144   150   151   152   154   157   169
## Predicted   20.85 17.969 18.12 16.73 17.34 18.75 15.33 16.21 15.22 17.36
## cvpred      20.17 17.761 17.72 16.82 17.11 17.69 14.45 15.66 15.04 18.02
## medv        17.30 18.400 17.40 15.60 15.40 21.50 19.60 19.40 13.10 23.80
## CV residual -2.87  0.639 -0.32 -1.22 -1.71  3.81  5.15  3.74 -1.94  5.78
##               174   177   178  182  184  193   194   199    202   204
## Predicted   20.61 21.89 21.13 21.0 20.0 24.9 26.59 27.25 23.535  6.06
## cvpred      18.82 20.93 19.09 19.3 17.7 24.2 25.93 27.03 23.458  6.63
## medv        23.60 23.20 24.60 36.2 32.5 36.4 31.10 34.60 24.100  0.00
## CV residual  4.78  2.27  5.51 16.9 14.8 12.2  5.17  7.57  0.642 -6.63
##               207  209     212   221   225   229   241   247     251   252
## Predicted   22.18 21.9 18.4029 21.07  1.05  25.3 24.72 21.92 23.9043 22.90
## cvpred      22.43 22.0 19.3256 19.93  1.39  24.1 24.85 23.25 24.3101 23.23
## medv        24.40 24.4 19.3000 26.70  0.00   0.0 22.00 24.30 24.4000 24.80
## CV residual  1.97  2.4 -0.0256  6.77 -1.39 -24.1 -2.85  1.05  0.0899  1.57
##               254    258  259  264   268   269   279  282   284   289
## Predicted    4.84  0.986 21.5 22.2  2.66  23.4 23.57 25.8  7.26 23.96
## cvpred       7.73  0.829 19.4 20.7  3.73  21.8 22.94 24.1  7.64 23.69
## medv         0.00  0.000 36.0 31.0  0.00   0.0 29.10 35.4  0.00 22.30
## CV residual -7.73 -0.829 16.6 10.3 -3.73 -21.8  6.16 11.3 -7.64 -1.39
##               294   296   299   300   302  303  307   311  313   314   319
## Predicted   24.74 25.14 24.08 26.52 25.03 24.7 23.7 16.09 19.0 19.67 20.81
## cvpred      25.32 25.52 24.32 26.52 24.33 24.9 21.1 17.85 18.3 18.86 20.74
## medv        23.90 28.60 22.50 29.00 22.00 26.4 33.4 16.10 19.4 21.60 23.10
## CV residual -1.42  3.08 -1.82  2.48 -2.33  1.5 12.3 -1.75  1.1  2.74  2.36
##               326   329   343   347  356   357   359   360   361   362
## Predicted   23.01 22.01 23.65 22.02 21.5 15.84 16.57 16.74 16.55 16.66
## cvpred      22.92 22.43 22.86 22.72 22.4 15.55 16.02 16.08 15.35 15.78
## medv        24.60 19.30 16.50 17.20 20.6 17.80 22.70 22.60 25.00 19.90
## CV residual  1.68 -3.13 -6.36 -5.52 -1.8  2.25  6.68  6.52  9.65  4.12
##               372   376   378   384   386     387   393   394   397   402
## Predicted    13.5 16.93 16.63 13.65 12.61 -2.5921 12.43 15.42 16.38 15.28
## cvpred       12.7 16.52 16.34 13.72 13.39  0.0085 13.14 15.22 15.95 15.38
## medv          0.0 15.00 13.30 12.30  7.20 10.5000  9.70 13.80 12.50  7.20
## CV residual -12.7 -1.52 -3.04 -1.42 -6.19 10.4915 -3.44 -1.42 -3.45 -8.18
##               406   409  416   419   431   432   437   439   442   446
## Predicted    12.1 12.47 14.7 11.75 14.96 16.01 14.89 14.32 16.37 15.41
## cvpred       13.5 13.14 15.6 13.44 15.74 16.77 15.12 15.18 16.28 15.66
## medv          0.0 17.20  7.2  8.80 14.50 14.10  9.60  8.40 17.10 11.80
## CV residual -13.5  4.06 -8.4 -4.64 -1.24 -2.67 -5.52 -6.78  0.82 -3.86
##               448   451   452   459   465   467  474  480   494   495
## Predicted   15.84 16.49 17.43 15.72 17.28 15.27 19.2 15.0 19.12 19.98
## cvpred      15.75 16.41 17.08 16.12 17.99 15.66 19.2 15.6 18.33 19.69
## medv        12.60 13.40 15.20 14.90 21.40 19.00 29.8 21.4 21.80 24.50
## CV residual -3.15 -3.01 -1.88 -1.22  3.41  3.34 10.6  5.8  3.47  4.81
##               499   502
## Predicted   19.38 21.77
## cvpred      18.66 19.94
## medv        21.20 22.40
## CV residual  2.54  2.46
## 
## Sum of squares = 5008    Mean square = 40.7    n = 123 
## 
## Overall (Sum over all 123 folds) 
##   ms 
## 43.6
# Repeat the same 3-fold cross-validation, this time with plotit = "Observed".
# The printed ANOVA and per-fold tables match the previous call; only the
# requested plot type differs.
CVlm(reg1train, fit1, m = 3, plotit = "Observed")
## Analysis of Variance Table
## 
## Response: medv
##             Df Sum Sq Mean Sq F value Pr(>F)    
## log(crim)    1   4204    4204  108.71 <2e-16 ***
## indus        1      0       0    0.01  0.942    
## nox          1     34      34    0.87  0.352    
## rm           1   6742    6742  174.33 <2e-16 ***
## age          1    230     230    5.95  0.015 *  
## log(dis)     1     74      74    1.90  0.168    
## rad          1     45      45    1.16  0.282    
## tax          1     34      34    0.89  0.347    
## ptratio      1     31      31    0.79  0.375    
## black        1     49      49    1.26  0.263    
## log(lstat)   1    105     105    2.72  0.100 .  
## Residuals  358  13845      39                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in CVlm(reg1train, fit1, m = 3, plotit = "Observed"): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
## 
## fold 1 
## Observations in test set: 123 
##                 4    12     15    17   19    22    27    29   30    31
## Predicted   24.18 21.97 18.419 19.32 17.0 17.86 17.88 19.28 19.7 17.33
## cvpred      27.53 22.99 18.857 18.96 16.2 17.52 17.61 19.67 20.0 16.03
## medv        33.40 18.90 18.200 23.10 20.2 19.60 16.60 18.40 21.0 12.70
## CV residual  5.87 -4.09 -0.657  4.14  4.0  2.08 -1.01 -1.27  1.0 -3.33
##                 37    39     44     59    62    63     72      74     75
## Predicted   20.142 21.44 23.971 22.570 19.99 21.97 22.592 23.3458 23.615
## cvpred      19.865 20.59 24.034 23.655 21.06 24.31 22.224 23.3361 23.501
## medv        20.000 24.70 24.700 23.300 16.00 22.20 21.700 23.4000 24.100
## CV residual  0.135  4.11  0.666 -0.355 -5.06 -2.11 -0.524  0.0639  0.599
##                78     84    90      92   93     94    97    99   102   105
## Predicted   21.95 22.544 23.27 21.4605 23.6 23.978 20.49  3.41 20.81 18.47
## cvpred      21.96 23.401 24.73 22.0634 25.6 25.786 19.97  1.34 21.33 17.96
## medv        20.80 22.900 28.70 22.0000 22.9 25.000 21.40  0.00 26.50 20.10
## CV residual -1.16 -0.501  3.97 -0.0634 -2.7 -0.786  1.43 -1.34  5.17  2.14
##               106   107   112    114   118   119   122   123   132  138
## Predicted   17.47 17.67 21.30 18.933 19.05 18.12 21.89 21.40 17.88 19.0
## cvpred      16.23 16.02 21.63 17.995 18.97 17.72 24.21 23.34 18.29 19.2
## medv        19.50 19.50 22.80 18.700 19.20 20.40 20.30 20.50 19.60 17.1
## CV residual  3.27  3.48  1.17  0.705  0.23  2.68 -3.91 -2.84  1.31 -2.1
##               147   148   149     153   159   162    163  168   179   183
## Predicted   15.87 16.01 16.74 14.8634 17.13  20.0 -1.175 17.4 22.33  21.6
## cvpred      16.17 13.59 14.63 15.3929 19.21  25.4  0.336 18.6 23.71  24.4
## medv        15.60 14.60 17.80 15.3000 24.30   0.0  0.000 23.8 29.90   0.0
## CV residual -0.57  1.01  3.17 -0.0929  5.09 -25.4 -0.336  5.2  6.19 -24.4
##               189   191   192   195   196   197   208   210   214   223
## Predicted   22.66  25.1 24.25 25.77  6.43 26.81 20.36 17.70 23.14 21.13
## cvpred      23.25  25.9 25.43 27.38  5.91 28.99 19.99 17.08 23.36 22.01
## medv        29.80   0.0 30.50 29.10  0.00 33.30 22.50 20.00 28.10 27.50
## CV residual  6.55 -25.9  5.07  1.72 -5.91  4.31  2.51  2.92  4.74  5.49
##                226    227   239   240   242   255   257   266   267    270
## Predicted    0.578  0.396 24.48 23.96 22.21 23.71  27.2 18.45 22.13 21.351
## cvpred      -0.375  0.547 25.37 25.22 22.72 24.41  30.9 17.44 21.83 21.227
## medv         0.000  0.000 23.70 23.30 20.10 21.90   0.0 22.80 30.70 20.700
## CV residual  0.375 -0.547 -1.67 -1.92 -2.62 -2.51 -30.9  5.36  8.87 -0.527
##               272   281   283  287   288   292    297   298    309   315
## Predicted   22.96  4.71  26.1 24.0 24.31  24.5 24.853 22.75 19.571 20.46
## cvpred      23.28  4.91  30.4 24.6 25.59  27.0 27.417 23.35 22.088 21.72
## medv        25.20  0.00   0.0 20.1 23.20   0.0 27.100 20.30 22.800 23.80
## CV residual  1.92 -4.91 -30.4 -4.5 -2.39 -27.0 -0.317 -3.05  0.712  2.08
##               316   317    318   322    324   328   331   332   337   339
## Predicted   19.18 19.95 20.125 21.35 19.023 21.84 22.87 23.20 22.32 22.93
## cvpred      19.45 19.47 19.603 22.48 19.196 21.34 21.89 22.18 22.83 23.49
## medv        16.20 17.80 19.800 23.10 18.500 22.20 19.80 17.10 19.50 20.60
## CV residual -3.25 -1.67  0.197  0.62 -0.696  0.86 -2.09 -5.08 -3.33 -2.89
##              341   345    352    354   363   374   375   388    390   401
## Predicted   21.4 25.04 23.387 26.601 13.52 11.57 -2.51 12.11 13.314 14.19
## cvpred      22.2 26.46 24.218 30.201 13.85  8.27 -8.65  8.87 11.813 12.17
## medv        18.7 31.20 24.100 30.100 20.80 13.80 13.80  7.40 11.500  5.60
## CV residual -3.5  4.74 -0.118 -0.101  6.95  5.53 22.45 -1.47 -0.313 -6.57
##               403   404   407  412   417    418   420  421   427    444
## Predicted   15.76 12.32 -3.27 14.6 16.27 11.372 16.81 14.3 14.13 16.337
## cvpred      14.83 10.58 -8.29 14.5 15.78  9.712 16.16 15.0 13.47 15.756
## medv        12.10  8.30 11.90 17.2  7.50 10.400  8.40 16.7 10.20 15.400
## CV residual -2.73 -2.28 20.19  2.7 -8.28  0.688 -7.76  1.7 -3.27 -0.356
##               449   454   457    462   466   468   469     477   481   483
## Predicted   15.76 19.18 15.11 17.396 16.25 14.78 15.81 17.1720 17.35 18.59
## cvpred      15.23 19.62 15.33 17.479 16.19 15.02 14.68 16.7968 17.67 20.58
## medv        14.10 17.80 12.70 17.700 19.90 19.10 19.10 16.7000 23.00 25.00
## CV residual -1.13 -1.82 -2.63  0.221  3.71  4.08  4.42 -0.0968  5.33  4.42
##               484    489   497    498
## Predicted   17.86 16.753 18.04 18.870
## cvpred      17.68 14.881 15.83 17.714
## medv        21.80 15.200 19.70 18.300
## CV residual  4.12  0.319  3.87  0.586
## 
## Sum of squares = 6721    Mean square = 54.6    n = 123 
## 
## fold 2 
## Observations in test set: 124 
##                 1     9    14    16     18    24    33    42    46    47
## Predicted   24.10 20.69 18.83 18.68 18.468 17.37 17.26 25.29 21.51 22.16
## cvpred      25.62 20.14 18.37 18.39 18.275 17.22 16.45 24.26 21.02 21.88
## medv        24.00 16.50 20.40 19.90 17.500 14.50 13.20 26.60 19.30 20.00
## CV residual -1.62 -3.64  2.03  1.51 -0.775 -2.72 -3.25  2.34 -1.72 -1.88
##                52     54     57    58   61     77    82   86     91    108
## Predicted   23.13 24.276 24.433 26.07 20.6 21.163 22.61 22.8 21.763 18.524
## cvpred      22.41 23.864 24.223 25.05 20.1 20.964 22.36 22.8 22.942 20.507
## medv        20.50 23.400 24.700 31.60 18.7 20.000 23.90 26.6 22.600 20.400
## CV residual -1.91 -0.464  0.477  6.55 -1.4 -0.964  1.54  3.8 -0.342 -0.107
##                109    117   120    121   127   129    131   133    135
## Predicted   19.111 20.198 19.35 22.241 19.34 19.06 19.109 18.44 15.552
## cvpred      20.597 21.455 20.79 22.441 19.17 20.26 19.852 18.61 14.967
## medv        19.800 21.200 19.30 22.000 15.70 18.00 19.200 23.00 15.600
## CV residual -0.797 -0.255 -1.49 -0.441 -3.47 -2.26 -0.652  4.39  0.633
##               136   139   142   146  161     164   165  166    167   172
## Predicted   18.77 17.94 14.39 17.73 16.6 -0.0541 17.92 16.7 -0.398 17.59
## cvpred      19.51 19.81 15.94 18.19 14.2 -1.2597 16.34 13.9 -1.658 15.87
## medv        18.10 13.30 14.40 13.80 27.0  0.0000 22.70 25.0  0.000 19.10
## CV residual -1.41 -6.51 -1.54 -4.39 12.8  1.2597  6.36 11.1  1.658  3.23
##              176   180   181   187   198    206   211   213    217    219
## Predicted   22.9  23.0  2.77  3.71 26.89 22.449 20.10 20.88 22.811 20.957
## cvpred      23.7  23.5  4.56  5.22 25.83 22.794 19.94 21.06 23.627 21.198
## medv        29.4   0.0  0.00  0.00 30.30 22.600 21.70 22.40 23.300 21.500
## CV residual  5.7 -23.5 -4.56 -5.22  4.47 -0.194  1.76  1.34 -0.327  0.302
##               222  224  228  232   234   236   237  238    243    244
## Predicted   19.78 20.0 21.5 22.2  1.67 20.35 20.99 22.4 23.453 24.271
## cvpred      20.02 19.0 20.5 20.8  1.62 20.14 19.96 20.7 22.486 23.015
## medv        21.70 30.1 31.6 31.7  0.00 24.00 25.10 31.5 22.200 23.700
## CV residual  1.68 11.1 11.1 10.9 -1.62  3.86  5.14 10.8 -0.286  0.685
##               249   253   256   262    271   273  274   277  285   286
## Predicted   22.77 25.54 24.28  22.8 21.160 22.30 25.3 24.86 28.5 25.97
## cvpred      21.68 24.21 23.91  22.5 20.801 21.96 24.1 23.89 28.7 26.19
## medv        24.50 29.60 20.90   0.0 21.100 24.40 35.2 33.20 32.2 22.00
## CV residual  2.82  5.39 -3.01 -22.5  0.299  2.44 11.1  9.31  3.5 -4.19
##                301   304   312   327   330   333   334   342    344   346
## Predicted   25.064 25.37 18.94 21.98 23.24 23.93 23.24 26.82 23.904 22.89
## cvpred      23.958 24.34 18.48 21.23 24.01 23.88 23.27 26.68 24.567 23.28
## medv        24.800 33.10 22.10 23.00 22.60 19.40 22.20 32.70 23.900 17.50
## CV residual  0.842  8.76  3.62  1.77 -1.41 -4.48 -1.07  6.02 -0.667 -5.78
##               348   349   358    364  367   369   371   373   377   379
## Predicted   25.26 25.98 17.27 15.138 11.3  9.46  14.8  13.0 16.08 15.05
## cvpred      25.34 25.94 17.93 16.305 11.6  9.10  14.1  13.5 16.69 15.47
## medv        23.10 24.50 21.70 16.800 21.9  0.00   0.0   0.0 13.90 13.10
## CV residual -2.24 -1.44  3.77  0.495 10.3 -9.10 -14.1 -13.5 -2.79 -2.37
##               382  389    391  392   399   405  408   414    422   423
## Predicted   15.73 11.6 14.353 16.4  12.4 11.81 10.9  9.82 13.771 12.35
## cvpred      16.07 12.7 14.959 17.1  12.9 11.23 10.4  8.55 13.462 11.11
## medv        10.90 10.2 15.100 23.2   0.0  8.50 27.9 16.30 14.200 20.80
## CV residual -5.17 -2.5  0.141  6.1 -12.9 -2.73 17.5  7.75  0.738  9.69
##                424   429   433   434   435   436   438      447  450   453
## Predicted   14.855 15.33 15.49 15.86 14.04 15.94 14.12 15.13490 15.3 16.82
## cvpred      14.239 15.14 14.11 15.35 12.97 15.52 13.82 14.89402 14.7 17.29
## medv        13.400 11.00 16.10 14.30 11.70 13.40  8.70 14.90000 13.0 16.10
## CV residual -0.839 -4.14  1.99 -1.05 -1.27 -2.12 -5.12  0.00598 -1.7 -1.19
##                461   463   464   472    476   478    479   482   487   491
## Predicted   16.852 17.05 17.07 16.58 14.717 12.86 15.406 17.89 16.97 14.55
## cvpred      16.184 17.08 16.75 15.49 13.725 12.65 15.014 16.12 16.03 16.09
## medv        16.400 19.50 20.20 19.60 13.300 12.00 14.600 23.70 19.10  8.10
## CV residual  0.216  2.42  3.45  4.11 -0.425 -0.65 -0.414  7.58  3.07 -7.99
##               492    493     496   504   506
## Predicted   18.29 18.737 20.7878 21.06 19.71
## cvpred      20.54 20.686 23.0461 22.15 21.28
## medv        13.60 20.100 23.1000 23.90 11.90
## CV residual -6.94 -0.586  0.0539  1.75 -9.38
## 
## Sum of squares = 4418    Mean square = 35.6    n = 124 
## 
## fold 3 
## Observations in test set: 123 
##                   2    3      7    11    13   26    28    32    34   41
## Predicted   23.0129 24.8 22.599 22.58 23.59 16.1 17.14 17.28 16.78 24.7
## cvpred      21.6136 22.7 22.396 23.15 23.98 17.2 18.23 18.03 17.78 22.8
## medv        21.6000 34.7 22.900 15.00 21.70 13.9 14.80 14.50 13.10 34.9
## CV residual -0.0136 12.0  0.504 -8.15 -2.28 -3.3 -3.43 -3.53 -4.68 12.1
##                43    45    48    49    56    60     64    67    69     71
## Predicted   23.59 22.70 21.07 19.51 27.88 21.73 24.336 22.96 21.09 24.324
## cvpred      23.98 23.05 21.89 20.95 26.31 21.63 24.352 23.44 22.46 24.705
## medv        25.30 21.20 16.60 14.40 35.40 19.60 25.000 19.40 17.40 24.200
## CV residual  1.32 -1.85 -5.29 -6.55  9.09 -2.03  0.648 -4.04 -5.06 -0.505
##                 73   76    79     87    88    89   101   103   104    116
## Predicted   23.034 22.3 22.82 22.528 21.26 22.02 20.30 17.59 18.47 18.640
## cvpred      23.362 22.5 22.99 22.159 20.37 19.83 19.07 16.89 17.85 18.027
## medv        22.800 21.4 21.20 22.500 22.20 23.60 27.50 18.60 19.30 18.300
## CV residual -0.562 -1.1 -1.79  0.341  1.83  3.77  8.43  1.71  1.45  0.273
##               124    134   137   144   150   151   152   154   157   169
## Predicted   20.85 17.969 18.12 16.73 17.34 18.75 15.33 16.21 15.22 17.36
## cvpred      20.17 17.761 17.72 16.82 17.11 17.69 14.45 15.66 15.04 18.02
## medv        17.30 18.400 17.40 15.60 15.40 21.50 19.60 19.40 13.10 23.80
## CV residual -2.87  0.639 -0.32 -1.22 -1.71  3.81  5.15  3.74 -1.94  5.78
##               174   177   178  182  184  193   194   199    202   204
## Predicted   20.61 21.89 21.13 21.0 20.0 24.9 26.59 27.25 23.535  6.06
## cvpred      18.82 20.93 19.09 19.3 17.7 24.2 25.93 27.03 23.458  6.63
## medv        23.60 23.20 24.60 36.2 32.5 36.4 31.10 34.60 24.100  0.00
## CV residual  4.78  2.27  5.51 16.9 14.8 12.2  5.17  7.57  0.642 -6.63
##               207  209     212   221   225   229   241   247     251   252
## Predicted   22.18 21.9 18.4029 21.07  1.05  25.3 24.72 21.92 23.9043 22.90
## cvpred      22.43 22.0 19.3256 19.93  1.39  24.1 24.85 23.25 24.3101 23.23
## medv        24.40 24.4 19.3000 26.70  0.00   0.0 22.00 24.30 24.4000 24.80
## CV residual  1.97  2.4 -0.0256  6.77 -1.39 -24.1 -2.85  1.05  0.0899  1.57
##               254    258  259  264   268   269   279  282   284   289
## Predicted    4.84  0.986 21.5 22.2  2.66  23.4 23.57 25.8  7.26 23.96
## cvpred       7.73  0.829 19.4 20.7  3.73  21.8 22.94 24.1  7.64 23.69
## medv         0.00  0.000 36.0 31.0  0.00   0.0 29.10 35.4  0.00 22.30
## CV residual -7.73 -0.829 16.6 10.3 -3.73 -21.8  6.16 11.3 -7.64 -1.39
##               294   296   299   300   302  303  307   311  313   314   319
## Predicted   24.74 25.14 24.08 26.52 25.03 24.7 23.7 16.09 19.0 19.67 20.81
## cvpred      25.32 25.52 24.32 26.52 24.33 24.9 21.1 17.85 18.3 18.86 20.74
## medv        23.90 28.60 22.50 29.00 22.00 26.4 33.4 16.10 19.4 21.60 23.10
## CV residual -1.42  3.08 -1.82  2.48 -2.33  1.5 12.3 -1.75  1.1  2.74  2.36
##               326   329   343   347  356   357   359   360   361   362
## Predicted   23.01 22.01 23.65 22.02 21.5 15.84 16.57 16.74 16.55 16.66
## cvpred      22.92 22.43 22.86 22.72 22.4 15.55 16.02 16.08 15.35 15.78
## medv        24.60 19.30 16.50 17.20 20.6 17.80 22.70 22.60 25.00 19.90
## CV residual  1.68 -3.13 -6.36 -5.52 -1.8  2.25  6.68  6.52  9.65  4.12
##               372   376   378   384   386     387   393   394   397   402
## Predicted    13.5 16.93 16.63 13.65 12.61 -2.5921 12.43 15.42 16.38 15.28
## cvpred       12.7 16.52 16.34 13.72 13.39  0.0085 13.14 15.22 15.95 15.38
## medv          0.0 15.00 13.30 12.30  7.20 10.5000  9.70 13.80 12.50  7.20
## CV residual -12.7 -1.52 -3.04 -1.42 -6.19 10.4915 -3.44 -1.42 -3.45 -8.18
##               406   409  416   419   431   432   437   439   442   446
## Predicted    12.1 12.47 14.7 11.75 14.96 16.01 14.89 14.32 16.37 15.41
## cvpred       13.5 13.14 15.6 13.44 15.74 16.77 15.12 15.18 16.28 15.66
## medv          0.0 17.20  7.2  8.80 14.50 14.10  9.60  8.40 17.10 11.80
## CV residual -13.5  4.06 -8.4 -4.64 -1.24 -2.67 -5.52 -6.78  0.82 -3.86
##               448   451   452   459   465   467  474  480   494   495
## Predicted   15.84 16.49 17.43 15.72 17.28 15.27 19.2 15.0 19.12 19.98
## cvpred      15.75 16.41 17.08 16.12 17.99 15.66 19.2 15.6 18.33 19.69
## medv        12.60 13.40 15.20 14.90 21.40 19.00 29.8 21.4 21.80 24.50
## CV residual -3.15 -3.01 -1.88 -1.22  3.41  3.34 10.6  5.8  3.47  4.81
##               499   502
## Predicted   19.38 21.77
## cvpred      18.66 19.94
## medv        21.20 22.40
## CV residual  2.54  2.46
## 
## Sum of squares = 5008    Mean square = 40.7    n = 123 
## 
## Overall (Sum over all 123 folds) 
##   ms 
## 43.6
#-------------------------------------------------------------------------------------#
# Method 3: all-subsets regression.
library(leaps)
# Search subsets of the 11 candidate predictors of medv, keeping the 10 best
# subsets of each size (nbest = 10).
# NOTE(review): data = reg1 here, whereas the cross-validation above was run
# on reg1train -- confirm which data set is intended.
# NOTE(review): nvmax is left at its default, so the summary reports subsets
# "of each size up to 8" although the formula has 11 predictors.
fit3 <- regsubsets(
  medv ~ log(crim) + indus + nox + rm + age + log(dis) + rad + tax +
    ptratio + black + log(lstat),
  data = reg1,
  nbest = 10
)
# Show which predictors enter each retained subset.
summary(fit3)
## Subset selection object
## Call: regsubsets.formula(medv ~ log(crim) + indus + nox + rm + age + 
##     log(dis) + rad + tax + ptratio + black + log(lstat), data = reg1, 
##     nbest = 10)
## 11 Variables  (and intercept)
##            Forced in Forced out
## log(crim)      FALSE      FALSE
## indus          FALSE      FALSE
## nox            FALSE      FALSE
## rm             FALSE      FALSE
## age            FALSE      FALSE
## log(dis)       FALSE      FALSE
## rad            FALSE      FALSE
## tax            FALSE      FALSE
## ptratio        FALSE      FALSE
## black          FALSE      FALSE
## log(lstat)     FALSE      FALSE
## 10 subsets of each size up to 8
## Selection Algorithm: exhaustive
##           log(crim) indus nox rm  age log(dis) rad tax ptratio black
## 1  ( 1 )  " "       " "   " " "*" " " " "      " " " " " "     " "  
## 1  ( 2 )  "*"       " "   " " " " " " " "      " " " " " "     " "  
## 1  ( 3 )  " "       " "   " " " " " " "*"      " " " " " "     " "  
## 1  ( 4 )  " "       " "   " " " " "*" " "      " " " " " "     " "  
## 1  ( 5 )  " "       " "   "*" " " " " " "      " " " " " "     " "  
## 1  ( 6 )  " "       " "   " " " " " " " "      " " "*" " "     " "  
## 1  ( 7 )  " "       "*"   " " " " " " " "      " " " " " "     " "  
## 1  ( 8 )  " "       " "   " " " " " " " "      "*" " " " "     " "  
## 1  ( 9 )  " "       " "   " " " " " " " "      " " " " " "     " "  
## 1  ( 10 ) " "       " "   " " " " " " " "      " " " " " "     "*"  
## 2  ( 1 )  "*"       " "   " " "*" " " " "      " " " " " "     " "  
## 2  ( 2 )  " "       " "   " " "*" " " " "      " " "*" " "     " "  
## 2  ( 3 )  " "       " "   " " "*" "*" " "      " " " " " "     " "  
## 2  ( 4 )  " "       " "   " " "*" " " "*"      " " " " " "     " "  
## 2  ( 5 )  " "       " "   "*" "*" " " " "      " " " " " "     " "  
## 2  ( 6 )  " "       "*"   " " "*" " " " "      " " " " " "     " "  
## 2  ( 7 )  " "       " "   " " "*" " " " "      "*" " " " "     " "  
## 2  ( 8 )  " "       " "   " " "*" " " " "      " " " " " "     " "  
## 2  ( 9 )  " "       " "   " " "*" " " " "      " " " " " "     "*"  
## 2  ( 10 ) " "       " "   " " "*" " " " "      " " " " "*"     " "  
## 3  ( 1 )  "*"       " "   " " "*" "*" " "      " " " " " "     " "  
## 3  ( 2 )  " "       " "   " " "*" "*" " "      " " "*" " "     " "  
## 3  ( 3 )  "*"       " "   " " "*" " " "*"      " " " " " "     " "  
## 3  ( 4 )  "*"       "*"   " " "*" " " " "      " " " " " "     " "  
## 3  ( 5 )  "*"       " "   "*" "*" " " " "      " " " " " "     " "  
## 3  ( 6 )  "*"       " "   " " "*" " " " "      " " "*" " "     " "  
## 3  ( 7 )  "*"       " "   " " "*" " " " "      " " " " "*"     " "  
## 3  ( 8 )  " "       " "   " " "*" "*" " "      "*" " " " "     " "  
## 3  ( 9 )  "*"       " "   " " "*" " " " "      " " " " " "     "*"  
## 3  ( 10 ) "*"       " "   " " "*" " " " "      "*" " " " "     " "  
## 4  ( 1 )  "*"       " "   " " "*" "*" " "      " " "*" " "     " "  
## 4  ( 2 )  "*"       " "   " " "*" "*" " "      " " " " "*"     " "  
## 4  ( 3 )  "*"       "*"   " " "*" "*" " "      " " " " " "     " "  
## 4  ( 4 )  "*"       " "   " " "*" "*" " "      " " " " " "     "*"  
## 4  ( 5 )  "*"       " "   " " "*" "*" " "      " " " " " "     " "  
## 4  ( 6 )  "*"       " "   " " "*" "*" "*"      " " " " " "     " "  
## 4  ( 7 )  "*"       " "   " " "*" "*" " "      "*" " " " "     " "  
## 4  ( 8 )  "*"       " "   "*" "*" "*" " "      " " " " " "     " "  
## 4  ( 9 )  " "       " "   " " "*" "*" " "      " " "*" " "     "*"  
## 4  ( 10 ) " "       " "   " " "*" "*" "*"      " " "*" " "     " "  
## 5  ( 1 )  "*"       " "   " " "*" "*" " "      "*" "*" " "     " "  
## 5  ( 2 )  "*"       " "   " " "*" "*" " "      " " "*" " "     " "  
## 5  ( 3 )  "*"       " "   " " "*" "*" " "      " " "*" "*"     " "  
## 5  ( 4 )  "*"       " "   " " "*" "*" " "      " " "*" " "     "*"  
## 5  ( 5 )  "*"       "*"   " " "*" "*" " "      " " "*" " "     " "  
## 5  ( 6 )  "*"       " "   " " "*" "*" "*"      " " "*" " "     " "  
## 5  ( 7 )  "*"       " "   "*" "*" "*" " "      " " "*" " "     " "  
## 5  ( 8 )  "*"       " "   " " "*" "*" " "      " " " " "*"     " "  
## 5  ( 9 )  "*"       "*"   " " "*" "*" " "      " " " " "*"     " "  
## 5  ( 10 ) "*"       " "   " " "*" "*" " "      " " " " "*"     "*"  
## 6  ( 1 )  "*"       " "   " " "*" "*" " "      "*" "*" "*"     " "  
## 6  ( 2 )  "*"       " "   " " "*" "*" " "      "*" "*" " "     " "  
## 6  ( 3 )  "*"       " "   " " "*" "*" " "      "*" "*" " "     "*"  
## 6  ( 4 )  "*"       " "   "*" "*" "*" " "      "*" "*" " "     " "  
## 6  ( 5 )  "*"       " "   " " "*" "*" " "      " " "*" "*"     " "  
## 6  ( 6 )  "*"       " "   " " "*" "*" "*"      "*" "*" " "     " "  
## 6  ( 7 )  "*"       "*"   " " "*" "*" " "      "*" "*" " "     " "  
## 6  ( 8 )  "*"       " "   " " "*" "*" " "      " " "*" " "     "*"  
## 6  ( 9 )  "*"       "*"   " " "*" "*" " "      " " "*" " "     " "  
## 6  ( 10 ) "*"       " "   " " "*" "*" " "      " " "*" "*"     "*"  
## 7  ( 1 )  "*"       " "   " " "*" "*" " "      "*" "*" "*"     " "  
## 7  ( 2 )  "*"       " "   " " "*" "*" " "      "*" "*" "*"     "*"  
## 7  ( 3 )  "*"       " "   " " "*" "*" " "      "*" "*" " "     "*"  
## 7  ( 4 )  "*"       " "   "*" "*" "*" " "      "*" "*" " "     " "  
## 7  ( 5 )  "*"       " "   " " "*" "*" "*"      "*" "*" "*"     " "  
## 7  ( 6 )  "*"       "*"   " " "*" "*" " "      "*" "*" "*"     " "  
## 7  ( 7 )  "*"       " "   "*" "*" "*" " "      "*" "*" "*"     " "  
## 7  ( 8 )  "*"       "*"   " " "*" "*" " "      "*" "*" " "     " "  
## 7  ( 9 )  "*"       " "   " " "*" "*" "*"      "*" "*" " "     " "  
## 7  ( 10 ) "*"       " "   " " "*" "*" " "      " " "*" "*"     "*"  
## 8  ( 1 )  "*"       " "   " " "*" "*" " "      "*" "*" "*"     "*"  
## 8  ( 2 )  "*"       " "   " " "*" "*" "*"      "*" "*" "*"     " "  
## 8  ( 3 )  "*"       " "   "*" "*" "*" " "      "*" "*" "*"     " "  
## 8  ( 4 )  "*"       "*"   " " "*" "*" " "      "*" "*" "*"     " "  
## 8  ( 5 )  "*"       " "   "*" "*" "*" " "      "*" "*" " "     "*"  
## 8  ( 6 )  "*"       " "   " " "*" "*" "*"      "*" "*" "*"     "*"  
## 8  ( 7 )  "*"       "*"   " " "*" "*" " "      "*" "*" "*"     "*"  
## 8  ( 8 )  "*"       " "   "*" "*" "*" " "      "*" "*" "*"     "*"  
## 8  ( 9 )  "*"       "*"   " " "*" "*" " "      "*" "*" " "     "*"  
## 8  ( 10 ) "*"       " "   " " "*" "*" "*"      "*" "*" " "     "*"  
##           log(lstat)
## 1  ( 1 )  " "       
## 1  ( 2 )  " "       
## 1  ( 3 )  " "       
## 1  ( 4 )  " "       
## 1  ( 5 )  " "       
## 1  ( 6 )  " "       
## 1  ( 7 )  " "       
## 1  ( 8 )  " "       
## 1  ( 9 )  "*"       
## 1  ( 10 ) " "       
## 2  ( 1 )  " "       
## 2  ( 2 )  " "       
## 2  ( 3 )  " "       
## 2  ( 4 )  " "       
## 2  ( 5 )  " "       
## 2  ( 6 )  " "       
## 2  ( 7 )  " "       
## 2  ( 8 )  "*"       
## 2  ( 9 )  " "       
## 2  ( 10 ) " "       
## 3  ( 1 )  " "       
## 3  ( 2 )  " "       
## 3  ( 3 )  " "       
## 3  ( 4 )  " "       
## 3  ( 5 )  " "       
## 3  ( 6 )  " "       
## 3  ( 7 )  " "       
## 3  ( 8 )  " "       
## 3  ( 9 )  " "       
## 3  ( 10 ) " "       
## 4  ( 1 )  " "       
## 4  ( 2 )  " "       
## 4  ( 3 )  " "       
## 4  ( 4 )  " "       
## 4  ( 5 )  "*"       
## 4  ( 6 )  " "       
## 4  ( 7 )  " "       
## 4  ( 8 )  " "       
## 4  ( 9 )  " "       
## 4  ( 10 ) " "       
## 5  ( 1 )  " "       
## 5  ( 2 )  "*"       
## 5  ( 3 )  " "       
## 5  ( 4 )  " "       
## 5  ( 5 )  " "       
## 5  ( 6 )  " "       
## 5  ( 7 )  " "       
## 5  ( 8 )  "*"       
## 5  ( 9 )  " "       
## 5  ( 10 ) " "       
## 6  ( 1 )  " "       
## 6  ( 2 )  "*"       
## 6  ( 3 )  " "       
## 6  ( 4 )  " "       
## 6  ( 5 )  "*"       
## 6  ( 6 )  " "       
## 6  ( 7 )  " "       
## 6  ( 8 )  "*"       
## 6  ( 9 )  "*"       
## 6  ( 10 ) " "       
## 7  ( 1 )  "*"       
## 7  ( 2 )  " "       
## 7  ( 3 )  "*"       
## 7  ( 4 )  "*"       
## 7  ( 5 )  " "       
## 7  ( 6 )  " "       
## 7  ( 7 )  " "       
## 7  ( 8 )  "*"       
## 7  ( 9 )  "*"       
## 7  ( 10 ) "*"       
## 8  ( 1 )  "*"       
## 8  ( 2 )  "*"       
## 8  ( 3 )  "*"       
## 8  ( 4 )  "*"       
## 8  ( 5 )  "*"       
## 8  ( 6 )  " "       
## 8  ( 7 )  " "       
## 8  ( 8 )  " "       
## 8  ( 9 )  "*"       
## 8  ( 10 ) "*"
# Draw the model-by-variable grid for the subset search held in fit3.
# Each row is one retained model; rows are ordered by R-squared (scale = "r2").
plot(fit3, scale = "r2")

#------------------------------------------------------------------#

# To save an object from the R environment as a CSV file in the working directory:

#getwd()
#write.table(reg1, file = "boston1.csv", sep = ",", col.names = NA)

# To save the data as a tab-delimited text file:
#write.table(reg1, "reg2.txt", sep="\t")