# Report how much memory the R session is using.
# memory.size() / memory.limit() were Windows-only and have been unsupported
# stubs since R 4.2.0 (they return Inf with a warning), so a portable figure
# is taken from gc() instead: the sum of the "used (Mb)" column over the
# Ncells and Vcells rows. There is no portable equivalent of memory.limit().
# (The original Windows session recorded 30.46 MB in use and a limit of
# 8096 MB.)
sum(gc()[, 2])
library(MASS)
library(car)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readr)
# Load the Boston data frame from MASS and inspect its structure.
data(Boston, package = "MASS")
str(Boston)
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Columns are reached through with(Boston, ...) instead of attach(Boston):
# attach() would place every column on the search path for the rest of the
# session, including one named `rm`.
with(Boston, cor(medv, rm))
## [1] 0.6953599
with(Boston, sd(medv))
## [1] 9.197104
with(Boston, sd(rm))
## [1] 0.7026171
# Identity check: cor(x, y) * sd(x) * sd(y) reproduces cov(x, y).
with(Boston, cor(medv, rm) * sd(medv) * sd(rm))
## [1] 4.493446
with(Boston, cov(medv, rm))
## [1] 4.493446
# Pairwise correlation matrix over all 14 columns.
cor(Boston)
## crim zn indus chas nox
## crim 1.00000000 -0.20046922 0.40658341 -0.055891582 0.42097171
## zn -0.20046922 1.00000000 -0.53382819 -0.042696719 -0.51660371
## indus 0.40658341 -0.53382819 1.00000000 0.062938027 0.76365145
## chas -0.05589158 -0.04269672 0.06293803 1.000000000 0.09120281
## nox 0.42097171 -0.51660371 0.76365145 0.091202807 1.00000000
## rm -0.21924670 0.31199059 -0.39167585 0.091251225 -0.30218819
## age 0.35273425 -0.56953734 0.64477851 0.086517774 0.73147010
## dis -0.37967009 0.66440822 -0.70802699 -0.099175780 -0.76923011
## rad 0.62550515 -0.31194783 0.59512927 -0.007368241 0.61144056
## tax 0.58276431 -0.31456332 0.72076018 -0.035586518 0.66802320
## ptratio 0.28994558 -0.39167855 0.38324756 -0.121515174 0.18893268
## black -0.38506394 0.17552032 -0.35697654 0.048788485 -0.38005064
## lstat 0.45562148 -0.41299457 0.60379972 -0.053929298 0.59087892
## medv -0.38830461 0.36044534 -0.48372516 0.175260177 -0.42732077
## rm age dis rad tax
## crim -0.21924670 0.35273425 -0.37967009 0.625505145 0.58276431
## zn 0.31199059 -0.56953734 0.66440822 -0.311947826 -0.31456332
## indus -0.39167585 0.64477851 -0.70802699 0.595129275 0.72076018
## chas 0.09125123 0.08651777 -0.09917578 -0.007368241 -0.03558652
## nox -0.30218819 0.73147010 -0.76923011 0.611440563 0.66802320
## rm 1.00000000 -0.24026493 0.20524621 -0.209846668 -0.29204783
## age -0.24026493 1.00000000 -0.74788054 0.456022452 0.50645559
## dis 0.20524621 -0.74788054 1.00000000 -0.494587930 -0.53443158
## rad -0.20984667 0.45602245 -0.49458793 1.000000000 0.91022819
## tax -0.29204783 0.50645559 -0.53443158 0.910228189 1.00000000
## ptratio -0.35550149 0.26151501 -0.23247054 0.464741179 0.46085304
## black 0.12806864 -0.27353398 0.29151167 -0.444412816 -0.44180801
## lstat -0.61380827 0.60233853 -0.49699583 0.488676335 0.54399341
## medv 0.69535995 -0.37695457 0.24992873 -0.381626231 -0.46853593
## ptratio black lstat medv
## crim 0.2899456 -0.38506394 0.4556215 -0.3883046
## zn -0.3916785 0.17552032 -0.4129946 0.3604453
## indus 0.3832476 -0.35697654 0.6037997 -0.4837252
## chas -0.1215152 0.04878848 -0.0539293 0.1752602
## nox 0.1889327 -0.38005064 0.5908789 -0.4273208
## rm -0.3555015 0.12806864 -0.6138083 0.6953599
## age 0.2615150 -0.27353398 0.6023385 -0.3769546
## dis -0.2324705 0.29151167 -0.4969958 0.2499287
## rad 0.4647412 -0.44441282 0.4886763 -0.3816262
## tax 0.4608530 -0.44180801 0.5439934 -0.4685359
## ptratio 1.0000000 -0.17738330 0.3740443 -0.5077867
## black -0.1773833 1.00000000 -0.3660869 0.3334608
## lstat 0.3740443 -0.36608690 1.0000000 -0.7376627
## medv -0.5077867 0.33346082 -0.7376627 1.0000000
# Draw a correlogram of the Boston columns (graphical counterpart of the
# cor(Boston) matrix).
library("corrgram")
corrgram(x = Boston)

# Simple linear regression on the Boston data: medv explained by rm alone.
data("Boston", package = "MASS")
RegModel.1 <- lm(formula = medv ~ rm, data = Boston)
summary(object = RegModel.1)
##
## Call:
## lm(formula = medv ~ rm, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.346 -2.547 0.090 2.986 39.433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -34.671 2.650 -13.08 <2e-16 ***
## rm 9.102 0.419 21.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.616 on 504 degrees of freedom
## Multiple R-squared: 0.4835, Adjusted R-squared: 0.4825
## F-statistic: 471.8 on 1 and 504 DF, p-value: < 2.2e-16
# Multiple regression on iris: Petal.Length explained by the three other
# numeric measurements.
data("iris", package = "datasets")
RegModel.2 <- lm(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  data = iris
)
summary(object = RegModel.2)
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
## data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.99333 -0.17656 -0.01004 0.18558 1.06909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.26271 0.29741 -0.883 0.379
## Petal.Width 1.44679 0.06761 21.399 <2e-16 ***
## Sepal.Length 0.72914 0.05832 12.502 <2e-16 ***
## Sepal.Width -0.64601 0.06850 -9.431 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.319 on 146 degrees of freedom
## Multiple R-squared: 0.968, Adjusted R-squared: 0.9674
## F-statistic: 1473 on 3 and 146 DF, p-value: < 2.2e-16
# Reduced iris model: the same response, without the Sepal.Width term.
RegModel.3 <- lm(
  formula = Petal.Length ~ Petal.Width + Sepal.Length,
  data = iris
)
summary(object = RegModel.3)
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15506 -0.21920 -0.02115 0.25986 1.35204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.50714 0.33696 -4.473 1.54e-05 ***
## Petal.Width 1.74810 0.07533 23.205 < 2e-16 ***
## Sepal.Length 0.54226 0.06934 7.820 9.41e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4032 on 147 degrees of freedom
## Multiple R-squared: 0.9485, Adjusted R-squared: 0.9478
## F-statistic: 1354 on 2 and 147 DF, p-value: < 2.2e-16
# Durbin-Watson test (one-sided, "greater") on the three-predictor iris model.
# NOTE(review): the statistic depends on the row order of `iris`; confirm that
# order is meaningful before interpreting the result.
dwtest(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  data = iris,
  alternative = "greater"
)
##
## Durbin-Watson test
##
## data: Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width
## DW = 1.783, p-value = 0.07433
## alternative hypothesis: true autocorrelation is greater than 0
# Variance inflation factor of each predictor in the iris model.
car::vif(RegModel.2)
## Petal.Width Sepal.Length Sepal.Width
## 3.889961 3.415733 1.305515
# Breusch-Pagan test (non-studentized) with the variance modelled on the
# fitted values of RegModel.2.
bptest(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  varformula = ~ fitted.values(RegModel.2),
  studentize = FALSE,
  data = iris
)
##
## Breusch-Pagan test
##
## data: Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width
## BP = 7.3936, df = 1, p-value = 0.006546
# Global validation of the linear-model assumptions for the iris model.
library("gvlma")
gvlma(x = RegModel.2)
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
## data = iris)
##
## Coefficients:
## (Intercept) Petal.Width Sepal.Length Sepal.Width
## -0.2627 1.4468 0.7291 -0.6460
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = RegModel.2)
##
## Value p-value Decision
## Global Stat 27.1496 1.854e-05 Assumptions NOT satisfied!
## Skewness 0.1331 7.153e-01 Assumptions acceptable.
## Kurtosis 2.2582 1.329e-01 Assumptions acceptable.
## Link Function 9.6629 1.880e-03 Assumptions NOT satisfied!
## Heteroscedasticity 15.0955 1.022e-04 Assumptions NOT satisfied!
# Score test for non-constant error variance on the iris model.
car::ncvTest(RegModel.2)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 7.393616 Df = 1 p = 0.006545577
# Show the lm diagnostic plots for the iris model on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into later, unrelated plots.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.2)
par(old_par)

# Estimate a Box-Cox transformation for the response used in the iris models.
petal_length <- iris[["Petal.Length"]]
distBCMod <- caret::BoxCoxTrans(petal_length)
print(distBCMod)
## Box-Cox Transformation
##
## 150 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.600 4.350 3.758 5.100 6.900
##
## Largest/Smallest: 6.9
## Sample Skewness: -0.269
##
## Estimated Lambda: 0.9
## With fudge factor, no transformation is applied
# Read the housing data set.
# The original script hard-coded one machine-specific absolute path. That path
# is still tried first; if it is missing, a copy in the working directory is
# used, and a clear error is raised when neither file exists.
housing_candidates <- c(
  "C:/Users/Dell/Desktop/IMS proschool/4.0 Simple & Multiple Linear Regression/4.0 Simple & Multiple Linear Regression/HousingData.csv",
  "HousingData.csv"
)
housing_found <- housing_candidates[file.exists(housing_candidates)]
if (length(housing_found) == 0L) {
  stop(
    "HousingData.csv not found; looked in: ",
    paste(housing_candidates, collapse = ", "),
    call. = FALSE
  )
}
HousingData <- read_csv(housing_found[[1]])
## Parsed with column specification:
## cols(
## SegmentofCity = col_character(),
## SellingPrice000s = col_integer(),
## HouseSize00Sqft = col_integer(),
## NumberofBathrms = col_integer(),
## NumberofBedrms = col_integer(),
## GarageSize = col_integer()
## )
str(HousingData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 25 obs. of 6 variables:
## $ SegmentofCity : chr "Northwest" "South" "Northeast" "Northwest" ...
## $ SellingPrice000s: int 290 95 170 375 350 125 310 275 340 215 ...
## $ HouseSize00Sqft : int 21 11 19 38 24 10 31 25 27 22 ...
## $ NumberofBathrms : int 2 1 2 4 3 2 4 2 3 3 ...
## $ NumberofBedrms : int 4 2 3 5 4 2 4 3 5 4 ...
## $ GarageSize : int 2 0 2 3 2 0 2 2 3 2 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 6
## .. ..$ SegmentofCity : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ SellingPrice000s: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ HouseSize00Sqft : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ NumberofBathrms : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ NumberofBedrms : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ GarageSize : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Correlation matrix of columns 2 to 6 (column 1, SegmentofCity, is character).
cor(HousingData[, 2:6])
## SellingPrice000s HouseSize00Sqft NumberofBathrms
## SellingPrice000s 1.0000000 0.8772285 0.8320072
## HouseSize00Sqft 0.8772285 1.0000000 0.8527317
## NumberofBathrms 0.8320072 0.8527317 1.0000000
## NumberofBedrms 0.8182372 0.8554267 0.7736148
## GarageSize 0.7686695 0.8555314 0.7368829
## NumberofBedrms GarageSize
## SellingPrice000s 0.8182372 0.7686695
## HouseSize00Sqft 0.8554267 0.8555314
## NumberofBathrms 0.7736148 0.7368829
## NumberofBedrms 1.0000000 0.8878382
## GarageSize 0.8878382 1.0000000
# Correlogram of the same columns (2 to 6) of the housing data.
library("corrgram")
corrgram(x = HousingData[, 2:6])

# Full housing model: SellingPrice000s on all four numeric predictors.
# This reassigns RegModel.1, which previously held the Boston fit.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ GarageSize + HouseSize00Sqft +
    NumberofBathrms + NumberofBedrms,
  data = HousingData
)
summary(object = RegModel.1)
##
## Call:
## lm(formula = SellingPrice000s ~ GarageSize + HouseSize00Sqft +
## NumberofBathrms + NumberofBedrms, data = HousingData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.724 -30.289 -8.493 31.024 79.276
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -59.416 54.607 -1.088 0.2895
## GarageSize -10.803 27.329 -0.395 0.6968
## HouseSize00Sqft 6.506 3.247 2.004 0.0588 .
## NumberofBathrms 26.400 18.808 1.404 0.1757
## NumberofBedrms 29.101 26.215 1.110 0.2801
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.87 on 20 degrees of freedom
## Multiple R-squared: 0.8081, Adjusted R-squared: 0.7698
## F-statistic: 21.06 on 4 and 20 DF, p-value: 6.138e-07
# Variance inflation factors for the four-predictor housing model.
library("car")
car::vif(RegModel.1)
## GarageSize HouseSize00Sqft NumberofBathrms NumberofBedrms
## 5.679113 6.577989 3.792467 5.826416
# Refit the housing model without GarageSize; RegModel.1 is overwritten.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms +
    NumberofBedrms,
  data = HousingData
)
summary(object = RegModel.1)
##
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms +
## NumberofBedrms, data = HousingData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.371 -29.541 -7.677 30.384 79.629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -47.342 44.347 -1.068 0.2979
## HouseSize00Sqft 6.020 2.944 2.045 0.0536 .
## NumberofBathrms 27.029 18.360 1.472 0.1558
## NumberofBedrms 23.035 20.823 1.106 0.2811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.94 on 21 degrees of freedom
## Multiple R-squared: 0.8066, Adjusted R-squared: 0.779
## F-statistic: 29.2 on 3 and 21 DF, p-value: 1.104e-07
# Diagnostic plots for the current RegModel.1 on a 2 x 2 grid.
# The previous par() settings are saved and restored so the layout change
# stays local to this plot.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.1)
par(old_par)

# Refit once more without NumberofBedrms; RegModel.1 is overwritten again.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms,
  data = HousingData
)
summary(object = RegModel.1)
##
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms,
## data = HousingData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.40 -29.32 -10.10 33.00 80.60
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.349 31.239 -0.395 0.69642
## HouseSize00Sqft 7.947 2.386 3.330 0.00304 **
## NumberofBathrms 30.344 18.206 1.667 0.10974
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.17 on 22 degrees of freedom
## Multiple R-squared: 0.7954, Adjusted R-squared: 0.7768
## F-statistic: 42.76 on 2 and 22 DF, p-value: 2.634e-08
# Diagnostic plots for the two-predictor RegModel.1 on a 2 x 2 grid.
# The previous par() settings are saved and restored so the layout change
# stays local to this plot.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.1)
par(old_par)

# Single-predictor housing model. This overwrites RegModel.2, which until
# here held the iris fit.
RegModel.2 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft,
  data = HousingData
)
summary(object = RegModel.2)
##
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft, data = HousingData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.92 -24.93 -13.31 36.13 87.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.867 32.387 -0.305 0.763
## HouseSize00Sqft 11.338 1.294 8.763 8.67e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 46.88 on 23 degrees of freedom
## Multiple R-squared: 0.7695, Adjusted R-squared: 0.7595
## F-statistic: 76.8 on 1 and 23 DF, p-value: 8.675e-09
# Diagnostic plots for the single-predictor RegModel.2 on a 2 x 2 grid.
# The previous par() settings are saved and restored so the layout change
# stays local to this plot.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.2)
par(old_par)

# Scatter plot of selling price against house size with the fitted
# RegModel.2 line drawn on top.
# with() replaces attach(HousingData): the columns are visible only inside
# this call rather than staying on the search path, and the axis labels are
# still the bare column names.
par(mfrow = c(1, 1))
with(HousingData, plot(HouseSize00Sqft, SellingPrice000s))
abline(RegModel.2)

# F-test comparing the two nested housing models, larger model listed first.
anova(RegModel.1, RegModel.2)
## Analysis of Variance Table
##
## Model 1: SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms
## Model 2: SellingPrice000s ~ HouseSize00Sqft
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 22 44884
## 2 23 50552 -1 -5667.8 2.7781 0.1097
# Same comparison with the smaller model first: F and the p-value are
# unchanged, only the signs of Df and Sum of Sq flip.
anova(RegModel.2, RegModel.1)
## Analysis of Variance Table
##
## Model 1: SellingPrice000s ~ HouseSize00Sqft
## Model 2: SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 23 50552
## 2 22 44884 1 5667.8 2.7781 0.1097
# In-sample predictions of RegModel.2, one per row used in the fit.
predict(object = RegModel.2)
## 1 2 3 4 5 6 7 8
## 228.2385 114.8550 205.5618 420.9904 262.2535 103.5166 341.6220 273.5919
## 9 10 11 12 13 14 15 16
## 296.2686 239.5768 216.9001 262.2535 398.3137 352.9603 148.8700 194.2234
## 17 18 19 20 21 22 23 24
## 205.5618 318.9453 364.2987 250.9152 375.6370 273.5919 182.8851 228.2385
## 25
## 284.9302
# Predictions from RegModel.2 for one new observation, HouseSize00Sqft = 29.
new_house <- data.frame(HouseSize00Sqft = 29)

# Point prediction only. The original call also passed level = 0.95, but
# predict() ignores `level` when no interval is requested, so the no-op
# argument is dropped; the result is unchanged.
predict(RegModel.2, newdata = new_house)
## 1
## 318.9453
# Fitted value with a 95% confidence interval.
predict(RegModel.2, newdata = new_house, interval = "confidence", level = 0.95)
## fit lwr upr
## 1 318.9453 295.3191 342.5715
# Same fitted value with a wider, 99% confidence interval.
predict(RegModel.2, newdata = new_house, interval = "confidence", level = 0.99)
## fit lwr upr
## 1 318.9453 286.8826 351.0079