# Report R's current memory use and memory limit.
# memory.size() and memory.limit() only ever returned real figures on Windows,
# and since R 4.2.0 they are stubs that warn and return Inf on every platform,
# so they are called only where they are still meaningful.
has_memory_api <- .Platform$OS.type == "windows" && getRversion() < "4.2.0"
if (has_memory_api) memory.size()
## [1] 30.46
if (has_memory_api) memory.limit()
## [1] 8096
library(MASS)
library(car)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(readr)
#install.packages()
# Load the Boston housing data from MASS and inspect its structure.
data(Boston, package = "MASS")
str(Boston)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Columns are reached through with() instead of attach(): attach() would leave
# every column on the search path for the rest of the session, and the `rm`
# column shares its name with base::rm().
# Correlation between median home value (medv) and rooms per dwelling (rm).
with(Boston, cor(medv, rm))
## [1] 0.6953599
with(Boston, sd(medv))
## [1] 9.197104
with(Boston, sd(rm))
## [1] 0.7026171
# Check the identity cov(x, y) = cor(x, y) * sd(x) * sd(y).
with(Boston, cor(medv, rm) * sd(medv) * sd(rm))
## [1] 4.493446
with(Boston, cov(medv, rm))
## [1] 4.493446
# Pairwise correlation matrix of all columns.
cor(Boston)
##                crim          zn       indus         chas         nox
## crim     1.00000000 -0.20046922  0.40658341 -0.055891582  0.42097171
## zn      -0.20046922  1.00000000 -0.53382819 -0.042696719 -0.51660371
## indus    0.40658341 -0.53382819  1.00000000  0.062938027  0.76365145
## chas    -0.05589158 -0.04269672  0.06293803  1.000000000  0.09120281
## nox      0.42097171 -0.51660371  0.76365145  0.091202807  1.00000000
## rm      -0.21924670  0.31199059 -0.39167585  0.091251225 -0.30218819
## age      0.35273425 -0.56953734  0.64477851  0.086517774  0.73147010
## dis     -0.37967009  0.66440822 -0.70802699 -0.099175780 -0.76923011
## rad      0.62550515 -0.31194783  0.59512927 -0.007368241  0.61144056
## tax      0.58276431 -0.31456332  0.72076018 -0.035586518  0.66802320
## ptratio  0.28994558 -0.39167855  0.38324756 -0.121515174  0.18893268
## black   -0.38506394  0.17552032 -0.35697654  0.048788485 -0.38005064
## lstat    0.45562148 -0.41299457  0.60379972 -0.053929298  0.59087892
## medv    -0.38830461  0.36044534 -0.48372516  0.175260177 -0.42732077
##                  rm         age         dis          rad         tax
## crim    -0.21924670  0.35273425 -0.37967009  0.625505145  0.58276431
## zn       0.31199059 -0.56953734  0.66440822 -0.311947826 -0.31456332
## indus   -0.39167585  0.64477851 -0.70802699  0.595129275  0.72076018
## chas     0.09125123  0.08651777 -0.09917578 -0.007368241 -0.03558652
## nox     -0.30218819  0.73147010 -0.76923011  0.611440563  0.66802320
## rm       1.00000000 -0.24026493  0.20524621 -0.209846668 -0.29204783
## age     -0.24026493  1.00000000 -0.74788054  0.456022452  0.50645559
## dis      0.20524621 -0.74788054  1.00000000 -0.494587930 -0.53443158
## rad     -0.20984667  0.45602245 -0.49458793  1.000000000  0.91022819
## tax     -0.29204783  0.50645559 -0.53443158  0.910228189  1.00000000
## ptratio -0.35550149  0.26151501 -0.23247054  0.464741179  0.46085304
## black    0.12806864 -0.27353398  0.29151167 -0.444412816 -0.44180801
## lstat   -0.61380827  0.60233853 -0.49699583  0.488676335  0.54399341
## medv     0.69535995 -0.37695457  0.24992873 -0.381626231 -0.46853593
##            ptratio       black      lstat       medv
## crim     0.2899456 -0.38506394  0.4556215 -0.3883046
## zn      -0.3916785  0.17552032 -0.4129946  0.3604453
## indus    0.3832476 -0.35697654  0.6037997 -0.4837252
## chas    -0.1215152  0.04878848 -0.0539293  0.1752602
## nox      0.1889327 -0.38005064  0.5908789 -0.4273208
## rm      -0.3555015  0.12806864 -0.6138083  0.6953599
## age      0.2615150 -0.27353398  0.6023385 -0.3769546
## dis     -0.2324705  0.29151167 -0.4969958  0.2499287
## rad      0.4647412 -0.44441282  0.4886763 -0.3816262
## tax      0.4608530 -0.44180801  0.5439934 -0.4685359
## ptratio  1.0000000 -0.17738330  0.3740443 -0.5077867
## black   -0.1773833  1.00000000 -0.3660869  0.3334608
## lstat    0.3740443 -0.36608690  1.0000000 -0.7376627
## medv    -0.5077867  0.33346082 -0.7376627  1.0000000
# One-time setup if the package is not installed yet:
# install.packages("corrgram")
library(corrgram)
# Draw a correlogram of every pair of Boston columns.
corrgram(x = Boston)

# Simple linear regression on the Boston data: median home value (medv)
# explained by the average number of rooms (rm).
data("Boston", package = "MASS")
RegModel.1 <- lm(formula = medv ~ rm, data = Boston)
summary(RegModel.1)
## 
## Call:
## lm(formula = medv ~ rm, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.346  -2.547   0.090   2.986  39.433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -34.671      2.650  -13.08   <2e-16 ***
## rm             9.102      0.419   21.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.616 on 504 degrees of freedom
## Multiple R-squared:  0.4835, Adjusted R-squared:  0.4825 
## F-statistic: 471.8 on 1 and 504 DF,  p-value: < 2.2e-16
# Multiple regression on iris: petal length explained by petal width,
# sepal length and sepal width.
data("iris", package = "datasets")
RegModel.2 <- lm(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  data = iris
)
summary(RegModel.2)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width, 
##     data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99333 -0.17656 -0.01004  0.18558  1.06909 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.26271    0.29741  -0.883    0.379    
## Petal.Width   1.44679    0.06761  21.399   <2e-16 ***
## Sepal.Length  0.72914    0.05832  12.502   <2e-16 ***
## Sepal.Width  -0.64601    0.06850  -9.431   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.319 on 146 degrees of freedom
## Multiple R-squared:  0.968,  Adjusted R-squared:  0.9674 
## F-statistic:  1473 on 3 and 146 DF,  p-value: < 2.2e-16
# Reduced iris model: the same response without Sepal.Width.
RegModel.3 <- lm(
  formula = Petal.Length ~ Petal.Width + Sepal.Length,
  data = iris
)
summary(RegModel.3)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15506 -0.21920 -0.02115  0.25986  1.35204 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.50714    0.33696  -4.473 1.54e-05 ***
## Petal.Width   1.74810    0.07533  23.205  < 2e-16 ***
## Sepal.Length  0.54226    0.06934   7.820 9.41e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4032 on 147 degrees of freedom
## Multiple R-squared:  0.9485, Adjusted R-squared:  0.9478 
## F-statistic:  1354 on 2 and 147 DF,  p-value: < 2.2e-16
# Residual diagnostics for the three-predictor iris model.
# Durbin-Watson test, one-sided for positive autocorrelation.
dwtest(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  alternative = "greater",
  data = iris
)
## 
##  Durbin-Watson test
## 
## data:  Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width
## DW = 1.783, p-value = 0.07433
## alternative hypothesis: true autocorrelation is greater than 0
# Variance inflation factor of each predictor in RegModel.2.
vif(RegModel.2)
##  Petal.Width Sepal.Length  Sepal.Width 
##     3.889961     3.415733     1.305515
# Breusch-Pagan test (not studentized), with the variance modelled on the
# fitted values of RegModel.2.
bptest(
  formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width,
  varformula = ~ fitted.values(RegModel.2),
  studentize = FALSE,
  data = iris
)
## 
##  Breusch-Pagan test
## 
## data:  Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width
## BP = 7.3936, df = 1, p-value = 0.006546
library(gvlma)
# Global validation of the linear-model assumptions for RegModel.2.
gvlma(x = RegModel.2)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width, 
##     data = iris)
## 
## Coefficients:
##  (Intercept)   Petal.Width  Sepal.Length   Sepal.Width  
##      -0.2627        1.4468        0.7291       -0.6460  
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = RegModel.2) 
## 
##                      Value   p-value                   Decision
## Global Stat        27.1496 1.854e-05 Assumptions NOT satisfied!
## Skewness            0.1331 7.153e-01    Assumptions acceptable.
## Kurtosis            2.2582 1.329e-01    Assumptions acceptable.
## Link Function       9.6629 1.880e-03 Assumptions NOT satisfied!
## Heteroscedasticity 15.0955 1.022e-04 Assumptions NOT satisfied!
# Score test for non-constant error variance against the fitted values.
ncvTest(model = RegModel.2)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 7.393616    Df = 1     p = 0.006545577
# Show the lm diagnostic plots together in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = RegModel.2)

# Rectifying heteroscedasticity: estimate a Box-Cox transformation for the
# response variable.

distBCMod <- caret::BoxCoxTrans(y = iris[["Petal.Length"]])
print(distBCMod)
## Box-Cox Transformation
## 
## 150 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.600   4.350   3.758   5.100   6.900 
## 
## Largest/Smallest: 6.9 
## Sample Skewness: -0.269 
## 
## Estimated Lambda: 0.9 
## With fudge factor, no transformation is applied
#


# Load the housing sample.
# The CSV location defaults to the machine-specific path this script was
# written with; set the HOUSING_DATA_CSV environment variable to read the file
# from somewhere else without editing the script.
housing_csv <- Sys.getenv("HOUSING_DATA_CSV")
if (!nzchar(housing_csv)) {
  housing_csv <- file.path(
    "C:/Users/Dell/Desktop/IMS proschool",
    "4.0 Simple & Multiple Linear Regression",
    "4.0 Simple & Multiple Linear Regression",
    "HousingData.csv"
  )
}
HousingData <- read_csv(housing_csv)
## Parsed with column specification:
## cols(
##   SegmentofCity = col_character(),
##   SellingPrice000s = col_integer(),
##   HouseSize00Sqft = col_integer(),
##   NumberofBathrms = col_integer(),
##   NumberofBedrms = col_integer(),
##   GarageSize = col_integer()
## )
# Inspect column types and the first few values.
str(HousingData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    25 obs. of  6 variables:
##  $ SegmentofCity   : chr  "Northwest" "South" "Northeast" "Northwest" ...
##  $ SellingPrice000s: int  290 95 170 375 350 125 310 275 340 215 ...
##  $ HouseSize00Sqft : int  21 11 19 38 24 10 31 25 27 22 ...
##  $ NumberofBathrms : int  2 1 2 4 3 2 4 2 3 3 ...
##  $ NumberofBedrms  : int  4 2 3 5 4 2 4 3 5 4 ...
##  $ GarageSize      : int  2 0 2 3 2 0 2 2 3 2 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 6
##   .. ..$ SegmentofCity   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ SellingPrice000s: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ HouseSize00Sqft : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ NumberofBathrms : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ NumberofBedrms  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ GarageSize      : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
# Correlation matrix of the numeric housing variables.
# Columns are selected by type rather than by position, so a change in column
# order cannot hand the character column SegmentofCity to cor().
cor(Filter(is.numeric, HousingData))
##                  SellingPrice000s HouseSize00Sqft NumberofBathrms
## SellingPrice000s        1.0000000       0.8772285       0.8320072
## HouseSize00Sqft         0.8772285       1.0000000       0.8527317
## NumberofBathrms         0.8320072       0.8527317       1.0000000
## NumberofBedrms          0.8182372       0.8554267       0.7736148
## GarageSize              0.7686695       0.8555314       0.7368829
##                  NumberofBedrms GarageSize
## SellingPrice000s      0.8182372  0.7686695
## HouseSize00Sqft       0.8554267  0.8555314
## NumberofBathrms       0.7736148  0.7368829
## NumberofBedrms        1.0000000  0.8878382
## GarageSize            0.8878382  1.0000000
library(corrgram)
# Correlogram of the same numeric columns.
corrgram(Filter(is.numeric, HousingData))

# Full housing model: selling price on garage size, house size, bathrooms
# and bedrooms.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ GarageSize + HouseSize00Sqft +
    NumberofBathrms + NumberofBedrms,
  data = HousingData
)
summary(RegModel.1)
## 
## Call:
## lm(formula = SellingPrice000s ~ GarageSize + HouseSize00Sqft + 
##     NumberofBathrms + NumberofBedrms, data = HousingData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -80.724 -30.289  -8.493  31.024  79.276 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -59.416     54.607  -1.088   0.2895  
## GarageSize       -10.803     27.329  -0.395   0.6968  
## HouseSize00Sqft    6.506      3.247   2.004   0.0588 .
## NumberofBathrms   26.400     18.808   1.404   0.1757  
## NumberofBedrms    29.101     26.215   1.110   0.2801  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.87 on 20 degrees of freedom
## Multiple R-squared:  0.8081, Adjusted R-squared:  0.7698 
## F-statistic: 21.06 on 4 and 20 DF,  p-value: 6.138e-07
library(car)
# Variance inflation factors for the predictors of the current RegModel.1.
vif(mod = RegModel.1)
##      GarageSize HouseSize00Sqft NumberofBathrms  NumberofBedrms 
##        5.679113        6.577989        3.792467        5.826416
# Refit without GarageSize: selling price on house size, bathrooms and
# bedrooms. This overwrites the previous RegModel.1.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms +
    NumberofBedrms,
  data = HousingData
)
summary(RegModel.1)
## 
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms + 
##     NumberofBedrms, data = HousingData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -80.371 -29.541  -7.677  30.384  79.629 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -47.342     44.347  -1.068   0.2979  
## HouseSize00Sqft    6.020      2.944   2.045   0.0536 .
## NumberofBathrms   27.029     18.360   1.472   0.1558  
## NumberofBedrms    23.035     20.823   1.106   0.2811  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.94 on 21 degrees of freedom
## Multiple R-squared:  0.8066, Adjusted R-squared:  0.779 
## F-statistic:  29.2 on 3 and 21 DF,  p-value: 1.104e-07
# Diagnostic plots for the three-predictor housing model in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = RegModel.1)

# Refit without NumberofBedrms: selling price on house size and bathrooms.
# This overwrites the previous RegModel.1.
RegModel.1 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms,
  data = HousingData
)
summary(RegModel.1)
## 
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms, 
##     data = HousingData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -79.40 -29.32 -10.10  33.00  80.60 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -12.349     31.239  -0.395  0.69642   
## HouseSize00Sqft    7.947      2.386   3.330  0.00304 **
## NumberofBathrms   30.344     18.206   1.667  0.10974   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.17 on 22 degrees of freedom
## Multiple R-squared:  0.7954, Adjusted R-squared:  0.7768 
## F-statistic: 42.76 on 2 and 22 DF,  p-value: 2.634e-08
# Diagnostic plots for the two-predictor housing model in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = RegModel.1)

# Single-predictor housing model: selling price on house size only.
# This reuses the name RegModel.2, replacing the earlier iris fit.
RegModel.2 <- lm(
  formula = SellingPrice000s ~ HouseSize00Sqft,
  data = HousingData
)
summary(RegModel.2)
## 
## Call:
## lm(formula = SellingPrice000s ~ HouseSize00Sqft, data = HousingData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -80.92 -24.93 -13.31  36.13  87.75 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -9.867     32.387  -0.305    0.763    
## HouseSize00Sqft   11.338      1.294   8.763 8.67e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46.88 on 23 degrees of freedom
## Multiple R-squared:  0.7695, Adjusted R-squared:  0.7595 
## F-statistic:  76.8 on 1 and 23 DF,  p-value: 8.675e-09
# Diagnostic plots for the single-predictor housing model in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = RegModel.2)

# Switch back to a single plotting panel.
par(mfrow = c(1, 1))

# Scatter plot of selling price against house size with the fitted line of
# RegModel.2. with() supplies the columns, so HousingData does not have to be
# attach()ed to the search path.
with(HousingData, plot(HouseSize00Sqft, SellingPrice000s))
abline(RegModel.2)

# Compare the nested housing models with an F-test: RegModel.1 (house size +
# bathrooms) against RegModel.2 (house size only).
anova(RegModel.1,RegModel.2)
## Analysis of Variance Table
## 
## Model 1: SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms
## Model 2: SellingPrice000s ~ HouseSize00Sqft
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1     22 44884                           
## 2     23 50552 -1   -5667.8 2.7781 0.1097
# Same comparison with the smaller model listed first; Df and Sum of Sq change
# sign while F and the p-value stay the same.
anova(RegModel.2,RegModel.1)
## Analysis of Variance Table
## 
## Model 1: SellingPrice000s ~ HouseSize00Sqft
## Model 2: SellingPrice000s ~ HouseSize00Sqft + NumberofBathrms
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1     23 50552                           
## 2     22 44884  1    5667.8 2.7781 0.1097
# Fitted selling prices for the houses the model was trained on.
predict(RegModel.2)
##        1        2        3        4        5        6        7        8 
## 228.2385 114.8550 205.5618 420.9904 262.2535 103.5166 341.6220 273.5919 
##        9       10       11       12       13       14       15       16 
## 296.2686 239.5768 216.9001 262.2535 398.3137 352.9603 148.8700 194.2234 
##       17       18       19       20       21       22       23       24 
## 205.5618 318.9453 364.2987 250.9152 375.6370 273.5919 182.8851 228.2385 
##       25 
## 284.9302
# A single new observation with HouseSize00Sqft = 29, reused by the calls below.
new_house <- data.frame(HouseSize00Sqft = 29)
# Point prediction only. `level` is not passed here because predict.lm() uses
# it only when an `interval` is requested.
predict(RegModel.2, newdata = new_house)
##        1 
## 318.9453
# 95% confidence interval for the mean response at the new observation.
predict(RegModel.2, newdata = new_house, interval = "confidence", level = 0.95)
##        fit      lwr      upr
## 1 318.9453 295.3191 342.5715
# 99% confidence interval for the same point.
predict(RegModel.2, newdata = new_house, interval = "confidence", level = 0.99)
##        fit      lwr      upr
## 1 318.9453 286.8826 351.0079