# Load the Boston housing data and build the modelling frame.
df <- read.csv("C:/Users/Soyeon/Downloads/boston_csv.csv")

# Overwrite MEDV for eight tracts. Each replacement value is paired with its
# tract through a lookup, so the result does not depend on the row order of
# the CSV (a positional assignment would silently mis-assign values if the
# file were sorted differently).
medv_fix <- data.frame(
  TRACT = c(2042, 2084, 3585, 3823, 905, 911, 923, 1805),
  MEDV = c(22.1, 24.2, 33, 27, 8.2, 14.8, 14.4, 19)
)
fix_pos <- match(df[, "TRACT"], medv_fix$TRACT)
df[!is.na(fix_pos), "MEDV"] <- medv_fix$MEDV[fix_pos[!is.na(fix_pos)]]

# Drop the CMEDV column.
df <- subset(df, select = -CMEDV)

# df1 is df without its first six columns.
df1 <- df[, -c(1:6)]

# Intercept-only model of MEDV on df1.
nullmodel <- lm(MEDV ~ 1, data = df1)

Median MEDV by town

library(ggplot2)
# Bar chart of the median MEDV per town, with dashed horizontal reference
# lines at the overall median (blue), half of it (black) and 1.5 times it (red).
ggplot(data = df, aes(x = as.factor(TOWN), y = MEDV)) +
  # `fun` replaces the `fun.y` argument, which is deprecated since ggplot2 3.3.0.
  geom_bar(
    stat = "summary", fun = "median",
    fill = "slateblue", color = "darkslategrey"
  ) +
  # Rotate the town labels so they stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(
    yintercept = median(df$MEDV),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = median(df$MEDV) * 0.5,
    linetype = "dashed", color = "black"
  ) +
  geom_hline(
    yintercept = median(df$MEDV) * 1.5,
    linetype = "dashed", color = "red"
  )

Reference levels (0.5, 1 and 1.5 times the median MEDV)

# Reference levels derived from the overall median of MEDV:
# v2 is the median itself, v1 is 1.5 times it and v3 is half of it.
v2 <- median(df$MEDV)
v1 <- v2 * 1.5
v3 <- v2 * 0.5

Data split by MEDV level

# Select rows by MEDV level and drop the first six columns in the same step.
# The three ">=" subsets are nested rather than disjoint: whenever
# v1 >= v2 >= v3, the rows of data1 are also in data2, and those of data2
# are also in data3. data4 holds the rows strictly below v3.
data1 <- df[df$MEDV >= v1, -(1:6), drop = FALSE]
data2 <- df[df$MEDV >= v2, -(1:6), drop = FALSE]
data3 <- df[df$MEDV >= v3, -(1:6), drop = FALSE]
data4 <- df[df$MEDV < v3, -(1:6), drop = FALSE]

OLS

# OLS of MEDV on all other columns of data1; print the fit summary.
summary(lm(formula = MEDV ~ ., data = data1))
## 
## Call:
## lm(formula = MEDV ~ ., data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3784 -2.2160 -0.3298  1.9769 10.3782 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 76.133917  32.088454   2.373  0.02119 *  
## CRIM         2.845555   0.833171   3.415  0.00120 ** 
## ZN           0.023641   0.024143   0.979  0.33177    
## INDUS        0.075276   0.203811   0.369  0.71329    
## CHAS        -1.350325   1.444603  -0.935  0.35401    
## NOX         -1.270119  14.814804  -0.086  0.93199    
## RM           5.458100   0.879607   6.205 7.49e-08 ***
## AGE         -0.023467   0.034458  -0.681  0.49872    
## DIS         -0.789209   0.448804  -1.758  0.08423 .  
## RAD          0.312064   0.261931   1.191  0.23861    
## TAX         -0.023539   0.009794  -2.404  0.01964 *  
## PTRATIO     -0.507316   0.357298  -1.420  0.16129    
## B           -0.140430   0.074393  -1.888  0.06435 .  
## LSTAT       -1.095182   0.324146  -3.379  0.00134 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.7 on 55 degrees of freedom
## Multiple R-squared:  0.7653, Adjusted R-squared:  0.7098 
## F-statistic: 13.79 on 13 and 55 DF,  p-value: 6.28e-13
# OLS of MEDV on all other columns of data2; print the fit summary.
summary(lm(formula = MEDV ~ ., data = data2))
## 
## Call:
## lm(formula = MEDV ~ ., data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.8027  -2.8150  -0.7192   2.3930  23.5108 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.733985   8.773119   2.819 0.005213 ** 
## CRIM         0.753561   0.319874   2.356 0.019284 *  
## ZN           0.034051   0.017530   1.943 0.053240 .  
## INDUS       -0.021778   0.090497  -0.241 0.810036    
## CHAS         1.713507   1.090644   1.571 0.117472    
## NOX         -8.049136   7.065586  -1.139 0.255750    
## RM           5.510101   0.594500   9.268  < 2e-16 ***
## AGE         -0.014084   0.018534  -0.760 0.448042    
## DIS         -1.361005   0.277668  -4.902 1.75e-06 ***
## RAD          0.397122   0.139524   2.846 0.004805 ** 
## TAX         -0.017863   0.006378  -2.801 0.005515 ** 
## PTRATIO     -0.700572   0.203118  -3.449 0.000664 ***
## B           -0.005400   0.014698  -0.367 0.713633    
## LSTAT       -0.638199   0.103402  -6.172 2.83e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.94 on 241 degrees of freedom
## Multiple R-squared:  0.658,  Adjusted R-squared:  0.6396 
## F-statistic: 35.68 on 13 and 241 DF,  p-value: < 2.2e-16
# OLS of MEDV on all other columns of data3; print the fit summary.
summary(lm(formula = MEDV ~ ., data = data3))
## 
## Call:
## lm(formula = MEDV ~ ., data = data3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.6285  -2.6702  -0.6423   1.5991  26.4182 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  32.804166   5.208879   6.298 7.05e-10 ***
## CRIM         -0.072140   0.070319  -1.026 0.305479    
## ZN            0.046581   0.013618   3.421 0.000680 ***
## INDUS         0.026938   0.061075   0.441 0.659370    
## CHAS          2.536719   0.851409   2.979 0.003040 ** 
## NOX         -16.761981   3.838477  -4.367 1.56e-05 ***
## RM            4.157814   0.429872   9.672  < 2e-16 ***
## AGE          -0.002288   0.013297  -0.172 0.863480    
## DIS          -1.489785   0.199238  -7.477 3.86e-13 ***
## RAD           0.313269   0.072307   4.332 1.81e-05 ***
## TAX          -0.013086   0.003883  -3.370 0.000815 ***
## PTRATIO      -0.891252   0.129545  -6.880 1.96e-11 ***
## B             0.009768   0.003194   3.058 0.002359 ** 
## LSTAT        -0.520934   0.054225  -9.607  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.681 on 461 degrees of freedom
## Multiple R-squared:  0.7181, Adjusted R-squared:  0.7101 
## F-statistic: 90.33 on 13 and 461 DF,  p-value: < 2.2e-16
# OLS of MEDV on all other columns of data4; print the fit summary.
# The recorded output for this subset reports five coefficients as not
# defined because of singularities.
summary(lm(formula = MEDV ~ ., data = data4))
## 
## Call:
## lm(formula = MEDV ~ ., data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0999 -0.6267 -0.2893  0.9024  2.7954 
## 
## Coefficients: (5 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 16.784830  10.616189   1.581   0.1281  
## CRIM        -0.007291   0.017488  -0.417   0.6808  
## ZN                 NA         NA      NA       NA  
## INDUS       -0.046733   0.193220  -0.242   0.8111  
## CHAS               NA         NA      NA       NA  
## NOX          7.816987  12.789745   0.611   0.5473  
## RM          -1.110628   0.604163  -1.838   0.0796 .
## AGE         -0.021139   0.038256  -0.553   0.5861  
## DIS          0.535069   3.158936   0.169   0.8670  
## RAD                NA         NA      NA       NA  
## TAX                NA         NA      NA       NA  
## PTRATIO            NA         NA      NA       NA  
## B           -0.001742   0.003417  -0.510   0.6152  
## LSTAT       -0.192668   0.089469  -2.153   0.0425 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.564 on 22 degrees of freedom
## Multiple R-squared:  0.2974, Adjusted R-squared:  0.04186 
## F-statistic: 1.164 on 8 and 22 DF,  p-value: 0.3633

R-squared

# R-squared of the full OLS fit on each subset, in the order data1..data4.
vapply(
  list(data1, data2, data3, data4),
  function(subset_df) summary(lm(MEDV ~ ., subset_df))$r.squared,
  numeric(1)
)
## [1] 0.7652835 0.6580495 0.7180884 0.2973664

Adjusted R-squared

# Adjusted R-squared of the full OLS fit on each subset, in the order
# data1..data4.
vapply(
  list(data1, data2, data3, data4),
  function(subset_df) summary(lm(MEDV ~ ., subset_df))$adj.r.squared,
  numeric(1)
)
## [1] 0.70980503 0.63960406 0.71013863 0.04186322

Stepwise selection (direction: both)

# Stepwise AIC selection in both directions, starting from the full OLS model
# on data1. stepAIC is namespace-qualified so the call does not rely on MASS
# having been attached; trace = FALSE suppresses the step-by-step log.
# The summary of the selected model is stored in d1 and printed.
d1 <- summary(
  MASS::stepAIC(lm(MEDV ~ ., data1), direction = "both", trace = FALSE)
)
d1
## 
## Call:
## lm(formula = MEDV ~ CRIM + ZN + RM + DIS + RAD + TAX + PTRATIO + 
##     B + LSTAT, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6654 -2.2911 -0.2838  2.0756 10.2981 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 79.166949  27.830680   2.845 0.006103 ** 
## CRIM         2.617029   0.737955   3.546 0.000773 ***
## ZN           0.028691   0.020397   1.407 0.164785    
## RM           5.291905   0.837078   6.322 3.76e-08 ***
## DIS         -0.656910   0.354964  -1.851 0.069230 .  
## RAD          0.306250   0.226875   1.350 0.182216    
## TAX         -0.021362   0.008448  -2.529 0.014145 *  
## PTRATIO     -0.458703   0.290194  -1.581 0.119299    
## B           -0.154094   0.067374  -2.287 0.025791 *  
## LSTAT       -1.135433   0.261199  -4.347 5.53e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.612 on 59 degrees of freedom
## Multiple R-squared:   0.76,  Adjusted R-squared:  0.7234 
## F-statistic: 20.76 on 9 and 59 DF,  p-value: 3.207e-15
# Stepwise AIC selection in both directions, starting from the full OLS model
# on data2. stepAIC is namespace-qualified so the call does not rely on MASS
# having been attached; trace = FALSE suppresses the step-by-step log.
# The summary of the selected model is stored in d2 and printed.
d2 <- summary(
  MASS::stepAIC(lm(MEDV ~ ., data2), direction = "both", trace = FALSE)
)
d2
## 
## Call:
## lm(formula = MEDV ~ CRIM + ZN + CHAS + NOX + RM + DIS + RAD + 
##     TAX + PTRATIO + LSTAT, data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.2666  -2.8281  -0.6053   2.4246  22.9893 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.463355   7.535659   3.114 0.002069 ** 
## CRIM          0.769255   0.266863   2.883 0.004296 ** 
## ZN            0.035943   0.016867   2.131 0.034090 *  
## CHAS          1.577904   1.060425   1.488 0.138044    
## NOX         -10.330693   6.356655  -1.625 0.105416    
## RM            5.415554   0.571937   9.469  < 2e-16 ***
## DIS          -1.301917   0.266789  -4.880 1.92e-06 ***
## RAD           0.398558   0.132190   3.015 0.002841 ** 
## TAX          -0.018024   0.005876  -3.067 0.002402 ** 
## PTRATIO      -0.709435   0.195977  -3.620 0.000358 ***
## LSTAT        -0.648331   0.101863  -6.365 9.61e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.917 on 244 degrees of freedom
## Multiple R-squared:  0.657,  Adjusted R-squared:  0.643 
## F-statistic: 46.74 on 10 and 244 DF,  p-value: < 2.2e-16
# Stepwise AIC selection in both directions, starting from the full OLS model
# on data3. stepAIC is namespace-qualified so the call does not rely on MASS
# having been attached; trace = FALSE suppresses the step-by-step log.
# The summary of the selected model is stored in d3 and printed.
d3 <- summary(
  MASS::stepAIC(lm(MEDV ~ ., data3), direction = "both", trace = FALSE)
)
d3
## 
## Call:
## lm(formula = MEDV ~ ZN + CHAS + NOX + RM + DIS + RAD + TAX + 
##     PTRATIO + B + LSTAT, data = data3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.418  -2.627  -0.605   1.590  26.614 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  31.91288    5.09971   6.258 8.89e-10 ***
## ZN            0.04488    0.01336   3.358 0.000848 ***
## CHAS          2.61140    0.84337   3.096 0.002078 ** 
## NOX         -16.08468    3.53139  -4.555 6.71e-06 ***
## RM            4.15975    0.41489  10.026  < 2e-16 ***
## DIS          -1.46480    0.18305  -8.002 9.88e-15 ***
## RAD           0.27952    0.06481   4.313 1.97e-05 ***
## TAX          -0.01244    0.00355  -3.504 0.000502 ***
## PTRATIO      -0.88072    0.12772  -6.896 1.76e-11 ***
## B             0.01061    0.00306   3.468 0.000573 ***
## LSTAT        -0.52649    0.04989 -10.553  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.673 on 464 degrees of freedom
## Multiple R-squared:  0.7173, Adjusted R-squared:  0.7112 
## F-statistic: 117.7 on 10 and 464 DF,  p-value: < 2.2e-16
# Stepwise AIC selection in both directions, starting from the full OLS model
# on data4. stepAIC is namespace-qualified so the call does not rely on MASS
# having been attached; trace = FALSE suppresses the step-by-step log.
# The summary of the selected model is stored in d4 and printed.
d4 <- summary(
  MASS::stepAIC(lm(MEDV ~ ., data4), direction = "both", trace = FALSE)
)
d4
## 
## Call:
## lm(formula = MEDV ~ RM + B + LSTAT, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4805 -0.8169 -0.1452  1.0899  2.2740 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.251490   4.455823   4.321 0.000189 ***
## RM          -0.991539   0.534156  -1.856 0.074359 .  
## B           -0.002548   0.001777  -1.434 0.163083    
## LSTAT       -0.180817   0.064778  -2.791 0.009520 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.469 on 27 degrees of freedom
## Multiple R-squared:  0.239,  Adjusted R-squared:  0.1545 
## F-statistic: 2.827 on 3 and 27 DF,  p-value: 0.05737

R-squared (stepwise models)

# R-squared of the four stepwise-selected models, in the order d1..d4.
vapply(
  list(d1, d2, d3, d4),
  function(fit_summary) fit_summary$r.squared,
  numeric(1)
)
## [1] 0.7599718 0.6570164 0.7172895 0.2390412

Adjusted R-squared (stepwise models)

# Adjusted R-squared of the four stepwise-selected models, in the order d1..d4.
vapply(
  list(d1, d2, d3, d4),
  function(fit_summary) fit_summary$adj.r.squared,
  numeric(1)
)
## [1] 0.7233573 0.6429597 0.7111966 0.1544902