#Problem 1 ##

#setwd("~/Downloads/R Studio Downloads/BostonHousing.csv")
# Q1. Read the Boston housing data set (CSV in the working directory)
# into the data frame `boston` used by every later step.
boston <- read.csv(file = "BostonHousing.csv", header = TRUE)

#Problem 2 ##

# Q2. Show the first 10 observations of the data set
head(x = boston, n = 10L)
##       crim   zn indus chas   nox    rm   age    dis rad tax ptratio lstat medv
## 1  0.00632 18.0  2.31    0 0.538 6.575  65.2 4.0900   1 296    15.3  4.98 24.0
## 2  0.02731  0.0  7.07    0 0.469 6.421  78.9 4.9671   2 242    17.8  9.14 21.6
## 3  0.02729  0.0  7.07    0 0.469 7.185  61.1 4.9671   2 242    17.8  4.03 34.7
## 4  0.03237  0.0  2.18    0 0.458 6.998  45.8 6.0622   3 222    18.7  2.94 33.4
## 5  0.06905  0.0  2.18    0 0.458 7.147  54.2 6.0622   3 222    18.7  5.33 36.2
## 6  0.02985  0.0  2.18    0 0.458 6.430  58.7 6.0622   3 222    18.7  5.21 28.7
## 7  0.08829 12.5  7.87    0 0.524 6.012  66.6 5.5605   5 311    15.2 12.43 22.9
## 8  0.14455 12.5  7.87    0 0.524 6.172  96.1 5.9505   5 311    15.2 19.15 27.1
## 9  0.21124 12.5  7.87    0 0.524 5.631 100.0 6.0821   5 311    15.2 29.93 16.5
## 10 0.17004 12.5  7.87    0 0.524 6.004  85.9 6.5921   5 311    15.2 17.10 18.9

#Problem 3 ##

# Q3. Descriptive statistics (min, quartiles, mean, max) for every column
summary(object = boston)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          lstat      
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   : 1.73  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.: 6.95  
##  Median : 5.000   Median :330.0   Median :19.05   Median :11.36  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :12.65  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:16.95  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :37.97  
##       medv      
##  Min.   : 5.00  
##  1st Qu.:17.02  
##  Median :21.20  
##  Mean   :22.53  
##  3rd Qu.:25.00  
##  Max.   :50.00

#Problem 4 ##

# Q4. Mean of medv: store it once (it is reused for the histogram's
# mean marker), then print it
mean_medv <- mean(boston[["medv"]])
mean_medv
## [1] 22.53281

#Problem 5 ##

# Q5. Histogram of medv, with the sample mean marked by a dashed red line
hist(
  x = boston[["medv"]],
  main = "Histogram of Median Home Values (medv)",
  xlab = "Median Value ($1000s)",
  col = "lightblue",
  border = "black"
)
abline(v = mean_medv, lwd = 2, lty = 2, col = "red")

# Legend entry uses the same line style as the mean marker and reports
# the mean rounded to two decimals
legend(
  x = "topright",
  legend = paste("Mean =", round(mean_medv, digits = 2)),
  col = "red",
  lwd = 2,
  lty = 2
)

# Q6. Flag tracts whose median home value exceeds 30 (medv is in $1000s):
# 1 if medv > 30, otherwise 0
boston$cat.medv <- as.numeric(boston$medv > 30)

# Q7. Mean of the 0/1 indicator, stored and then printed
mean_cat_medv <- mean(boston$cat.medv)
mean_cat_medv
## [1] 0.1660079
## cat.medv is 1 when a tract's median home value is above $30,000 and 0 otherwise, so its mean is the proportion of rows in the dataset whose median home value exceeds $30,000.

#Task 3: Bar Chart ##

#Task 3: Bar Chart

# Q8. Proportion of high-value tracts (cat.medv == 1) among tracts that
# bound the Charles River (chas == 1)
mean_chas1 <- mean(boston$cat.medv[boston$chas == 1], na.rm = TRUE)

# Q9. Same proportion for tracts that do not bound the river (chas == 0)
mean_chas0 <- mean(boston$cat.medv[boston$chas == 0], na.rm = TRUE)

# Q10. Collect the two means; the order here (chas = 1 first) must match
# the order of the bar labels below
means_vector <- c(mean_chas1, mean_chas0)

# Q11. Bar chart of the two proportions. barplot() returns the bar
# midpoints, which are kept in `bp` to position the value labels.
bp <- barplot(
  means_vector,
  names.arg = c(
    "Bounds Charles River (chas=1)",
    "Doesn't Bound Charles River (chas=0)"
  ),
  col = c("blue", "pink"),
  ylim = c(0, 1),
  main = "Proportion of tracts with medv > $30k by Proximity to River",
  ylab = "Mean of cat.medv"
)
# Print each proportion (rounded to 2 decimals) just above its bar
text(x = bp, y = means_vector + 0.05, labels = round(means_vector, 2))

#TODO: Discuss the findings from the bar chart.

#Task 4, Box Plot ##

# Q13. Boxplot of medv by Charles River proximity.
# The formula medv ~ chas draws one box per value of chas in sorted order
# (0 first, then 1), so `names` and `col` must be listed in that order.
boxplot(
  medv ~ chas,
  data = boston,
  col = c("orange", "purple"),
  names = c(
    "Doesn't Bound Charles River (chas=0)",
    "Bounds Charles River (chas=1)"
  ),
  main = "Boxplot of Median Value (medv) by Charles River Proximity",
  ylab = "Median Value of Homes ($1000s)"
)

#Task 5, Scatter Plot ##

# Task 5: Scatter Plot

# Q15. Scatter plot of medv (y axis) against rm (x axis).
# Title and x-axis label name the variable actually plotted (rm).
plot(
  boston$rm, boston$medv,
  main = "MEDV vs RM",
  xlab = "Average Number of Rooms (rm)",
  ylab = "Median Value of Homes (medv)",
  pch = 19, col = "blue"
)

# Q16. Regression line: fit a linear model of medv on rm and print
# the fit summary
model <- lm(formula = medv ~ rm, data = boston)
summary(object = model)
## 
## Call:
## lm(formula = medv ~ rm, data = boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.346  -2.547   0.090   2.986  39.433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -34.671      2.650  -13.08   <2e-16 ***
## rm             9.102      0.419   21.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.616 on 504 degrees of freedom
## Multiple R-squared:  0.4835, Adjusted R-squared:  0.4825 
## F-statistic: 471.8 on 1 and 504 DF,  p-value: < 2.2e-16
# Overlay the fitted regression line on the current scatter plot
abline(reg = model, col = "red", lwd = 2)

LS0tCnRpdGxlOiAiQnVzaW5lc3MgQW5hbHl0aWNzIChNR1QgNDA1MCkiCnN1YnRpdGxlOiAiSG9tZXdvcmsgMSIKYXV0aG9yOiAiQ29saW4gU2FnZSBBcm5vbGQiCmRhdGU6ICIwOS8wNy8yMDI1IgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIGNvZGVfZG93bmxvYWQ6IHllcwogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiBubwogICAgICBzbW9vdGhfc2Nyb2xsOiB5ZXMKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogIHdvcmRfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCmBgYAogCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhY2thZ2VzfQoKCiNsaWJyYXJ5KGthYmxlRXh0cmEpIyBtYWtlIGEgZGVjZW50IHRhYmxlCiNsaWJyYXJ5KHh0YWJsZSkgIyBtYWtlIGEgZGVjZW50IHRhYmxlCmBgYAogPCEtLShvcHRpb25hbCkgV2UgdXNlIHRoZSBDU1MgYmVsb3cgdG8gY29udHJvbCBmb250IHNpemUgYW5kIGNvbG9yIGluIGFuIFItbWFya2Rvd24gZmlsZS4gSXQgYmFzaWNhbGx5IG92ZXJyaWRlcyB0aGUgQ1NTIHN0eWxlIC0tPgo8c3R5bGUgdHlwZT0idGV4dC9jc3MiPgoKYm9keXsgLyogTm9ybWFsICAqLwogICAgICBmb250LXNpemU6IDE2cHg7CiAgICAgICAgZm9udC1mYW1pbHk6ICJUaW1lcyBOZXcgUm9tYW4iLCBUaW1lcywgc2VyaWY7CiAgfQp0ZCB7ICAvKiBUYWJsZSAgKi8KICBmb250LXNpemU6IDEycHg7Cn0KaDEudGl0bGUgewogIGZvbnQtc2l6ZTogMzhweDsKICBjb2xvcjogRGFya1JlZDsKfQpoMSB7IC8qIEhlYWRlciAxICovCiAgZm9udC1zaXplOiAyOHB4OwogIGNvbG9yOiBEYXJrQmx1ZTsKfQpoMiB7IC8qIEhlYWRlciAyICovCiAgICBmb250LXNpemU6IDIycHg7CiAgY29sb3I6IERhcmtCbHVlOwp9CmgzIHsgLyogSGVhZGVyIDMgKi8KICBmb250LXNpemU6IDE4cHg7CgogIGNvbG9yOiBEYXJrQmx1ZTsKfQpjb2RlLnJ7IC8qIENvZGUgYmxvY2sgKi8KICAgIGZvbnQtc2l6ZTogMTJweDsKICAgICBmb250LWZhbWlseTogIlRpbWVzIE5ldyBSb21hbiIsIFRpbWVzLCBzZXJpZjsKfQpwcmUgeyAvKiBDb2RlIGJsb2NrIC0gZGV0ZXJtaW5lcyBjb2RlIHNwYWNpbmcgYmV0d2VlbiBsaW5lcyAqLwogICAgZm9udC1zaXplOiAxNHB4OwogICAgIGZvbnQtZmFtaWx5OiAiVGltZXMgTmV3IFJvbWFuIiwgVGltZXMsIHNlcmlmOwp9Cjwvc3R5bGU+CiA8IS0tICBDU1Mgc3R5bGUgIC0tPgoKCiMgUHJvYmxlbSAxCiMjIApgYGB7cn0KI3NldHdkKCJ+L0Rvd25sb2Fkcy9SIFN0dWRpbyBEb3dubG9hZHMvQm9zdG9uSG91c2luZy5jc3YiKQojIFExLiBJbXBvcnQgdGhlIGRhdGEKYm9zdG9uPC0gcmVhZC5jc3YoIkJvc3RvbkhvdXNpbmcuY3N2IikKYGBgCiNQcm9ibGVtIDIKIyMKCmBgYHtyfQojIFEyLiBQcmludCB0aGUgZmly
c3QgMTAgcm93cwpoZWFkKGJvc3RvbiwgMTApCmBgYAoKI1Byb2JsZW0gMwojIwoKYGBge3J9CiMgUTMuIFN1bW1hcnkgb2YgYWxsIHZhcmlhYmxlcwpzdW1tYXJ5KGJvc3RvbikKYGBgCiNQcm9ibGVtIDQKIyMKCmBgYHtyfQojIFE0LiBDYWxjdWxhdGUgdGhlIG1lYW4gdmFsdWUgb2YgbWVkdgptZWFuKGJvc3RvbiRtZWR2KQptZWFuX21lZHYgPC1tZWFuKGJvc3RvbiRtZWR2KQpgYGAKI1Byb2JsZW0gNQojIwoKYGBge3J9CiMgUTUuIEhpc3RvZ3JhbSBvZiBtZWR2IHdpdGggdmVydGljYWwgbGluZSBhdCBtZWFuCmhpc3QoYm9zdG9uJG1lZHYsCiAgICAgbWFpbiA9ICJIaXN0b2dyYW0gb2YgTWVkaWFuIEhvbWUgVmFsdWVzIChtZWR2KSIsCiAgICAgeGxhYiA9ICJNZWRpYW4gVmFsdWUgKCQxMDAwcykiLAogICAgIGNvbCA9ICJsaWdodGJsdWUiLAogICAgIGJvcmRlciA9ICJibGFjayIpCmFibGluZSh2ID0gbWVhbl9tZWR2LCBjb2wgPSAicmVkIiwgbHdkID0gMiwgbHR5ID0gMikKCiMgQWRkaW5nIGEgbGVnZW5kCmxlZ2VuZCgidG9wcmlnaHQiLCBsZWdlbmQgPSBwYXN0ZSgiTWVhbiA9Iiwgcm91bmQobWVhbl9tZWR2LCAyKSksCiAgICAgICBjb2wgPSAicmVkIiwgbHdkID0gMiwgbHR5ID0gMikKIyBRNi4gQ3JlYXRlIGNhdC5tZWR2IHZhcmlhYmxlCmJvc3RvbiRjYXQubWVkdiA8LSBpZmVsc2UoYm9zdG9uJG1lZHYgPiAzMCwgMSwgMCkKCiMgUTcuIE1lYW4gb2YgY2F0Lm1lZHYKbWVhbl9jYXRfbWVkdiA8LSBtZWFuKGJvc3RvbiRjYXQubWVkdikKbWVhbl9jYXRfbWVkdgoKIyMgVGhlIGNhdC5tZWR2IHZhcmlhYmxlIGlzIHJlcHJlc2VudGF0aXZlIG9mIHdoZW4gdGhlIG1lYW4gdmFsdWUgb2YgdGhlIGhvbWUgaXMgYWJvdmUgMzAwMDAsIHNvIGJ5IHRha2luZyB0aGUgbWVhbiwgd2UgY2FuIGZpbmQgdGhlIHByb3BvcnRpb24gb2Ygcm93cyBpbiB0aGUgZGF0YXNldCB3aGVyZSB0aGUgbWVkaWFuIGhvbWUgdmFsdWUgZXhjZWVkcyB0aGF0ICQzMDAwMC4KYGBgCgojVGFzayAzOiBCYXIgQ2hhcnQKIyMKCmBgYHtyfQojVGFzayAzOiBCYXIgQ2hhcnQKCiMgUTguIE1lYW4gb2YgY2F0Lm1lZHYgZm9yIHRyYWN0cyBuZXh0IHRvIENoYXJsZXMgUml2ZXIKbWVhbl9jaGFzMSA8LSBtZWFuKGJvc3RvbiRjYXQubWVkdltib3N0b24kY2hhcyA9PSAxXSxuYS5ybSA9IFRSVUUpCgojIFE5LiBNZWFuIG9mIGNhdC5tZWR2IGZvciB0cmFjdHMgbm90IG5leHQgdG8gQ2hhcmxlcyBSaXZlcgptZWFuX2NoYXMwIDwtIG1lYW4oYm9zdG9uJGNhdC5tZWR2W2Jvc3RvbiRjaGFzID09IDBdLCBuYS5ybSA9IFRSVUUpCgojIFExMC4gQ3JlYXRlIHZlY3RvciBvZiB0aGUgdHdvIG1lYW5zCm1lYW5zX3ZlY3RvciA8LSBjKG1lYW5fY2hhczEsIG1lYW5fY2hhczApCgojIFExMS4gQmFyIGNoYXJ0CmJwIDwtIGJhcnBsb3QobWVhbnNfdmVjdG9yLAogICAgICAgIG5hbWVzLmFyZz0gYygiQm91bmRzIENoYXJsZXMgUml2ZXIgKGNoYXM9MSkiLCAiRG9lc24ndCBCb3Vu
ZCBDaGFybGVzIFJpdmVyIChjaGFzPTAiKSwKICAgICAgICBjb2w9YyAoImJsdWUiLCJwaW5rIiksCiAgICAgICAgeWxpbT1jKDAsMSksCiAgICAgICAgbWFpbiA9ICgiUHJvcG9ydGlvbiBvZiB0cmFjdHMgd2l0aCBtZWR2ID4gJDMwayBieSBQcm94aW1pdHkgdG8gUml2ZXIiKSwKICAgICAgICB5bGFiID0gIk1lYW4gb2YgY2F0Lm1lZHYiKQp0ZXh0ICh4PWJwLCB5PW1lYW5zX3ZlY3RvciswLjA1LCBsYWJlbHM9cm91bmQobWVhbnNfdmVjdG9yLDIpKQojTmVlZCB0byBkaXNjdXNzIGZpbmRpbmcgZnJvbSB0aGUgYmFyIGNoYXJ0LgpgYGAKI1Rhc2sgNCwgQm94IFBsb3QKIyMKCmBgYHtyfQojIFExMy4gQm94cGxvdCBvZiBtZWR2IGJ5IENoYXJsZXMgUml2ZXIgcHJveGltaXR5CmJveHBsb3QobWVkdiB+IGNoYXMsZGF0YSA9IGJvc3RvbiwKICAgICAgICBjb2w9Yygib3JhbmdlIiwicHVycGxlIiksCiAgICAgICAgbmFtZXMgPSBjKCJCb3VuZHMgQ2hhcmxlcyBSaXZlciAoY2hhcz0xKSIsICJEb2Vzbid0IEJvdW5kIENoYXJsZXMgUml2ZXIgKGNoYXM9MCIpLAogICAgICAgIG1haW4gPSAiQm94cGxvdCBvZiBNZWRpYW4gVmFsdWUgKG1lZHYpIGJ5IENoYXJsZXMgUml2ZXIgUHJveGltaXR5IiwKICAgICAgICB5bGFiID0gIk1lZGlhbiBWYWx1ZSBvZiBIb21lcyAoJDEwMDBzKSIpCmBgYAoKI1Rhc2sgNSwgU2NhdHRlciBQbG90CiMjCgpgYGB7cn0KIyBUYXNrIDU6IFNjYXR0ZXIgUGxvdAoKIyBRMTUuIFNjYXR0ZXIgcGxvdCBvZiBybSB2cyBtZWR2YWwKcGxvdChib3N0b24kcm0sIGJvc3RvbiRtZWR2LAogICAgIG1haW4gPSAiTUVEVkwgdnMgTFNUQVQiLAogICAgIHhsYWIgPSAiTFNUQVQiLAogICAgIHlsYWIgPSAiTWVkaWFuIFZhbHVlIG9mIEhvbWVzIChtZWR2KSIsCiAgICAgcGNoID0gMTksIGNvbCA9ICJibHVlIikKCiMgUTE2LiBBZGQgcmVncmVzc2lvbiBsaW5lCm1vZGVsIDwtIGxtKG1lZHYgfiBybSwgZGF0YSA9IGJvc3RvbikKc3VtbWFyeShtb2RlbCkKYWJsaW5lKG1vZGVsLCBjb2wgPSAicmVkIiwgbHdkID0gMikKCmBgYAoKCgo=