Task 1- Import data and initial exploration

1.1: Import Data

# Load the Boston housing data set into `df`.
# The file is read by its path rather than by changing the working directory:
# the first setwd() was immediately overridden by the second (and fails on any
# machine without that folder), and setwd() leaves a session-wide side effect.
# The leading "~" is expanded to the user's home directory when the file opens.
df <- read.csv(file.path("~", "Downloads", "BostonHousing.csv"))

###1.2: Show the first 10 rows

# Display the first ten rows of the data set.
head(df, n = 10)
##       crim   zn indus chas   nox    rm   age    dis rad tax ptratio lstat medv
## 1  0.00632 18.0  2.31    0 0.538 6.575  65.2 4.0900   1 296    15.3  4.98 24.0
## 2  0.02731  0.0  7.07    0 0.469 6.421  78.9 4.9671   2 242    17.8  9.14 21.6
## 3  0.02729  0.0  7.07    0 0.469 7.185  61.1 4.9671   2 242    17.8  4.03 34.7
## 4  0.03237  0.0  2.18    0 0.458 6.998  45.8 6.0622   3 222    18.7  2.94 33.4
## 5  0.06905  0.0  2.18    0 0.458 7.147  54.2 6.0622   3 222    18.7  5.33 36.2
## 6  0.02985  0.0  2.18    0 0.458 6.430  58.7 6.0622   3 222    18.7  5.21 28.7
## 7  0.08829 12.5  7.87    0 0.524 6.012  66.6 5.5605   5 311    15.2 12.43 22.9
## 8  0.14455 12.5  7.87    0 0.524 6.172  96.1 5.9505   5 311    15.2 19.15 27.1
## 9  0.21124 12.5  7.87    0 0.524 5.631 100.0 6.0821   5 311    15.2 29.93 16.5
## 10 0.17004 12.5  7.87    0 0.524 6.004  85.9 6.5921   5 311    15.2 17.10 18.9

###1.3: Summary of all variables

# Print summary statistics (min, quartiles, median, mean, max) for every
# column of df.
summary(df)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          lstat      
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   : 1.73  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.: 6.95  
##  Median : 5.000   Median :330.0   Median :19.05   Median :11.36  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :12.65  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:16.95  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :37.97  
##       medv      
##  Min.   : 5.00  
##  1st Qu.:17.02  
##  Median :21.20  
##  Mean   :22.53  
##  3rd Qu.:25.00  
##  Max.   :50.00

###1.4: Mean value

# Average of medv across all rows; kept for the histogram's mean marker.
mean_medv <- mean(df[["medv"]])
print(mean_medv)
## [1] 22.53281

###1.5: Histogram of Medv Variable, mark the mean val of medv by adding a vertical line

# Histogram of medv with its mean marked by a dashed red vertical line.
hist(
  df$medv,
  main = "Histogram of MEDV Aka Median Home Value",
  xlab = "medv($1,000s)",
  border = "blue"
)
abline(v = mean_medv, col = "red", lwd = 2, lty = 2)
# legend() documents only bty = "o" (draw a box) and bty = "n" (no box).
# The previous value "y" is not a documented option, so ask for the box
# explicitly with "o".
legend(
  "topright",
  legend = paste0("Mean=", round(mean_medv, 2)),
  lty = 2,
  lwd = 2,
  col = "red",
  bty = "o"
)

Task 2- Create a variable

###2.6: Create the binary variable cat.medv (1 if medv > 30, otherwise 0)

# Indicator column: 1 when medv is above 30, 0 otherwise.
df[["cat.medv"]] <- as.integer(df[["medv"]] > 30)

###2.7: calculate the mean of cat.medv + explain what it means

# The mean of a 0/1 indicator is the share of rows where it equals 1.
mean_cat_medv <- mean(df[["cat.medv"]], na.rm = TRUE)
print(mean_cat_medv)
# Report the share both as a raw proportion and as a percentage.
cat(
  "After getting the mean of the home prices of medv that is over $30,000, which is ",
  mean_cat_medv,
  " This means ",
  round(mean_cat_medv * 100, digits = 2),
  "% of the census tracts have median home values above $30,000.\n"
)
## After getting the mean of the home prices of medv that is over $30,000, which is  0.1660079  This means  16.6 % of the census tracts have median home values above $30,000.

Task 3- Bar Chart

###3.8: Calculate the mean of cat.medv for the tracts that bound the Charles River (chas==1) and save it to a variable

# Share of tracts with cat.medv == 1 among the rows where chas is 1.
m_chas1 <- mean(df[["cat.medv"]][df[["chas"]] == 1], na.rm = TRUE)
print(m_chas1)
## [1] 0.3142857

###3.9 Mean of cat.medv for tracts that do not bound the river (chas==0)

# Share of tracts with cat.medv == 1 among the rows where chas is 0.
# The argument must be spelled na.rm: the previous `no.rm = TRUE` was silently
# swallowed by mean()'s `...`, so missing values would NOT have been removed
# and any NA in the subset would have turned the result into NA.
m_chas0 <- mean(df$cat.medv[df$chas == 0], na.rm = TRUE)
m_chas0
## [1] 0.1549894

###3.10 Create a vector of the two means

# Collect both proportions in plotting order: chas == 1 first, then chas == 0.
comb_chas <- c(m_chas1, m_chas0)
print(comb_chas)
## [1] 0.3142857 0.1549894

###3.11 Plot a bar chart

# Bar chart of the two proportions. barplot() returns the bar midpoints,
# which are reused below to position the value labels.
bc <- barplot(
  height = comb_chas,
  names.arg = c("chas = 1 (Bounds River)", "chas = 0 (Does Not Bound)"),
  main = "Chart of Tracts with MEDV > $30k by the Charles River Boundary",
  ylab = "proportion with MEDV>30k",
  ylim = c(0, 1),
  border = "black"
)
# Write each rounded proportion just above its bar (pos = 3 means "above").
text(bc, comb_chas, labels = round(comb_chas, digits = 3), pos = 3)

###3.12 What I can tell from the bar chart

# Interpretation of the bar chart. The percentages are taken from m_chas1 and
# m_chas0 instead of being typed in by hand, so the sentence cannot drift out
# of sync with the data. The wording compares proportions of tracts (not
# counts of homes), which is what the chart actually shows.
cat(sprintf(
  paste0(
    "From the bar chart I can tell that tracts that bound the Charles River ",
    "are more likely to have a median home value above $30,000: ",
    "%.1f%% of tracts that bound the river exceed $30,000, ",
    "compared with only %.1f%% of tracts that do not bound it.\n"
  ),
  m_chas1 * 100,
  m_chas0 * 100
))
## From the bar chart I can tell that tracts that bound the Charles River are more likely to have a median home value above $30,000: 31.4% of tracts that bound the river exceed $30,000, compared with only 15.5% of tracts that do not bound it.

Task 4: Boxplot

###4.13 side by side boxplot

# Side-by-side boxplots of medv, one box per value of chas.
# The labels in `names` follow the factor level order (0 first, then 1).
boxplot(
  medv ~ as.factor(chas),
  data = df,
  main = "MEDV by Charles River",
  xlab = "Tract bounds Charles River?",
  ylab = "MEDV ($1,000s)",
  names = c("chas = 0 (No)", "chas = 1 (Yes)")
)

Task 5: Scatter Plot

###5.14 Scatter plot of medv vs. lstat
# Scatter plot with lstat on the x-axis and medv on the y-axis,
# drawn with small solid black points.
with(
  df,
  plot(
    x = lstat,
    y = medv,
    main = "Scatterplot of MEDV vs LSTAT",
    xlab = "lstat (% lower socioeconomic status)",
    ylab = "medv ($1,000s)",
    col = "black",
    pch = 19,
    cex = 0.7
  )
)
     
###5.15 Linear Regression

# Simple linear regression of medv on lstat; print the coefficient table
# and fit statistics.
fit <- lm(formula = medv ~ lstat, data = df)
print(summary(fit))
## 
## Call:
## lm(formula = medv ~ lstat, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.168  -3.990  -1.318   2.034  24.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.55384    0.56263   61.41   <2e-16 ***
## lstat       -0.95005    0.03873  -24.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.5432 
## F-statistic: 601.6 on 1 and 504 DF,  p-value: < 2.2e-16
###5.16 Line + Legend

# Overlay the regression line from `fit` on the current plot and label it
# with a borderless legend.
abline(reg = fit, lwd = 2, col = "blue")
legend(
  "topright",
  legend = "Fitted line: medv ~ lstat",
  lwd = 2,
  col = "blue",
  bty = "n"
)

LS0tCnRpdGxlOiAiQnVzaW5lc3MgQW5hbHl0aWNzIChNR1QgNDA1MCkiCnN1YnRpdGxlOiAiSG9tZXdvcmsgMSAoSW50cm9kdWN0aW9uIHRvIFIpIgphdXRob3I6ICJDaGVycnkgQ2h1bmciCmRhdGU6ICIwOS8wOC8yMDI1IgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIGNvZGVfZG93bmxvYWQ6IHllcwogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiBubwogICAgICBzbW9vdGhfc2Nyb2xsOiB5ZXMKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwogIHdvcmRfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCmBgYAogCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhY2thZ2VzfQoKCiNsaWJyYXJ5KGthYmxlRXh0cmEpIyBtYWtlIGEgZGVjZW50IHRhYmxlCiNsaWJyYXJ5KHh0YWJsZSkgIyBtYWtlIGEgZGVjZW50IHRhYmxlCmBgYAogPCEtLShvcHRpb25hbCkgV2UgdXNlIHRoZSBDU1MgYmVsb3cgdG8gY29udHJvbCBmb250IHNpemUgYW5kIGNvbG9yIGluIGFuIFItbWFya2Rvd24gZmlsZS4gSXQgYmFzaWNhbGx5IG92ZXJyaWRlcyB0aGUgQ1NTIHN0eWxlIC0tPgo8c3R5bGUgdHlwZT0idGV4dC9jc3MiPgoKYm9keXsgLyogTm9ybWFsICAqLwogICAgICBmb250LXNpemU6IDE2cHg7CiAgICAgICAgZm9udC1mYW1pbHk6ICJUaW1lcyBOZXcgUm9tYW4iLCBUaW1lcywgc2VyaWY7CiAgfQp0ZCB7ICAvKiBUYWJsZSAgKi8KICBmb250LXNpemU6IDEycHg7Cn0KaDEudGl0bGUgewogIGZvbnQtc2l6ZTogMzhweDsKICBjb2xvcjogRGFya1JlZDsKfQpoMSB7IC8qIEhlYWRlciAxICovCiAgZm9udC1zaXplOiAyOHB4OwogIGNvbG9yOiBEYXJrQmx1ZTsKfQpoMiB7IC8qIEhlYWRlciAyICovCiAgICBmb250LXNpemU6IDIycHg7CiAgY29sb3I6IERhcmtCbHVlOwp9CmgzIHsgLyogSGVhZGVyIDMgKi8KICBmb250LXNpemU6IDE4cHg7CgogIGNvbG9yOiBEYXJrQmx1ZTsKfQpjb2RlLnJ7IC8qIENvZGUgYmxvY2sgKi8KICAgIGZvbnQtc2l6ZTogMTJweDsKICAgICBmb250LWZhbWlseTogIlRpbWVzIE5ldyBSb21hbiIsIFRpbWVzLCBzZXJpZjsKfQpwcmUgeyAvKiBDb2RlIGJsb2NrIC0gZGV0ZXJtaW5lcyBjb2RlIHNwYWNpbmcgYmV0d2VlbiBsaW5lcyAqLwogICAgZm9udC1zaXplOiAxNHB4OwogICAgIGZvbnQtZmFtaWx5OiAiVGltZXMgTmV3IFJvbWFuIiwgVGltZXMsIHNlcmlmOwp9Cjwvc3R5bGU+CiA8IS0tICBDU1Mgc3R5bGUgIC0tPgoKCiMjIFRhc2sgMS0gSW1wb3J0IGRhdGEgYW5kIGluaXRpYWwgZXhwbG9yYXRpb24KCiMjIyAxLjE6IEltcG9ydCBEYXRhCmBgYHtyfQpzZXR3ZCgifi9Eb3dubG9hZHMvUnNjcmlwdCBSc3R1ZGlvIHByb2plY3RzL1JzY3JpcHQgSG9tZXdvcmsiKQpzZXR3ZCgifi9Eb3dubG9hZHMiKQpkZiA8LSByZWFkLmNz
digiQm9zdG9uSG91c2luZy5jc3YiKQpgYGAKCiMjIzEuMjogQWRkIGZpcnN0IDEwIHJvd3MKYGBge3J9CmhlYWQoZGYsMTApCmBgYAoKIyMjMS4zOiBTdW1tYXJ5IG9mIGFsbCB2YXJpYWJsZXMKYGBge3J9CnN1bW1hcnkoZGYpCmBgYAoKIyMjMS40OiBNZWFuIHZhbHVlCmBgYHtyfQptZWFuX21lZHYgPC0gbWVhbihkZiRtZWR2KSAKbWVhbl9tZWR2CmBgYAoKIyMjMS41OiBIaXN0b2dyYW0gb2YgTWVkdiBWYXJpYWJsZSwgbWFyayB0aGUgbWVhbiB2YWwgb2YgbWVkdiBieSBhZGRpbmcgYSB2ZXJ0aWNhbCBsaW5lCmBgYHtyfQpoaXN0KGRmJG1lZHYsCiAgbWFpbj0gIkhpc3RvZ3JhbSBvZiBNRURWIEFrYSBNZWRpYW4gSG9tZSBWYWx1ZSIsCiAgeGxhYj0gIm1lZHYoJDEsMDAwcykiLAogIGJvcmRlciA9ICJibHVlIikKYWJsaW5lKHY9IG1lYW5fbWVkdiwgY29sPSAicmVkIiwgbHdkPTIsIGx0eT0yKQpsZWdlbmQoInRvcHJpZ2h0IixsZWdlbmQ9cGFzdGUwKCJNZWFuPSIsIHJvdW5kKG1lYW5fbWVkdiwgMikpLAogICAgICBsdHkgPSAyLCBsd2QgPSAyLCBjb2w9InJlZCIsIGJ0eSA9ICJ5IikKYGBgCgojIyBUYXNrIDItIENyZWF0ZSBhIHZhcmlhYmxlCgojIyMyLjY6IGNhdC5tZWR2IGNvbmZpcm1hdGlvbiB0aGluZwpgYGB7cn0KZGYkY2F0Lm1lZHYgPC0gYXMuaW50ZWdlcihkZiRtZWR2ID4gMzApCmBgYAoKIyMjMi43OiBjYWxjdWxhdGUgdGhlIG1lYW4gb2YgY2F0Lm1lZHYgKyBleHBsYWluIHdoYXQgaXQgbWVhbnMKYGBge3J9Cm1lYW5fY2F0X21lZHYgPC0gbWVhbihkZiRjYXQubWVkdiwgbmEucm09VFJVRSkKbWVhbl9jYXRfbWVkdgpjYXQoIkFmdGVyIGdldHRpbmcgdGhlIG1lYW4gb2YgdGhlIGhvbWUgcHJpY2VzIG9mIG1lZHYgdGhhdCBpcyBvdmVyICQzMCwwMDAsIHdoaWNoIGlzICIsIG1lYW5fY2F0X21lZHYsCiAgICAiIFRoaXMgbWVhbnMgIiwgcm91bmQobWVhbl9jYXRfbWVkdioxMDAsMiksICIlIG9mIHRoZSBjZW5zdXMgdHJhY3RzIGhhdmUgbWVkaWFuIGhvbWUgdmFsdWVzIGFib3ZlICQzMCwwMDAuXG4iKQpgYGAKCiMjIFRhc2sgMy0gQmFyIENoYXJ0CgojIyMzLjg6IENhbGN1bGF0ZSB0aGUgbWVhbiBvZiBjYXQubWVkdiBmb3IgdGhlIHRyYWN0cyB0aGF0IGJvdW5kIHRoZSBDaGFybGVzIFJpdmVyIChjaGFzPT0xKSBhbmQgc2F2ZSBpdCB0byBhIHZhcmlhYmxlCmBgYHtyfQptX2NoYXMxIDwtIG1lYW4oZGYkY2F0Lm1lZHZbZGYkY2hhcz09MV0sIG5hLnJtID0gVFJVRSkKbV9jaGFzMQpgYGAKCiMjIzMuOSBNZWFuIG9mIGNhdC5tZWR2IGZvciB0cmFjdHMgdGhhdCBkbyBub3QgYm91bmQgdGhlIHJpdmVyIChjaGFzPT0wKQpgYGB7cn0KbV9jaGFzMCA8LSBtZWFuKGRmJGNhdC5tZWR2W2RmJGNoYXM9PSAwXSwgbm8ucm0gPSBUUlVFKQptX2NoYXMwCmBgYAoKIyMjMy4xMCBDcmVhdGUgYSB2ZWN0b3Igb2YgdGhlIHR3byBtZWFucwpgYGB7cn0KY29tYl9jaGFzIDwtYyhtX2NoYXMxLCBtX2NoYXMwKQpjb21iX2NoYXMKYGBg
CgojIyMzLjExIFBsb3QgYSBiYXIgY2hhcnQKYGBge3J9CmJjIDwtIGJhcnBsb3QoY29tYl9jaGFzLAogICAgbmFtZXMuYXJnID0gYygiY2hhcyA9IDEgKEJvdW5kcyBSaXZlcikiLCAiY2hhcyA9IDAgKERvZXMgTm90IEJvdW5kKSIpLAogICAgeWxpbT1jKDAsMSksCiAgICB5bGFiPSJwcm9wb3J0aW9uIHdpdGggTUVEVj4zMGsiLAogICAgbWFpbiA9ICJDaGFydCBvZiBUcmFjdHMgd2l0aCBNRURWID4gJDMwayBieSB0aGUgQ2hhcmxlcyBSaXZlciBCb3VuZGFyeSIsCiAgICBib3JkZXI9ICJibGFjayIpCnRleHQoeCA9IGJjLCB5ID0gY29tYl9jaGFzLCBsYWJlbHMgPSByb3VuZChjb21iX2NoYXMsIDMpLCBwb3MgPSAzKQpgYGAKCiMjIzMuMTIgV2hhdCBJIGNhbiB0ZWxsIGZyb20gdGhlIGJhciBjaGFydApgYGB7cn0KY2F0KCJGcm9tIHRoZSBiYXIgY2hhcnQgSSBjYW4gdGVsbCB0aGF0IHRoZXJlIGFyZSBtb3JlIFRyYWN0cyB0aGF0IGJvdW5kIHRoZSBjaGFybGVzIHJpdmVyIGZyb20gaG9tZXMgdGhhdCBhcmUgd29ydGggbW9yZSB0aGFuICQzMCwwMDAuIDMxJSBvZiBob21lcyB0aGF0IGJvcmRlciB0aGUgQ2hhcmxlcyByaXZlciBhcmUgd29ydGggb3ZlciAkMzAsMDAwIHVzZCBib3VuZCB0aGUgcml2ZXIgd2hpbGUgb25seSAxNS41JSBvZiBob21lcyB0aGF0IGRvbnQgYm9yZGVyIGl0LCBhcmUgd29ydGggb3ZlciAkMzAsMDAwIHVzZCIpCmBgYAoKCiMjIFRhc2sgNDogQm94cGxvdAojIyM0LjEzIHNpZGUgYnkgc2lkZSBib3hwbG90CmBgYHtyfQpib3hwbG90KG1lZHYgfiBhcy5mYWN0b3IoY2hhcyksIGRhdGEgPSBkZiwKICAgICAgICBuYW1lcyA9IGMoImNoYXMgPSAwIChObykiLCAiY2hhcyA9IDEgKFllcykiKSwKICAgICAgICB4bGFiID0gIlRyYWN0IGJvdW5kcyBDaGFybGVzIFJpdmVyPyIsCiAgICAgICAgeWxhYiA9ICJNRURWICgkMSwwMDBzKSIsCiAgICAgICAgbWFpbiA9ICJNRURWIGJ5IENoYXJsZXMgUml2ZXIiKQpgYGAKCiMjIFRhc2sgNTogU2NhdHRlciBQbG90CmBgYHtyfQojIyM1LjE0IFNjYXR0ZXIgcGxvdCBvZiBtZWR2CnBsb3QoZGYkbHN0YXQsIGRmJG1lZHYsCiAgICAgeGxhYiA9ICJsc3RhdCAoJSBsb3dlciBzb2Npb2Vjb25vbWljIHN0YXR1cykiLAogICAgIHlsYWIgPSAibWVkdiAoJDEsMDAwcykiLAogICAgIG1haW4gPSAiU2NhdHRlcnBsb3Qgb2YgTUVEViB2cyBMU1RBVCIsCiAgICAgcGNoID0gMTksIAogICAgIGNleCA9IDAuNywgCiAgICAgY29sID0gImJsYWNrIikKICAgICAKIyMjNS4xNSBMaW5lYXIgUmVncmVzc2lvbgoKZml0IDwtIGxtKG1lZHYgfiBsc3RhdCwgZGF0YSA9IGRmKQpzdW1tYXJ5KGZpdCkKCgojIyM1LjE2IExpbmUgKyBMZWdlbmQKCmFibGluZShmaXQsIGNvbCA9ICJibHVlIiwgbHdkID0gMikKbGVnZW5kKCJ0b3ByaWdodCIsIGxlZ2VuZCA9ICJGaXR0ZWQgbGluZTogbWVkdiB+IGxzdGF0IiwKICAgICAgIGNvbCA9ICJibHVlIiwgbHdkID0gMiwgYnR5ID0gIm4iKQpgYGAKCg==