# Set the default knitr chunk option echo = TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.3
## corrplot 0.84 loaded
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
# Load the housing data from a CSV file picked interactively.
# file.choose() is used rather than choose.files() because choose.files()
# exists only on Windows builds of R; file.choose() works on every platform.
housing.raw <- read.csv(file.choose(), header = TRUE, sep = ",")
# First six rows of the data.
head(housing.raw)
## RM LSTAT PTRATIO MEDV
## 1 6.575 4.98 15.3 504000
## 2 6.421 9.14 17.8 453600
## 3 7.185 4.03 17.8 728700
## 4 6.998 2.94 18.7 701400
## 5 7.147 5.33 18.7 760200
## 6 6.430 5.21 18.7 602700
# Last six rows of the data.
tail(housing.raw)
## RM LSTAT PTRATIO MEDV
## 484 6.027 14.33 19.2 352800
## 485 6.593 9.67 21.0 470400
## 486 6.120 9.08 21.0 432600
## 487 6.976 5.64 21.0 501900
## 488 6.794 6.48 21.0 462000
## 489 6.030 7.88 21.0 249900
# Dimensions: rows and columns together, then each on its own.
dim(housing.raw)
## [1] 489 4
nrow(housing.raw)
## [1] 489
ncol(housing.raw)
## [1] 4
# Compact overview of the data frame: column names, classes, leading values.
str(housing.raw)
## 'data.frame': 489 obs. of 4 variables:
## $ RM : num 6.58 6.42 7.18 7 7.15 ...
## $ LSTAT : num 4.98 9.14 4.03 2.94 5.33 ...
## $ PTRATIO: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ MEDV : num 504000 453600 728700 701400 760200 ...
# Column names.
names(housing.raw)
## [1] "RM" "LSTAT" "PTRATIO" "MEDV"
# Storage type of every column. vapply() with a character(1) template always
# returns a named character vector, whereas sapply() changes its return type
# depending on the input (e.g. a list for a data frame with no columns).
vapply(housing.raw, typeof, character(1))
## RM LSTAT PTRATIO MEDV
## "double" "double" "double" "double"
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(housing.raw)
## RM LSTAT PTRATIO MEDV
## Min. :3.561 Min. : 1.98 Min. :12.60 Min. : 105000
## 1st Qu.:5.880 1st Qu.: 7.37 1st Qu.:17.40 1st Qu.: 350700
## Median :6.185 Median :11.69 Median :19.10 Median : 438900
## Mean :6.240 Mean :12.94 Mean :18.52 Mean : 454343
## 3rd Qu.:6.575 3rd Qu.:17.12 3rd Qu.:20.20 3rd Qu.: 518700
## Max. :8.398 Max. :37.97 Max. :22.00 Max. :1024800
# Correlation matrix of all columns (cor() with its default method).
# It is computed once and reused for both the printed table and the
# correlogram instead of calling cor() twice on the same data.
housing.cor <- cor(housing.raw)
housing.cor
## RM LSTAT PTRATIO MEDV
## RM 1.0000000 -0.6120332 -0.3045593 0.6972092
## LSTAT -0.6120332 1.0000000 0.3604446 -0.7606701
## PTRATIO -0.3045593 0.3604446 1.0000000 -0.5190335
## MEDV 0.6972092 -0.7606701 -0.5190335 1.0000000
# Graphical display of the same correlation matrix.
corrplot(housing.cor)
# Data Visualization
# Scatter-plot matrix covering every pair of the four variables.
pairs(
  ~ RM + LSTAT + PTRATIO + MEDV,
  data = housing.raw,
  main = "Boston Data"
)
# Scatter Plot: RM on the y-axis against LSTAT on the x-axis
plot(RM ~ LSTAT, data = housing.raw)
# Boxplot of all variables
# The four boxplots are drawn side by side in a single row. par() returns the
# previous settings, so they are saved and restored afterwards; otherwise the
# one-row layout would leak into every later base-graphics plot.
box.vars <- c("RM", "LSTAT", "PTRATIO", "MEDV")
old.par <- par(mfrow = c(1, length(box.vars)))
for (box.var in box.vars) {
  # [[ ]] extracts the column by its name held in a variable; the same name
  # is used as the panel title.
  boxplot(housing.raw[[box.var]], main = box.var, col = "Sky Blue")
}
par(old.par)
# Bar chart of MEDV: geom_bar() counts the rows for each distinct MEDV value.
# NOTE(review): MEDV is a numeric column, so geom_histogram() may be the
# better fit here — confirm the intended plot before changing it.
ggplot(housing.raw, aes(x = MEDV)) +
  geom_bar(fill = "blue")
Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated the plot.