# Boston Housing Data

library(pillar)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:pillar':
## 
##     dim_desc
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.92 loaded
# Read the Boston housing CSV. `rawData` keeps the untouched import so the
# working copy `Boston` can be modified freely further down.
rawData <- read.csv(
  "C:\\Users\\pdbro\\OneDrive\\Documents\\AI4OPT\\Boston_Housing.csv"
)
Boston <- rawData

# Data Cleaning ----

# Cleaning Data

# Preview the first rows and inspect the column types.
head(Boston)

str(Boston)

# Check for duplicated rows (count of rows that repeat an earlier row).
# The call had been swallowed into the comment line and never ran.
sum(duplicated(Boston))

# Check for missing values (total number of NA cells in the data frame).
sum(is.na(Boston))



# Summary ----

# Summary statistics for every column.
summary(Boston)

# Correlation between variables, drawn as the upper triangle with the
# coefficients printed as numbers. cor() only accepts numeric input, so
# non-numeric columns (e.g. MEDVQuantiles when this section is re-run) are
# dropped first.
library(corrplot)
numericCols <- vapply(Boston, is.numeric, logical(1))
corrplot(
  cor(Boston[, numericCols, drop = FALSE]),
  method = "number",
  type = "upper",
  diag = FALSE
)

# Split MEDV into three equal-frequency bins: the break points are the
# 0, 1/3, 2/3 and 1 quantiles. R is case-sensitive, so the data frame must be
# referenced as `Boston` (not `boston`).
cutpoints <- quantile(Boston$MEDV, seq(0, 1, length.out = 4), na.rm = TRUE)
Boston$MEDVQuantiles <- cut(
  Boston$MEDV,
  breaks = cutpoints,
  include.lowest = TRUE,  # keep the minimum MEDV inside the first bin
  labels = c("Low priced", "Mid Priced", "High priced")
)

# Number of observations in each price band.
table(Boston$MEDVQuantiles)

# Drop every row that contains at least one missing value.
# `numberOfNA` is the total count of NA cells across the whole data frame.
numberOfNA <- sum(is.na(Boston))
if (numberOfNA > 0) {
  # Subset Boston itself; the previous code referenced an undefined `Housing`.
  Boston <- Boston[complete.cases(Boston), ]
}



# Quantiles (0%, 25%, 50%, 75%, 100%) of each numeric column.
# quantile() works on a numeric vector, not on a data frame, so it is applied
# column by column; the result is a 5 x <number of numeric columns> matrix.
numericCols <- vapply(Boston, is.numeric, logical(1))
vapply(Boston[numericCols], quantile, numeric(5), na.rm = TRUE)


# Per-variable descriptive summary from Hmisc.
library(Hmisc)
describe(Boston)

library(ggplot2)

# NOTE(review): the original line `data(Boston, package"DMwR2")` did not parse
# (missing `=`). It is not restored because `Boston` is already loaded from
# the CSV, and loading a packaged data set of the same name would overwrite
# the cleaned data frame, including the MEDVQuantiles column.
# TODO confirm that no packaged copy of the data was intended here.

# Frequency of occurrence of each distinct value, computed per column.
# table() on the whole data frame would cross-tabulate every column against
# every other one, which is not feasible for this many variables.
# presumably per-column counts were the intent; verify.
freqOcc <- lapply(Boston, table)


# Scatter plots of MEDV against RM, LSTAT and PTRATIO.
# The data frame is passed explicitly instead of being attach()-ed, so the
# columns cannot be shadowed by (or shadow) objects on the search path, and
# ggplot() replaces the deprecated qplot() shortcut.
plot1 <- ggplot(Boston, aes(x = MEDV, y = RM)) +
  geom_point()
plot2 <- ggplot(Boston, aes(x = MEDV, y = LSTAT)) +
  geom_point()
plot3 <- ggplot(Boston, aes(x = MEDV, y = PTRATIO)) +
  geom_point()

# Show the three plots side by side in a single row.
library(gridExtra)
grid.arrange(plot1, plot2, plot3, nrow = 1)