# Boston Housing Data
library(pillar)
library(dplyr)
library(corrplot)

# Location of the Boston housing CSV. Defaults to the path used by the
# original author; set the BOSTON_HOUSING_CSV environment variable to read
# the file from somewhere else without editing this script.
bostonCsvPath <- Sys.getenv(
  "BOSTON_HOUSING_CSV",
  unset = "C:\\Users\\pdbro\\OneDrive\\Documents\\AI4OPT\\Boston_Housing.csv"
)
# Fail early with a readable message instead of read.csv()'s connection error.
if (!file.exists(bostonCsvPath)) {
  stop("Boston housing CSV not found: ", bostonCsvPath, call. = FALSE)
}
Boston <- read.csv(bostonCsvPath)
# Untouched copy of the data as read from disk; `Boston` is modified below.
rawData <- Boston
# Data Cleaning ----
# First rows and column types, as a quick sanity check of the import.
head(Boston)
str(Boston)
# Check for duplicated rows (count of rows identical to an earlier row)
sum(duplicated(Boston))
# Check for missing values (total number of NA cells)
sum(is.na(Boston))
# Summary ----
# Summary statistics for every column
summary(Boston)
# Checking correlation between variables.
# corrplot is already attached at the top of the script.
# cor() only accepts numeric input, so restrict to numeric columns, and use
# complete rows only so missing values do not turn the coefficients into NA.
numericCols <- Boston[vapply(Boston, is.numeric, logical(1))]
corrplot(
  cor(numericCols, use = "complete.obs"),
  method = "number",
  type = "upper",
  diag = FALSE
)
# Bin MEDV into three equal-frequency groups: the break points are the
# 0%, 33.3%, 66.7% and 100% quantiles of the column.
cutpoints <- quantile(
  Boston$MEDV,
  probs = seq(0, 1, length.out = 4),
  na.rm = TRUE
)
# include.lowest = TRUE keeps the minimum MEDV inside the first bin.
Boston$MEDVQuantiles <- cut(
  Boston$MEDV,
  breaks = cutpoints,
  include.lowest = TRUE,
  labels = c("Low priced", "Mid Priced", "High priced")
)
# Number of rows falling into each price band
table(Boston$MEDVQuantiles)
# Count missing cells and, if there are any, keep only the rows of Boston
# that have no missing value in any column.
numberOfNA <- sum(is.na(Boston))
if (numberOfNA > 0) {
  Boston <- Boston[complete.cases(Boston), ]
}
# quantile() works on a numeric vector, not on a whole data frame, so compute
# the default five quantiles (0%, 25%, 50%, 75%, 100%) column by column for
# the numeric columns. The result is a matrix with one column per variable.
numericCols <- Boston[vapply(Boston, is.numeric, logical(1))]
vapply(numericCols, quantile, numeric(5), na.rm = TRUE)
library(Hmisc)
# Per-variable descriptive overview from Hmisc
describe(Boston)
library(ggplot2)
library(gridExtra)
# NOTE(review): the original `data(Boston, package"DMwR2")` was a syntax error
# (missing `=`). It is dropped rather than repaired: Boston is already loaded
# from the CSV above, and loading a packaged dataset under the same name would
# overwrite the cleaned data. Confirm no packaged copy is actually wanted.

# Per-column frequency tables. table() on the whole data frame would try to
# cross-tabulate every column against every other and cannot be allocated.
freqOcc <- lapply(Boston, table)

# Scatter plots of three predictors against MEDV. Columns are referenced
# through the data frame instead of attach(), which leaks column names onto
# the search path. If later code relied on bare column names, switch it to
# Boston$<column> or with(Boston, ...).
plot1 <- ggplot(Boston, aes(x = MEDV, y = RM)) +
  geom_point()
plot2 <- ggplot(Boston, aes(x = MEDV, y = LSTAT)) +
  geom_point()
plot3 <- ggplot(Boston, aes(x = MEDV, y = PTRATIO)) +
  geom_point()
# Show the three plots side by side in a single row
grid.arrange(plot1, plot2, plot3, nrow = 1)