## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: carData
## Loading required package: sandwich
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
## Loading required package: effects
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## The Commander GUI is launched only in interactive sessions
##
## Attaching package: 'Rcmdr'
## The following object is masked from 'package:base':
##
## errorCondition
> setwd("C:")
> houses <- read.csv("C:/Users/filip/OneDrive/Desktop/MSCI 3230/HousePrices.csv")
> houses <-
+ read.table("C:/Users/filip/OneDrive/Desktop/MSCI 3230/HousePrices.csv",
+ header=TRUE, stringsAsFactors=TRUE, sep=",", na.strings="NA", dec=".",
+ strip.white=TRUE)
> library(abind, pos=17)
> with(houses, Hist(Price, scale="frequency", breaks="Sturges",
+ col="darkgray"))
> Boxplot( ~ Price, data=houses, id=list(method="y"))
[1] "104"
> normalityTest(~Price, test="shapiro.test", data=houses)
Shapiro-Wilk normality test
data: Price
W = 0.98023, p-value = 0.05836
> with(houses, Hist(Price, groups=Neighborhood, scale="frequency",
+ breaks="Sturges", col="darkgray"))
## By using neighbourhood as a grouping variable, we are able to see the price distribution for each neighbourhood. All appear approximately normally distributed; however, West seems to be slightly right-skewed.
> with(houses, Hist(Price, groups=Brick, scale="frequency", breaks="Sturges",
+ col="darkgray"))
## The same can be said when using brick as a grouping variable: prices for houses with and without brick both appear approximately normally distributed.
> local({
+ .Table <- xtabs(~Neighborhood+Brick, data=houses)
+ cat("\nFrequency table:\n")
+ print(.Table)
+ .Test <- chisq.test(.Table, correct=FALSE)
+ print(.Test)
+ })
Frequency table:
Brick
Neighborhood No Yes
East 26 19
North 37 7
West 23 16
Pearson's Chi-squared test
data: .Table
X-squared = 8.7033, df = 2, p-value = 0.01289
> scatterplot(Price~Bathrooms, regLine=FALSE, smooth=FALSE, boxplots=FALSE,
+ data=houses)
## Using scatterplots to compare price with another variable can be a good way to visualize how price is affected by that variable. Prices of houses with 2 bathrooms appear approximately normally distributed, with a lower mean price than houses with 3 bathrooms. There is only one instance of a house with 4 bathrooms.
> scatterplot(Price~Bedrooms, regLine=FALSE, smooth=FALSE, boxplots=FALSE,
+ data=houses)
## The same can be said for bedrooms: as the number of bedrooms increases, so does price.
> scatterplot(Price~SqFt, regLine=FALSE, smooth=FALSE, boxplots=FALSE,
+ data=houses)
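## This is also the trend for square footage: price tends to increase with square footage, so a line of best fit through these points would have a positive slope (the correlation between Price and SqFt is about 0.55).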
> scatterplot(Price~Offers, regLine=FALSE, smooth=FALSE, boxplots=FALSE,
+ data=houses)
## By comparing offers and price, we can see that the more offers a house has received, the lower its price tends to be.
> cor(houses[,c("Bathrooms","Bedrooms","Price","SqFt")], use="complete")
Bathrooms Bedrooms Price SqFt
Bathrooms 1.0000000 0.4145560 0.5232578 0.5227453
Bedrooms 0.4145560 1.0000000 0.5259261 0.4838071
Price 0.5232578 0.5259261 1.0000000 0.5529822
SqFt 0.5227453 0.4838071 0.5529822 1.0000000