Give an introduction here

Options for r chunks

# Setup: set the working directory, attach the packages, and load the data.
# NOTE(review): the path below is absolute and machine-specific; the relative
# read.csv() path further down is resolved against it, so this script only
# runs where this folder exists. Consider a project-relative path instead.
setwd("C:/Users/12267/Desktop/UWindsor/Winter 2021/MSCI 3230 Data Science Tools & Methods/RSTUDIO Work") # working directory that the relative data path depends on
# ---- Libraries used ----
library(ggplot2) # ggplot(), geom_*(), labs(), coord_cartesian() for the plots
library(gplots) # heatmap.2() for the correlation heat map
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(reshape) # NOTE(review): no reshape function is called in the visible code; confirm it is still needed

# ---- Read the data ----
# Load the CSV into the data frame `df` that every later statement uses.
# The path is relative to the working directory set above.
df <- read.csv("data/WestRoxbury (2).csv")
###########################

# Print the column names as a one-column matrix so each name appears on its
# own numbered row.
matrix(names(df), ncol = 1)
##       [,1]         
##  [1,] "TOTAL.VALUE"
##  [2,] "TAX"        
##  [3,] "LOT.SQFT"   
##  [4,] "YR.BUILT"   
##  [5,] "GROSS.AREA" 
##  [6,] "LIVING.AREA"
##  [7,] "FLOORS"     
##  [8,] "ROOMS"      
##  [9,] "BEDROOMS"   
## [10,] "FULL.BATH"  
## [11,] "HALF.BATH"  
## [12,] "KITCHEN"    
## [13,] "FIREPLACE"  
## [14,] "REMODEL"
# Rename the lot-area column. Matching on the column's name instead of its
# position (previously index 3) keeps the rename correct if the CSV column
# order changes, and makes it a no-op when the column is absent rather than
# silently renaming an unrelated column.
names(df)[names(df) == "LOT.SQFT"] <- "LOT.SIZE"

# Lower-case every column name; the plotting code below refers to the columns
# by these lower-case names (e.g. total.value, lot.size).
names(df) <- tolower(names(df))

Explanation goes here


# Box plot of total property value (a single quantitative column).
# Note: ggplot2 draws with grid graphics, so base-R par(mfrow = ...) does not
# arrange these plots into panels; each ggplot() call renders its own figure.
ggplot(df, aes(x = "", y = total.value)) +
  geom_boxplot(
    fill = "green", color = "blue",
    outlier.color = "red", outlier.shape = "o", width = 0.2
  ) +
  # x = "" blanks the title of the dummy x aesthetic, matching the lot-size
  # box plot; the y label drops the duplicated "in" of the earlier wording.
  labs(
    title = "Distribution of Total Property Value",
    x = "",
    y = "Value (in 000s)"
  ) +
  # coord_cartesian() zooms the view without dropping rows, so the box
  # statistics still use every observation; points outside 100-1200 are
  # simply not shown.
  coord_cartesian(ylim = c(100, 1200))
Figure 1.1

Figure 1.1

Explain the above Figure 1.1

# Box plots of total value, one per remodel category, filled by category.
# Outliers are drawn as red "o" characters.
ggplot(df, aes(x = remodel, y = total.value, fill = remodel)) +
  geom_boxplot(outlier.color = "red", outlier.shape = "o") +
  # The y label drops the duplicated "in" of the earlier "Value in (in 000s)".
  labs(
    title = "Distribution of Total value",
    x = "Remodel types",
    y = "Value (in 000s)"
  )
Figure 1.2

Figure 1.2

Explain the above Figure 1.2

# Single box plot of lot size across all rows; the empty-string x aesthetic
# gives one box, and the x-axis title is blanked.
ggplot(data = df, mapping = aes(x = "", y = lot.size)) +
  geom_boxplot(
    width = 0.2,
    fill = "green",
    colour = "blue",
    outlier.colour = "red",
    outlier.shape = "o"
  ) +
  ggtitle("Distribution of Lot Size") +
  xlab("") +
  ylab("Lot size (Sq Ft)")

# Scatter plots of pairwise relationships between the area and value columns.

# Total value against lot size.
ggplot(data = df, mapping = aes(x = lot.size, y = total.value)) +
  geom_point()

# Total value against gross area, points coloured by number of floors.
ggplot(
  data = df,
  mapping = aes(x = gross.area, y = total.value, colour = factor(floors))
) +
  geom_point()

# Total value against living area, points coloured by number of floors.
ggplot(
  data = df,
  mapping = aes(x = living.area, y = total.value, colour = factor(floors))
) +
  geom_point()

# Living area against gross area.
ggplot(data = df, mapping = aes(x = gross.area, y = living.area)) +
  geom_point()

## Bar charts

# Number of rows in each remodel category.
ggplot(data = df, mapping = aes(x = remodel, fill = remodel)) +
  geom_bar(stat = "count")

# Maximum total value within each remodel category, drawn as bars.
ggplot(
  data = df,
  mapping = aes(x = remodel, y = total.value, fill = remodel)
) +
  stat_summary(fun = "max", geom = "bar", position = "stack")

# The same per-category maxima as a table, for checking against the chart.
aggregate(total.value ~ remodel, data = df, FUN = max)
##   remodel total.value
## 1    None      1217.8
## 2     Old       815.3
## 3  Recent       935.1
## Histogram
# Distribution of total value in bins 50 units wide; red bars, blue outlines.
ggplot(data = df, mapping = aes(x = total.value)) +
  geom_histogram(binwidth = 50, fill = "red", colour = "blue")

## Heat map with values
# Print the current column names as a one-column matrix, one name per row,
# before selecting the columns for the heat map.
matrix(names(df), ncol = 1)
##       [,1]         
##  [1,] "total.value"
##  [2,] "tax"        
##  [3,] "lot.size"   
##  [4,] "yr.built"   
##  [5,] "gross.area" 
##  [6,] "living.area"
##  [7,] "floors"     
##  [8,] "rooms"      
##  [9,] "bedrooms"   
## [10,] "full.bath"  
## [11,] "half.bath"  
## [12,] "kitchen"    
## [13,] "fireplace"  
## [14,] "remodel"
# Correlation heat map of six numeric columns, with every cell annotated by
# its correlation rounded to two decimals.
df1 <- df[, c("total.value", "lot.size", "gross.area", "living.area", "floors", "rooms")]

# Compute the correlation matrix once and reuse it for both the cell colours
# and the cell labels, instead of calling cor() twice on the same data.
cor_mat <- cor(df1)

# Rowv/Colv = FALSE and dendrogram = "none" keep the columns in the order
# listed above; key and trace are switched off so only the annotated cells show.
heatmap.2(cor_mat, Rowv = FALSE, Colv = FALSE, dendrogram = "none",
          cellnote = round(cor_mat, 2),
          notecol = "black", key = FALSE, trace = "none", margins = c(10, 10))

Explain the output