# This is a data visualization exercise based on the West Roxbury House Prices dataset. The purpose of this exercise is to show how to create various graphs, such as scatter plots, box plots, histograms, and heat maps, using R.
# Load the West Roxbury data set, then (optionally) give the third column the
# name "LOT.SIZE" and lower-case every column name in a single assignment.
df <- mlba::WestRoxbury
names(df) <- tolower(replace(names(df), 3, "LOT.SIZE"))
# Box plots: total value (overall and split by remodel type), lot size and
# living area, drawn together on one 2 x 2 page.
opar <- par(no.readonly = TRUE) # snapshot the graphics settings to restore below
par(
  mar = c(2, 4, 2, 2), # tighten the margins around each panel
  mfrow = c(2, 2) # 2 x 2 grid of panels, filled row by row
)

f <- factor(df$remodel) # grouping factor for the second panel

boxplot(
  df$total.value,
  main = "Distribution of Total Value",
  ylab = "in 000s",
  col = "cyan",
  border = "blue"
)
boxplot(
  df$total.value ~ f,
  data = df,
  main = "Total Value vs. Remodel Types",
  ylab = "in 000s",
  col = c("red", "green", "yellow"),
  border = "blue"
)
boxplot(
  df$lot.size,
  main = "Distribution of Lot Size",
  ylab = "Lot Size",
  col = "orange",
  border = "blue"
)
boxplot(
  df$living.area,
  main = "Distribution of Living Area",
  ylab = "Living Area",
  col = "cyan",
  border = "blue"
)

par(opar) # put the graphics settings back
# Histograms of total value, lot size, gross area and living area on one
# 2 x 2 page. Each panel is described by one entry in `hist_specs`.
opar <- par(no.readonly = TRUE) # snapshot the graphics settings to restore below
par(mar = c(2, 4, 2, 2), mfrow = c(2, 2))

hist_specs <- list(
  list(column = "total.value", bins = 20, fill = "red",
       title = "Histogram of Total Value"),
  list(column = "lot.size", bins = 40, fill = "purple",
       title = "Histogram of Lot Size"),
  list(column = "gross.area", bins = 20, fill = "green",
       title = "Histogram of Gross Area"),
  list(column = "living.area", bins = 20, fill = "yellow",
       title = "Histogram of Living Area")
)

for (spec in hist_specs) {
  hist(
    df[[spec$column]],
    breaks = spec$bins,
    col = spec$fill,
    main = spec$title,
    xlab = spec$column, # the x label is the column name itself
    ylab = "count"
  )
}

par(opar) # put the graphics settings back
library(ggplot2)
library(cowplot)
# Scatter plots of total value against lot size, living area and rooms, each
# with a fitted linear trend line and its standard-error band. All four panels
# share the same dollar-labelled y axis.

# Build a fresh y-axis scale whose tick labels carry a "$" prefix.
value_scale <- function() {
  value_breaks <- c(250, 500, 750, 1000, 1250)
  scale_y_continuous(breaks = value_breaks, labels = paste0("$", value_breaks))
}

# A: lot size vs. total value, uncoloured points.
p1 <- ggplot(df, aes(x = lot.size, y = total.value)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  value_scale()

# B: same axes, points coloured by number of floors. This is the only panel
# that shows a legend (placed horizontally at the bottom).
p2 <- ggplot(df, aes(x = lot.size, y = total.value)) +
  geom_point(aes(color = factor(floors)), size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  theme(legend.position = "bottom", legend.box = "horizontal") +
  value_scale()

# C: living area vs. total value, coloured by floors, legend hidden.
p3 <- ggplot(df, aes(x = living.area, y = total.value)) +
  geom_point(aes(color = factor(floors)), size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  theme(legend.position = "none") +
  value_scale()

# D: rooms vs. total value, coloured by floors, legend hidden.
p4 <- ggplot(df, aes(x = rooms, y = total.value)) +
  geom_point(aes(color = factor(floors)), size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  theme(legend.position = "none") +
  value_scale()

# Arrange the four panels in a 2 x 2 grid labelled A-D and draw it.
p <- cowplot::plot_grid(p1, p2, p3, p4, nrow = 2, labels = LETTERS[1:4])
p
library(ggplot2)
library(cowplot)
# Bar charts of mean total value by remodel type, floors, rooms and bedrooms.
# Each bar is filled by its own group (the grouping column turned into a
# factor), so the charts keep working however many groups the data contains;
# a fixed-length fill vector would fail as soon as the group count changed.

# Mean total value per remodel type.
Avg.value_1 <- aggregate(df$total.value, by = list(type = df$remodel), FUN = mean)
names(Avg.value_1) <- c("remodel", "Avg.Value")
p1 <- ggplot(data = Avg.value_1, mapping = aes(x = remodel, y = Avg.Value)) +
  geom_col(aes(fill = factor(remodel))) +
  geom_text(aes(label = round(Avg.Value)), size = 3) +
  scale_y_continuous(
    breaks = c(0, 100, 200, 300, 400),
    labels = c("$0", "$100", "$200", "$300", "$400")
  ) +
  theme(legend.position = "none")

# Mean total value per number of floors.
Avg.value_2 <- aggregate(df$total.value, by = list(type = df$floors), FUN = mean)
names(Avg.value_2) <- c("floors", "Avg.Value")
p2 <- ggplot(data = Avg.value_2, mapping = aes(x = floors, y = Avg.Value)) +
  geom_col(aes(fill = factor(floors))) +
  geom_text(aes(label = round(Avg.Value)), size = 3) +
  scale_y_continuous(
    breaks = c(0, 200, 400),
    labels = c("$0", "$200", "$400")
  ) +
  theme(legend.position = "none")

# Mean total value per number of rooms.
Avg.value_3 <- aggregate(df$total.value, by = list(type = df$rooms), FUN = mean)
names(Avg.value_3) <- c("rooms", "Avg.Value")
p3 <- ggplot(data = Avg.value_3, mapping = aes(x = rooms, y = Avg.Value)) +
  geom_col(aes(fill = factor(rooms))) +
  geom_text(aes(label = round(Avg.Value)), size = 3) +
  scale_y_continuous(
    breaks = c(0, 200, 400, 600),
    labels = c("$0", "$200", "$400", "$600")
  ) +
  theme(legend.position = "none")

# Mean total value per number of bedrooms.
Avg.value_4 <- aggregate(df$total.value, by = list(type = df$bedrooms), FUN = mean)
names(Avg.value_4) <- c("bedrooms", "Avg.Value")
p4 <- ggplot(data = Avg.value_4, mapping = aes(x = bedrooms, y = Avg.Value)) +
  geom_col(aes(fill = factor(bedrooms))) +
  geom_text(aes(label = round(Avg.Value)), size = 3) +
  scale_y_continuous(
    breaks = c(0, 250, 500, 750),
    labels = c("$0", "$250", "$500", "$750")
  ) +
  theme(legend.position = "none")

# Arrange the four charts in a 2 x 2 grid labelled A-D and draw it.
p <- cowplot::plot_grid(p1, p2, p3, p4, nrow = 2, labels = LETTERS[1:4])
p
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess

# Correlation heat map over columns 1, 3 and 5-9 of df. Every column is taken
# from df explicitly, so df does not need to be attached to the search path.
df2 <- df[, c(1, 3, 5:9)]
cor_matrix <- cor(df2) # computed once; used for both the colours and the cell labels
heatmap.2(
  cor_matrix,
  Rowv = FALSE, Colv = FALSE, dendrogram = "none", # keep the original column order
  cellnote = round(cor_matrix, 2), # print each correlation to 2 decimals
  notecol = "black",
  key = FALSE,
  trace = "none",
  margins = c(10, 10),
  main = "Correlation Values"
)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(viridis)
## Loading required package: viridisLite

# Parallel-coordinate plots of houses with at most four bedrooms, coloured by
# bedroom count, drawn under four different axis-scaling methods.
data <- subset(df, bedrooms <= 4)
data$bedrooms <- factor(data$bedrooms)

# Build one parallel-coordinate panel.
#   plot_data       data frame to plot
#   scale_method    value passed to ggparcoord()'s `scale` argument
#   title           panel title
#   legend_position where to put the legend ("none" hides it)
# Axes are columns 6, 1, 5, 3 and the colour group is column 9
# (presumably living.area, total.value, gross.area, lot.size and bedrooms --
# verify against names(df)).
parcoord_panel <- function(plot_data, scale_method, title,
                           legend_position = "none") {
  ggparcoord(
    plot_data,
    columns = c(6, 1, 5, 3),
    groupColumn = 9,
    scale = scale_method,
    showPoints = TRUE,
    title = title,
    alphaLines = 0.1
  ) +
    scale_color_viridis(discrete = TRUE) +
    theme(
      plot.title = element_text(size = 10),
      legend.position = legend_position
    ) +
    xlab("")
}

# Only the first panel carries the legend; the other three hide it.
p1 <- parcoord_panel(data, "globalminmax", "No scaling",
                     legend_position = "bottom")
p2 <- parcoord_panel(data, "uniminmax", "Standardize to Min=0 and Max=1")
p3 <- parcoord_panel(data, "std",
                     "Normalize univariately (subtract mean & divide by sd)")
p4 <- parcoord_panel(data, "center", "Standardize and center variable")

# Arrange the four panels in a 2 x 2 grid with automatic A-D labels and draw it.
p <- cowplot::plot_grid(p1, p2, p3, p4, nrow = 2, labels = "AUTO")
p
library(vcd)
## Loading required package: grid

# Mosaic plot of remodel type against number of floors. The contingency table
# is built straight from df, so df does not need to be attached, and it gets
# its own name instead of shadowing base::table().
floors_by_remodel <- xtabs(~ floors + remodel, data = df)
# NOTE(review): `color` does not look like a documented mosaic() argument --
# confirm it has any effect; it is kept unchanged here.
mosaic(~ remodel + floors, floors_by_remodel,
       shade = TRUE, legend = TRUE, color = TRUE)
## Cleveland Dot Plot using Cereals dataset
# One point per cereal, with cereals ordered on the y axis by calories.
# aes() looks up `name` and `calories` inside CR, so CR does not need to be
# attached to the search path.
CR <- mlba::Cereals
# NOTE(review): the repr.plot.* options are presumably read by a notebook
# front end (e.g. IRkernel) to size the figure -- confirm they matter here.
options(repr.plot.width = 1, repr.plot.height = 40)
ggplot(CR, aes(x = calories, y = reorder(name, calories))) +
  geom_point(shape = 16, size = 1) +
  xlab("calories") +
  ylab("Name of cereal") +
  theme(axis.text = element_text(size = 4.5, face = "bold"))