Sameer Mathur
# Load the housing rent data from the working directory.
# The file name is passed directly: wrapping a single literal in
# paste(..., sep = "") returned the same string and added nothing.
rent.df <- read.csv("HousingRentData.csv")

# NOTE(review): attach() is kept only because later statements in this script
# refer to bare column names (Rent, Bedroom, Distance). Prefer rent.df$col or
# with(rent.df, ...) in new code; attached columns are looked up after the
# global environment, so same-named workspace objects silently win.
attach(rent.df)

# Preview the first rows of the data frame.
head(rent.df)
Rent Bedroom Distance
1 2066 3 3
2 1373 3 1
3 911 1 3
4 964 1 3
5 1086 1 1
6 1610 3 2
# Inspect the structure of rent.df: number of observations, and the type and
# leading values of each column.
utils::str(rent.df)
'data.frame': 400 obs. of 3 variables:
$ Rent : int 2066 1373 911 964 1086 1610 1072 1990 1577 1468 ...
$ Bedroom : int 3 3 1 1 1 3 1 4 3 3 ...
$ Distance: int 3 1 3 3 1 2 1 1 3 1 ...
# Summarize the data with psych::describe(), keeping only the statistics of
# interest. Columns are selected by name rather than by position so the
# selection stays correct (or fails loudly) if the column layout of
# describe()'s result ever changes.
library(psych)
summary_cols <- c("n", "mean", "sd", "median", "min", "max")
describe(rent.df)[, summary_cols]
n mean sd median min max
Rent 400 1549.36 406.44 1616.0 672 2480
Bedroom 400 2.50 1.12 2.5 1 4
Distance 400 1.98 0.84 2.0 1 3
# Average rent for every (Distance, Bedroom) combination.
# Columns are taken from rent.df explicitly, so the result does not depend on
# attach(rent.df) having been called, nor on same-named objects that may exist
# in the workspace.
mytab <- aggregate(
  rent.df$Rent,
  by = list(rent.df$Distance, rent.df$Bedroom),
  FUN = mean
)
colnames(mytab) <- c("Distance", "Bedroom", "Average Rent")

# Print the table rounded to whole numbers.
round(mytab, 0)
Distance Bedroom Average Rent
1 1 1 982
2 2 1 989
3 3 1 1006
4 1 2 1683
5 2 2 1706
6 3 2 1689
7 1 3 1513
8 2 3 1516
9 3 3 1553
10 1 4 1976
11 2 4 2012
12 3 4 1979
# Standard deviation of rent for every (Distance, Bedroom) combination.
# Columns are taken from rent.df explicitly, so the result does not depend on
# attach(rent.df) having been called, nor on same-named objects that may exist
# in the workspace. Note that this overwrites the previous contents of mytab.
mytab <- aggregate(
  rent.df$Rent,
  by = list(rent.df$Distance, rent.df$Bedroom),
  FUN = sd
)
colnames(mytab) <- c("Distance", "Bedroom", "SD")

# Print the table rounded to whole numbers.
round(mytab, 0)
Distance Bedroom SD
1 1 1 92
2 2 1 130
3 3 1 113
4 1 2 179
5 2 2 158
6 3 2 189
7 1 3 188
8 2 3 236
9 3 3 251
10 1 4 193
11 2 4 216
12 3 4 233
# Draw a corrgram of the three variables in rent.df: shaded cells in the
# lower panel, the panel.conf display in the upper panel, and variable names
# as text labels. order = TRUE lets corrgram reorder the variables.
library(corrgram)
corrgram(
  rent.df[, c("Rent", "Bedroom", "Distance")],
  main = "A Corrgram of a Different Color",
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.conf
)