This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the training data; blank and whitespace-only cells are treated as NA.
# stringsAsFactors is set explicitly because the level inspection and recoding
# further down rely on text columns being factors, and the read.csv() default
# changed to FALSE in R 4.0.0.
gr_train <- read.csv("D:/Great Lakes Sales price/gr_train.csv",
                     na.strings = c(" ", "", "NA"),
                     stringsAsFactors = TRUE)
head(gr_train)
## PRT_ID AREA INT_SQFT DATE_SALE DIST_MAINROAD N_BEDROOM N_BATHROOM
## 1 P03210 Karapakkam 1004 04-05-2011 131 1 1
## 2 P09411 Anna Nagar 1986 19-12-2006 26 2 1
## 3 P01812 Adyar 909 04-02-2012 70 1 1
## 4 P05346 Velachery 1855 13-03-2010 14 3 2
## 5 P06210 Karapakkam 1226 05-10-2009 84 1 1
## 6 P00219 Chrompet 1220 11-09-2014 36 2 1
## N_ROOM SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE UTILITY_AVAIL
## 1 3 AbNormal Yes 15-05-1967 Commercial AllPub
## 2 5 AbNormal No 22-12-1995 Commercial AllPub
## 3 3 AbNormal Yes 09-02-1992 Commercial ELO
## 4 5 Family No 18-03-1988 Others NoSewr
## 5 3 AbNormal Yes 13-10-1979 Others AllPub
## 6 4 Partial No 12-09-2009 Commercial NoSeWa
## STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM QS_OVERALL REG_FEE
## 1 Paved A 4.0 3.9 4.9 4.330 380000
## 2 Gravel RH 4.9 4.2 2.5 3.765 760122
## 3 Gravel RL 4.1 3.8 2.2 3.090 421094
## 4 Paved I 4.7 3.9 3.6 4.010 356321
## 5 Gravel C 3.0 2.5 4.1 3.290 237000
## 6 No Access RH 4.5 2.6 3.1 3.320 409027
## COMMIS SALES_PRICE
## 1 144400 7600000
## 2 304049 21717770
## 3 92114 13159200
## 4 77042 9630290
## 5 74063 7406250
## 6 198316 12394750
dim(gr_train)
## [1] 7109 22
summary(gr_train)
## PRT_ID AREA INT_SQFT DATE_SALE
## P00001 : 1 Chrompet :1681 Min. : 500 06-10-2009: 12
## P00002 : 1 Karapakkam:1363 1st Qu.: 993 06-01-2009: 10
## P00004 : 1 KK Nagar : 996 Median :1373 12-04-2011: 10
## P00005 : 1 Velachery : 979 Mean :1382 15-03-2012: 10
## P00006 : 1 Anna Nagar: 783 3rd Qu.:1744 17-11-2010: 10
## P00007 : 1 Adyar : 773 Max. :2500 26-02-2012: 10
## (Other):7103 (Other) : 534 (Other) :7047
## DIST_MAINROAD N_BEDROOM N_BATHROOM N_ROOM
## Min. : 0.0 Min. :1.000 Min. :1.000 Min. :2.000
## 1st Qu.: 50.0 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:3.000
## Median : 99.0 Median :1.000 Median :1.000 Median :4.000
## Mean : 99.6 Mean :1.637 Mean :1.213 Mean :3.689
## 3rd Qu.:148.0 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:4.000
## Max. :200.0 Max. :4.000 Max. :2.000 Max. :6.000
## NA's :1 NA's :5
## SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE
## AdjLand :1433 No :3520 02-07-1987: 6 Comercial : 4
## Partial :1429 Noo: 2 04-04-1999: 5 Commercial:2325
## Normal Sale:1423 Yes:3587 02-10-1990: 4 House :2444
## AbNormal :1406 02-12-1982: 4 Other : 26
## Family :1403 03-01-1979: 4 Others :2310
## Adj Land : 6 03-10-1999: 4
## (Other) : 9 (Other) :7082
## UTILITY_AVAIL STREET MZZONE QS_ROOMS QS_BATHROOM
## All Pub: 1 Gravel :2520 A : 537 Min. :2.000 Min. :2.000
## AllPub :1886 No Access:2010 C : 550 1st Qu.:2.700 1st Qu.:2.700
## ELO :1522 NoAccess : 7 I : 525 Median :3.500 Median :3.500
## NoSeWa :1871 Pavd : 12 RH:1822 Mean :3.517 Mean :3.507
## NoSewr :1829 Paved :2560 RL:1858 3rd Qu.:4.300 3rd Qu.:4.300
## RM:1817 Max. :5.000 Max. :5.000
##
## QS_BEDROOM QS_OVERALL REG_FEE COMMIS
## Min. :2.000 Min. :2.000 Min. : 71177 Min. : 5055
## 1st Qu.:2.700 1st Qu.:3.130 1st Qu.:272406 1st Qu.: 84219
## Median :3.500 Median :3.500 Median :349486 Median :127628
## Mean :3.485 Mean :3.503 Mean :376938 Mean :141006
## 3rd Qu.:4.300 3rd Qu.:3.890 3rd Qu.:451562 3rd Qu.:184506
## Max. :5.000 Max. :4.970 Max. :983922 Max. :495405
## NA's :48
## SALES_PRICE
## Min. : 2156875
## 1st Qu.: 8272100
## Median :10335050
## Mean :10894910
## 3rd Qu.:12993900
## Max. :23667340
##
colSums(is.na(gr_train))
## PRT_ID AREA INT_SQFT DATE_SALE DIST_MAINROAD
## 0 0 0 0 0
## N_BEDROOM N_BATHROOM N_ROOM SALE_COND PARK_FACIL
## 1 5 0 0 0
## DATE_BUILD BUILDTYPE UTILITY_AVAIL STREET MZZONE
## 0 0 0 0 0
## QS_ROOMS QS_BATHROOM QS_BEDROOM QS_OVERALL REG_FEE
## 0 0 0 48 0
## COMMIS SALES_PRICE
## 0 0
str(gr_train)
## 'data.frame': 7109 obs. of 22 variables:
## $ PRT_ID : Factor w/ 7109 levels "P00001","P00002",..: 2267 6665 1271 3756 4394 157 6448 6859 2383 6821 ...
## $ AREA : Factor w/ 17 levels "Adyar","Adyr",..: 11 5 1 16 11 8 8 16 8 16 ...
## $ INT_SQFT : int 1004 1986 909 1855 1226 1220 1167 1847 771 1635 ...
## $ DATE_SALE : Factor w/ 2798 levels "01-01-2005","01-01-2007",..: 311 1746 289 1118 443 985 396 1114 489 1985 ...
## $ DIST_MAINROAD: int 131 26 70 14 84 36 137 176 175 74 ...
## $ N_BEDROOM : int 1 2 1 3 1 2 1 3 1 2 ...
## $ N_BATHROOM : int 1 1 1 2 1 1 1 2 1 1 ...
## $ N_ROOM : int 3 5 3 5 3 4 3 5 2 4 ...
## $ SALE_COND : Factor w/ 9 levels "Ab Normal","AbNormal",..: 2 2 2 5 2 7 7 5 4 2 ...
## $ PARK_FACIL : Factor w/ 3 levels "No","Noo","Yes": 3 1 3 1 3 1 1 1 1 1 ...
## $ DATE_BUILD : Factor w/ 5808 levels "01-01-1967","01-01-1970",..: 2732 4227 1512 3301 2407 2225 2146 2710 2521 4898 ...
## $ BUILDTYPE : Factor w/ 5 levels "Comercial","Commercial",..: 2 2 2 5 5 2 4 2 5 5 ...
## $ UTILITY_AVAIL: Factor w/ 5 levels "All Pub","AllPub",..: 2 2 3 5 2 4 2 2 5 3 ...
## $ STREET : Factor w/ 5 levels "Gravel","No Access",..: 5 1 1 5 1 2 2 1 5 2 ...
## $ MZZONE : Factor w/ 6 levels "A","C","I","RH",..: 1 4 5 3 2 4 5 6 6 3 ...
## $ QS_ROOMS : num 4 4.9 4.1 4.7 3 4.5 3.6 2.4 2.9 3.1 ...
## $ QS_BATHROOM : num 3.9 4.2 3.8 3.9 2.5 2.6 2.1 4.5 3.7 3.1 ...
## $ QS_BEDROOM : num 4.9 2.5 2.2 3.6 4.1 3.1 2.5 2.1 4 3.3 ...
## $ QS_OVERALL : num 4.33 3.77 3.09 4.01 3.29 ...
## $ REG_FEE : int 380000 760122 421094 356321 237000 409027 263152 604809 257578 323346 ...
## $ COMMIS : int 144400 304049 92114 77042 74063 198316 33955 235204 33236 121255 ...
## $ SALES_PRICE : int 7600000 21717770 13159200 9630290 7406250 12394750 8488790 16800250 8308970 8083650 ...
names(gr_train)
## [1] "PRT_ID" "AREA" "INT_SQFT" "DATE_SALE"
## [5] "DIST_MAINROAD" "N_BEDROOM" "N_BATHROOM" "N_ROOM"
## [9] "SALE_COND" "PARK_FACIL" "DATE_BUILD" "BUILDTYPE"
## [13] "UTILITY_AVAIL" "STREET" "MZZONE" "QS_ROOMS"
## [17] "QS_BATHROOM" "QS_BEDROOM" "QS_OVERALL" "REG_FEE"
## [21] "COMMIS" "SALES_PRICE"
levels(gr_train$AREA)
## [1] "Adyar" "Adyr" "Ana Nagar" "Ann Nagar" "Anna Nagar"
## [6] "Chormpet" "Chrmpet" "Chrompet" "Chrompt" "Karapakam"
## [11] "Karapakkam" "KK Nagar" "KKNagar" "T Nagar" "TNagar"
## [16] "Velachery" "Velchery"
gr_train$AREA=as.character(gr_train$AREA)
table(gr_train$AREA)
##
## Adyar Adyr Ana Nagar Ann Nagar Anna Nagar Chormpet
## 773 1 3 2 783 6
## Chrmpet Chrompet Chrompt Karapakam Karapakkam KK Nagar
## 6 1681 9 3 1363 996
## KKNagar T Nagar TNagar Velachery Velchery
## 1 496 5 979 2
library("car")
## Warning: package 'car' was built under R version 3.4.2
# Fold the misspelt variants of Adyar and Anna Nagar into the canonical names,
# applying the two recode specifications one after the other.
gr_train[["AREA"]] <- Reduce(
  function(values, spec) car::recode(values, recodes = spec),
  c(
    "c('Adyr')='Adyar'",
    "c('Ana Nagar','Ann Nagar')='Anna Nagar'"
  ),
  gr_train[["AREA"]]
)
#gr_train[gr_train$AREA=="Anna Nagar",]
table(gr_train$AREA)
##
## Adyar Anna Nagar Chormpet Chrmpet Chrompet Chrompt
## 774 788 6 6 1681 9
## Karapakam Karapakkam KK Nagar KKNagar T Nagar TNagar
## 3 1363 996 1 496 5
## Velachery Velchery
## 979 2
# Collapse the three misspellings of Chrompet into the canonical name.
gr_train[["AREA"]] <- car::recode(
  gr_train[["AREA"]],
  recodes = "c('Chormpet','Chrmpet','Chrompt')
='Chrompet'"
)
table(gr_train$AREA)
##
## Adyar Anna Nagar Chrompet Karapakam Karapakkam KK Nagar
## 774 788 1702 3 1363 996
## KKNagar T Nagar TNagar Velachery Velchery
## 1 496 5 979 2
# Apply the remaining one-to-one spelling fixes in sequence, then convert the
# cleaned column to a factor.
gr_train[["AREA"]] <- as.factor(Reduce(
  function(values, spec) car::recode(values, recodes = spec),
  c(
    "c('Karapakam')='Karapakkam'",
    "c('KKNagar')='KK Nagar'",
    "c('TNagar')='T Nagar'",
    "c('Velchery')='Velachery'"
  ),
  gr_train[["AREA"]]
))
levels(gr_train$AREA)
## [1] "Adyar" "Anna Nagar" "Chrompet" "Karapakkam" "KK Nagar"
## [6] "T Nagar" "Velachery"
table(gr_train$AREA)
##
## Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## 774 788 1702 1366 997 501
## Velachery
## 981
# Mean sale price per AREA. aggregate() already returns a numeric column, so
# the former factor -> character -> numeric round-trip (at best a no-op, at
# worst a rounding step) has been removed.
agg <- aggregate(SALES_PRICE ~ AREA, data = gr_train, FUN = mean, na.rm = TRUE)
agg$SALES_PRICE
## [1] 9185668 15168202 10013810 7340100 12696229 15616743 11047353
agg$AREA
## [1] Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## [7] Velachery
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(agg$SALES_PRICE)
## [1] "numeric"
library("ggplot2")
# Number of listings per area.
ggplot(data = gr_train, mapping = aes(x = AREA, fill = AREA)) + geom_bar()
# Mean price per area. The bars are drawn from the aggregated table so that
# each bar's height is the mean its label reports; previously one bar per
# listing was overplotted and every label sat at the same height. The label
# uses the column name rather than `agg$...` inside aes().
ggplot(data = agg, mapping = aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per area.
ggplot(data = gr_train, mapping = aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_boxplot()
ggplot(data = gr_train,mapping = aes(INT_SQFT))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gr_train,mapping = aes(x=INT_SQFT,y=SALES_PRICE))+geom_point(color='darkblue')
cor.test(log(gr_train$INT_SQFT),gr_train$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: log(gr_train$INT_SQFT) and gr_train$SALES_PRICE
## t = 64.993, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5957708 0.6249377
## sample estimates:
## cor
## 0.6105612
boxplot.stats(gr_train$INT_SQFT)$out
## integer(0)
library("lubridate")
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Both date columns arrive as factors holding day-month-year text
# (e.g. "04-05-2011"); parse them with lubridate::dmy().
class(gr_train[["DATE_SALE"]])
## [1] "factor"
gr_train[["DATE_SALE"]] <- dmy(gr_train[["DATE_SALE"]])
class(gr_train[["DATE_BUILD"]])
## [1] "factor"
gr_train[["DATE_BUILD"]] <- dmy(gr_train[["DATE_BUILD"]])
# Distribution of the distance to the main road. The former fill = SALES_PRICE
# mapping was dead code: the constant fill set on the layer overrides it.
ggplot(data = gr_train, mapping = aes(x = DIST_MAINROAD)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gr_train,mapping = aes(x=DIST_MAINROAD,y=SALES_PRICE))+geom_point(color='darkblue')
boxplot.stats(gr_train$DIST_MAINROAD)$out
## integer(0)
table(gr_train$N_BEDROOM)
##
## 1 2 3 4
## 3795 2352 707 254
class(gr_train$N_BEDROOM)
## [1] "integer"
# Treat the bedroom count as a categorical variable.
gr_train$N_BEDROOM <- as.factor(gr_train$N_BEDROOM)
# Mean sale price per bedroom count; the result is already numeric, so no
# factor round-trip is needed.
agg1 <- aggregate(SALES_PRICE ~ N_BEDROOM, data = gr_train, FUN = mean, na.rm = TRUE)
agg1$SALES_PRICE
## [1] 9790184 11817473 12478131 14451305
agg1$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(agg1$SALES_PRICE)
## [1] "numeric"
# Number of listings per bedroom count.
ggplot(data = gr_train, mapping = aes(x = N_BEDROOM, fill = N_BEDROOM)) + geom_bar()
# Mean price per bedroom count, drawn from the aggregated table so bar height
# and label agree (previously one bar per listing was overplotted).
ggplot(data = agg1, mapping = aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per bedroom count.
ggplot(data = gr_train, mapping = aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_boxplot()
which(is.na(gr_train$N_BEDROOM))
## [1] 4061
gr_train[is.na(gr_train$N_BEDROOM),]
## PRT_ID AREA INT_SQFT DATE_SALE DIST_MAINROAD N_BEDROOM
## 4061 P01066 Anna Nagar 1556 2013-01-16 181 <NA>
## N_BATHROOM N_ROOM SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE
## 4061 1 4 Partiall No 1986-01-23 House
## UTILITY_AVAIL STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 4061 NoSewr No Access RM 4.8 2.7 2
## QS_OVERALL REG_FEE COMMIS SALES_PRICE
## 4061 3.155 313136 107978 10797790
which(is.na(gr_train$N_BATHROOM))
## [1] 71 5088 6135 6372 6536
gr_train[is.na(gr_train$N_BATHROOM),]
## PRT_ID AREA INT_SQFT DATE_SALE DIST_MAINROAD N_BEDROOM
## 71 P05304 Anna Nagar 1589 2010-03-22 39 1
## 5088 P01333 Chrompet 1016 2012-08-02 105 1
## 6135 P01332 Chrompet 916 2012-08-02 173 1
## 6372 P01189 Chrompet 1035 2012-11-06 90 1
## 6536 P09189 Anna Nagar 1864 2007-03-05 184 2
## N_BATHROOM N_ROOM SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE
## 71 NA 4 Partial No 1966-04-02 Others
## 5088 NA 3 AbNormal Yes 1980-08-10 Others
## 6135 NA 3 Normal Sale Yes 1974-08-12 Others
## 6372 NA 3 Partial No 1981-11-14 Others
## 6536 NA 5 Family Yes 1997-03-07 Others
## UTILITY_AVAIL STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 71 AllPub Gravel RL 2.1 2.5 4.5
## 5088 NoSewr Gravel RM 3.2 4.2 2.0
## 6135 ELO Paved RL 3.4 3.5 3.0
## 6372 NoSeWa No Access RM 2.3 3.5 3.2
## 6536 NoSewr Paved RM 3.3 3.7 4.7
## QS_OVERALL REG_FEE COMMIS SALES_PRICE
## 71 3.16 451857 85486 12212350
## 5088 3.24 330086 106479 10647920
## 6135 NA 265423 44237 8847420
## 6372 3.05 223403 24823 8274200
## 6536 3.96 575606 124455 15556920
# Treat the bathroom count as a categorical variable.
gr_train$N_BATHROOM <- as.factor(gr_train$N_BATHROOM)
# Mean sale price per bathroom count; the result is already numeric, so no
# factor round-trip is needed.
agg2 <- aggregate(SALES_PRICE ~ N_BATHROOM, data = gr_train, FUN = mean, na.rm = TRUE)
agg2$SALES_PRICE
## [1] 10681096 11682991
agg2$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(agg2$SALES_PRICE)
## [1] "numeric"
# Number of listings per bathroom count.
ggplot(data = gr_train, mapping = aes(x = N_BATHROOM, fill = N_BATHROOM)) + geom_bar()
# Mean price per bathroom count, drawn from the aggregated table so bar height
# and label agree (previously one bar per listing was overplotted).
ggplot(data = agg2, mapping = aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per bathroom count.
ggplot(data = gr_train, mapping = aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_boxplot()
table(gr_train$N_ROOM)
##
## 2 3 4 5 6
## 921 2125 2563 1246 254
class(gr_train$N_ROOM)
## [1] "integer"
# Treat the room count as a categorical variable.
gr_train$N_ROOM <- as.factor(gr_train$N_ROOM)
# Mean sale price per room count; the result is already numeric, so no factor
# round-trip is needed.
agg3 <- aggregate(SALES_PRICE ~ N_ROOM, data = gr_train, FUN = mean, na.rm = TRUE)
agg3$SALES_PRICE
## [1] 6982718 9298812 11691432 14145331 14451305
agg3$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(agg3$SALES_PRICE)
## [1] "numeric"
# Number of listings per room count.
ggplot(data = gr_train, mapping = aes(x = N_ROOM, fill = N_ROOM)) + geom_bar()
# Mean price per room count, drawn from the aggregated table so bar height and
# label agree (previously one bar per listing was overplotted).
ggplot(data = agg3, mapping = aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per room count.
ggplot(data = gr_train, mapping = aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_boxplot()
levels(gr_train$SALE_COND)
## [1] "Ab Normal" "AbNormal" "Adj Land" "AdjLand" "Family"
## [6] "Normal Sale" "Partial" "Partiall" "PartiaLl"
table(gr_train$SALE_COND)
##
## Ab Normal AbNormal Adj Land AdjLand Family Normal Sale
## 5 1406 6 1433 1403 1423
## Partial Partiall PartiaLl
## 1429 3 1
# Merge the misspelt sale-condition levels into their canonical spelling,
# applying the three recode specifications one after the other.
gr_train[["SALE_COND"]] <- Reduce(
  function(values, spec) car::recode(values, recodes = spec),
  c(
    "c('Ab Normal')='AbNormal'",
    "c('Adj Land')='AdjLand'",
    "c('Partiall','PartiaLl')='Partial'"
  ),
  gr_train[["SALE_COND"]]
)
levels(gr_train$SALE_COND)
## [1] "AbNormal" "AdjLand" "Family" "Normal Sale" "Partial"
table(gr_train$SALE_COND)
##
## AbNormal AdjLand Family Normal Sale Partial
## 1411 1439 1403 1423 1433
gr_train$SALE_COND <- as.factor(gr_train$SALE_COND)
# Mean sale price per sale condition; the result is already numeric, so no
# factor round-trip is needed.
agg4 <- aggregate(SALES_PRICE ~ SALE_COND, data = gr_train, FUN = mean, na.rm = TRUE)
agg4$SALES_PRICE
## [1] 10914632 11209315 10736415 10994178 10616369
agg4$SALE_COND
## [1] AbNormal AdjLand Family Normal Sale Partial
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(agg4$SALES_PRICE)
## [1] "numeric"
# Number of listings per sale condition.
ggplot(data = gr_train, mapping = aes(x = SALE_COND, fill = SALE_COND)) + geom_bar()
# Mean price per sale condition, drawn from the aggregated table so bar height
# and label agree (previously one bar per listing was overplotted).
ggplot(data = agg4, mapping = aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per sale condition.
ggplot(data = gr_train, mapping = aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_boxplot()
levels(gr_train$PARK_FACIL)
## [1] "No" "Noo" "Yes"
table(gr_train$PARK_FACIL)
##
## No Noo Yes
## 3520 2 3587
# Merge the misspelt "Noo" level into "No".
gr_train$PARK_FACIL <- recode(gr_train$PARK_FACIL, "c('Noo')='No'")
ggplot(data = gr_train, mapping = aes(x = PARK_FACIL, fill = PARK_FACIL)) + geom_bar()
gr_train$PARK_FACIL <- as.factor(gr_train$PARK_FACIL)
# Mean sale price by parking facility; the result is already numeric, so no
# factor round-trip is needed.
agg5 <- aggregate(SALES_PRICE ~ PARK_FACIL, data = gr_train, FUN = mean, na.rm = TRUE)
agg5$SALES_PRICE
## [1] 10338050 11441678
agg5$PARK_FACIL
## [1] No Yes
## Levels: No Yes
class(agg5$SALES_PRICE)
## [1] "numeric"
# Number of listings with / without parking.
ggplot(data = gr_train, mapping = aes(x = PARK_FACIL, fill = PARK_FACIL)) + geom_bar()
# Mean price by parking facility, drawn from the aggregated table so bar height
# and label agree (previously one bar per listing was overplotted).
ggplot(data = agg5, mapping = aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution by parking facility.
ggplot(data = gr_train, mapping = aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_boxplot()
levels(gr_train$BUILDTYPE)
## [1] "Comercial" "Commercial" "House" "Other" "Others"
table(gr_train$BUILDTYPE)
##
## Comercial Commercial House Other Others
## 4 2325 2444 26 2310
# Merge the misspelt build-type levels, plot the cleaned counts, then switch
# the column to character so individual values can be relabelled afterwards.
gr_train[["BUILDTYPE"]] <- Reduce(
  function(values, spec) car::recode(values, recodes = spec),
  c(
    "c('Comercial','Commercil')='Commercial'",
    "c('Other')='Others'"
  ),
  gr_train[["BUILDTYPE"]]
)
ggplot(gr_train, aes(x = BUILDTYPE, fill = BUILDTYPE)) +
  geom_bar()
gr_train[["BUILDTYPE"]] <- as.character(gr_train[["BUILDTYPE"]])
colSums(is.na(gr_train))
## PRT_ID AREA INT_SQFT DATE_SALE DIST_MAINROAD
## 0 0 0 0 0
## N_BEDROOM N_BATHROOM N_ROOM SALE_COND PARK_FACIL
## 1 5 0 0 0
## DATE_BUILD BUILDTYPE UTILITY_AVAIL STREET MZZONE
## 0 0 0 0 0
## QS_ROOMS QS_BATHROOM QS_BEDROOM QS_OVERALL REG_FEE
## 0 0 0 48 0
## COMMIS SALES_PRICE
## 0 0
# Relabel two build types, then convert back to a factor.
# NOTE(review): "Commercial" becomes "3BHK APTS" and "Others" becomes
# "2BHK APTS" - confirm this mapping is intended.
gr_train$BUILDTYPE[gr_train$BUILDTYPE == "Commercial"] <- "3BHK APTS"
gr_train$BUILDTYPE[gr_train$BUILDTYPE == "Others"] <- "2BHK APTS"
gr_train$BUILDTYPE <- as.factor(gr_train$BUILDTYPE)
# Mean sale price per build type; the result is already numeric, so no factor
# round-trip is needed.
agg6 <- aggregate(SALES_PRICE ~ BUILDTYPE, data = gr_train, FUN = mean, na.rm = TRUE)
agg6$SALES_PRICE
## [1] 9807998 13869836 9098847
agg6$BUILDTYPE
## [1] 2BHK APTS 3BHK APTS House
## Levels: 2BHK APTS 3BHK APTS House
nlevels(agg6$BUILDTYPE)
## [1] 3
class(agg6$SALES_PRICE)
## [1] "numeric"
# Number of listings per build type.
ggplot(data = gr_train, mapping = aes(x = BUILDTYPE, fill = BUILDTYPE)) + geom_bar()
# Mean price per build type, drawn from the aggregated table so bar height and
# label agree (previously one bar per listing was overplotted).
ggplot(data = agg6, mapping = aes(x = BUILDTYPE, y = SALES_PRICE, fill = BUILDTYPE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per build type.
ggplot(data = gr_train, mapping = aes(x = BUILDTYPE, y = SALES_PRICE, fill = BUILDTYPE)) +
  geom_boxplot()
levels(gr_train$UTILITY_AVAIL)
## [1] "All Pub" "AllPub" "ELO" "NoSeWa" "NoSewr "
table(gr_train$UTILITY_AVAIL)
##
## All Pub AllPub ELO NoSeWa NoSewr
## 1 1886 1522 1871 1829
# Normalise the utility levels: strip the trailing space from "NoSewr " and
# merge "All Pub" into "AllPub".
gr_train$UTILITY_AVAIL <- recode(gr_train$UTILITY_AVAIL, "c('NoSewr ')='NoSewr'")
gr_train$UTILITY_AVAIL <- recode(gr_train$UTILITY_AVAIL, "c('All Pub')='AllPub'")
gr_train$UTILITY_AVAIL <- as.factor(gr_train$UTILITY_AVAIL)
# Mean sale price per utility level; the result is already numeric, so no
# factor round-trip is needed.
agg7 <- aggregate(SALES_PRICE ~ UTILITY_AVAIL, data = gr_train, FUN = mean, na.rm = TRUE)
agg7$SALES_PRICE
## [1] 11210016 10469415 10893079 10925759
agg7$UTILITY_AVAIL
## [1] AllPub ELO NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(agg7$UTILITY_AVAIL)
## [1] 4
class(agg7$SALES_PRICE)
## [1] "numeric"
# Number of listings per utility level.
ggplot(data = gr_train, mapping = aes(x = UTILITY_AVAIL, fill = UTILITY_AVAIL)) + geom_bar()
# Mean price per utility level, drawn from the aggregated table so bar height
# and label agree (previously one bar per listing was overplotted).
ggplot(data = agg7, mapping = aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per utility level.
ggplot(data = gr_train, mapping = aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_boxplot()
levels(gr_train$STREET)
## [1] "Gravel" "No Access" "NoAccess" "Pavd" "Paved"
table(gr_train$STREET)
##
## Gravel No Access NoAccess Pavd Paved
## 2520 2010 7 12 2560
# Merge the misspelt street levels into their canonical spelling.
gr_train$STREET <- recode(gr_train$STREET, "c('NoAccess')='No Access'")
gr_train$STREET <- recode(gr_train$STREET, "c('Pavd')='Paved'")
gr_train$STREET <- as.factor(gr_train$STREET)
# Mean sale price per street type; the result is already numeric, so no factor
# round-trip is needed.
agg8 <- aggregate(SALES_PRICE ~ STREET, data = gr_train, FUN = mean, na.rm = TRUE)
agg8$SALES_PRICE
## [1] 11444619 10000738 11057537
agg8$STREET
## [1] Gravel No Access Paved
## Levels: Gravel No Access Paved
nlevels(agg8$STREET)
## [1] 3
class(agg8$SALES_PRICE)
## [1] "numeric"
# Number of listings per street type.
ggplot(data = gr_train, mapping = aes(x = STREET, fill = STREET)) + geom_bar()
# Mean price per street type, drawn from the aggregated table so bar height and
# label agree (previously one bar per listing was overplotted).
ggplot(data = agg8, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per street type.
ggplot(data = gr_train, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_boxplot()
levels(gr_train$MZZONE)
## [1] "A" "C" "I" "RH" "RL" "RM"
table(gr_train$MZZONE)
##
## A C I RH RL RM
## 537 550 525 1822 1858 1817
gr_train$MZZONE <- as.factor(gr_train$MZZONE)
# Mean sale price per zone; the result is already numeric, so no factor
# round-trip is needed.
agg9 <- aggregate(SALES_PRICE ~ MZZONE, data = gr_train, FUN = mean, na.rm = TRUE)
agg9$SALES_PRICE
## [1] 7292591 8052270 8738633 11039972 11765763 12407070
agg9$MZZONE
## [1] A C I RH RL RM
## Levels: A C I RH RL RM
nlevels(agg9$MZZONE)
## [1] 6
class(agg9$SALES_PRICE)
## [1] "numeric"
# Number of listings per zone.
ggplot(data = gr_train, mapping = aes(x = MZZONE, fill = MZZONE)) + geom_bar()
# Mean price per zone, drawn from the aggregated table so bar height and label
# agree (previously one bar per listing was overplotted).
ggplot(data = agg9, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per zone.
ggplot(data = gr_train, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_boxplot()
summary(gr_train$QS_ROOMS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.700 3.500 3.517 4.300 5.000
table(gr_train$QS_ROOMS)
##
## 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3 3.1 3.2 3.3 3.4 3.5 3.6 3.7
## 203 236 213 224 208 265 237 200 226 220 228 230 208 239 240 227 255 205
## 3.8 3.9 4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5
## 259 245 218 222 239 225 219 218 252 239 239 242 228
# Bin the rooms quality score into grades 2-5:
# > 4.4 -> 5, > 3.5 -> 4, > 2.4 -> 3, otherwise 2.
gr_train$QS_ROOMS <- with(
  gr_train,
  ifelse(QS_ROOMS > 4.4, 5,
         ifelse(QS_ROOMS > 3.5, 4,
                ifelse(QS_ROOMS > 2.4, 3, 2)))
)
gr_train$QS_ROOMS <- as.factor(gr_train$QS_ROOMS)
# Mean sale price per grade; the result is already numeric, so no factor
# round-trip is needed.
agg10 <- aggregate(SALES_PRICE ~ QS_ROOMS, data = gr_train, FUN = mean, na.rm = TRUE)
agg10$SALES_PRICE
## [1] 10810950 10802809 10978843 10999236
agg10$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg10$QS_ROOMS)
## [1] 4
class(agg10$SALES_PRICE)
## [1] "numeric"
# Number of listings per grade.
ggplot(data = gr_train, mapping = aes(x = QS_ROOMS, fill = QS_ROOMS)) + geom_bar()
# Mean price per grade, drawn from the aggregated table so bar height and label
# agree (previously one bar per listing was overplotted).
ggplot(data = agg10, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per grade.
ggplot(data = gr_train, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()
table(gr_train$QS_BATHROOM)
##
## 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3 3.1 3.2 3.3 3.4 3.5 3.6 3.7
## 222 224 234 220 230 233 226 256 206 228 241 232 226 227 234 225 221 251
## 3.8 3.9 4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5
## 209 211 232 210 237 224 219 231 234 247 255 245 219
# Bin the bathroom quality score into grades 2-5:
# > 4.4 -> 5, > 3.5 -> 4, > 2.4 -> 3, otherwise 2.
gr_train$QS_BATHROOM <- with(
  gr_train,
  ifelse(QS_BATHROOM > 4.4, 5,
         ifelse(QS_BATHROOM > 3.5, 4,
                ifelse(QS_BATHROOM > 2.4, 3, 2)))
)
gr_train$QS_BATHROOM <- as.factor(gr_train$QS_BATHROOM)
# Mean sale price per grade; the result is already numeric, so no factor
# round-trip is needed.
agg11 <- aggregate(SALES_PRICE ~ QS_BATHROOM, data = gr_train, FUN = mean, na.rm = TRUE)
agg11$SALES_PRICE
## [1] 10894591 10958150 10825820 10880414
agg11$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg11$QS_BATHROOM)
## [1] 4
class(agg11$SALES_PRICE)
## [1] "numeric"
# Number of listings per grade.
ggplot(data = gr_train, mapping = aes(x = QS_BATHROOM, fill = QS_BATHROOM)) + geom_bar()
# Mean price per grade, drawn from the aggregated table so bar height and label
# agree (previously one bar per listing was overplotted).
ggplot(data = agg11, mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per grade.
ggplot(data = gr_train, mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()
table(gr_train$QS_BEDROOM)
##
## 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3 3.1 3.2 3.3 3.4 3.5 3.6 3.7
## 221 242 237 200 244 226 273 222 210 219 241 243 253 234 239 237 225 203
## 3.8 3.9 4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5
## 244 220 248 223 212 237 237 227 233 228 211 203 217
# Bin the bedroom quality score into grades 2-5:
# > 4.4 -> 5, > 3.5 -> 4, > 2.4 -> 3, otherwise 2.
gr_train$QS_BEDROOM <- with(
  gr_train,
  ifelse(QS_BEDROOM > 4.4, 5,
         ifelse(QS_BEDROOM > 3.5, 4,
                ifelse(QS_BEDROOM > 2.4, 3, 2)))
)
gr_train$QS_BEDROOM <- as.factor(gr_train$QS_BEDROOM)
# Mean sale price per grade; the result is already numeric, so no factor
# round-trip is needed.
agg12 <- aggregate(SALES_PRICE ~ QS_BEDROOM, data = gr_train, FUN = mean, na.rm = TRUE)
agg12$SALES_PRICE
## [1] 10670155 10905621 10986054 10927167
agg12$QS_BEDROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg12$QS_BEDROOM)
## [1] 4
class(agg12$SALES_PRICE)
## [1] "numeric"
# Number of listings per grade.
ggplot(data = gr_train, mapping = aes(x = QS_BEDROOM, fill = QS_BEDROOM)) + geom_bar()
# Mean price per grade, drawn from the aggregated table so bar height and label
# agree (previously one bar per listing was overplotted).
ggplot(data = agg12, mapping = aes(x = QS_BEDROOM, y = SALES_PRICE, fill = QS_BEDROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = -0.5)
# Price distribution per grade.
ggplot(data = gr_train, mapping = aes(x = QS_BEDROOM, y = SALES_PRICE, fill = QS_BEDROOM)) +
  geom_boxplot()
# House age at the time of sale: sale date minus build date, in days.
gr_train[["houseage"]] <- difftime(
  gr_train[["DATE_SALE"]],
  gr_train[["DATE_BUILD"]],
  units = "days"
)
class(gr_train[["houseage"]])
## [1] "difftime"
# Keep only the plain number of days.
gr_train[["houseage"]] <- as.numeric(gr_train[["houseage"]])
ggplot(data = gr_train,mapping = aes(houseage))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gr_train,mapping = aes(x=houseage,y=SALES_PRICE))+geom_point(color='darkblue')
ggplot(data = gr_train,mapping = aes(REG_FEE))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gr_train,mapping = aes(x=REG_FEE,y=SALES_PRICE))+geom_point(color='darkblue')
# Correlation between the registration fee and the sale price. This call sits
# in the REG_FEE section (between its scatter plot and its outlier check) but
# previously tested INT_SQFT, which has its own section earlier on.
# NOTE: the rendered output that follows was produced by the old INT_SQFT call
# and needs a re-knit to reflect REG_FEE.
cor.test(gr_train$REG_FEE, gr_train$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: gr_train$INT_SQFT and gr_train$SALES_PRICE
## t = 65.259, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5973789 0.6264570
## sample estimates:
## cor
## 0.6121249
boxplot.stats(gr_train$REG_FEE)$out
## [1] 760122 742113 917550 808639 816325 885666 792637 809377 839848 792581
## [11] 850347 807919 804310 722637 725167 817115 725563 726866 858938 820722
## [21] 751564 740832 762560 747611 732846 923026 963029 851979 823239 880115
## [31] 752727 775789 913666 797727 836283 765428 893744 940813 782091 756758
## [41] 759431 891866 778771 800245 777136 729338 769310 769130 731893 786861
## [51] 809394 740382 787401 746911 941567 752815 800231 781305 752593 790306
## [61] 780507 745098 840592 746576 816678 812872 807579 808102 781044 823645
## [71] 777513 765316 850799 855567 726560 727590 770309 723532 767960 735740
## [81] 724899 826735 808496 844168 728057 909160 787045 783698 759022 802148
## [91] 738288 773321 870464 769757 731292 729771 802811 787504 775222 983922
## [101] 720625 741521 887450 730011 847638 818342 741809 770779 821501 732764
## [111] 743729 843562 931224 810693 773456 827999 809661 725904 758028 845790
## [121] 731190 755385 732194 742623 829837 838032 865802 792155 753674 883120
## [131] 839975 821637 869438 733135 746538 732043 803373 743923 805571 740159
## [141] 751507 740232 766669 741852 880095 761653 760083 839394 735368 947124
## [151] 821467 853494 929714 904779 845947 765080 733335 815506 853017 839704
## [161] 739547 870024 874696 790990 731930 745579 804292 866652 761292 860513
## [171] 729450 742540 747262 863115 731464 724201 750484 823250 782811 869775
## [181] 798100 952411 761636 826735 727335 745505 760754 839611 722752 753187
## [191] 743552 803512 854535 772422 936314 756744 783738 823071 851325 743023
## [201] 825243 760155 779777 942859 740155 903181 860696 735253 723058 750333
## [211] 883743 783633 757649 752460 852466 981117 751181 756135 790593 888724
## [221] 774637 815534 736659 823312 754472 747457 826242 828486
ggplot(data = gr_train,mapping = aes(COMMIS))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gr_train,mapping = aes(x=COMMIS,y=SALES_PRICE))+geom_point(color='darkblue')
cor.test(gr_train$COMMIS,gr_train$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: gr_train$COMMIS and gr_train$SALES_PRICE
## t = 67.723, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6119380 0.6402014
## sample estimates:
## cor
## 0.6262754
boxplot.stats(gr_train$COMMIS)$out
## [1] 339934 471247 387484 348491 456247 495405 364434 368162 381199 352277
## [11] 342558 351886 397115 375431 342437 421452 443349 343422 378677 341073
## [21] 408950 402279 337717 442485 348080 372260 418432 466156 343858 341883
## [31] 374390 422406 341707 389978 455034 406315 389863 394030 357180 371273
## [41] 406974 347700 379683 345860 362422 470784 334948 345888 395932 416414
## [51] 340569 347505 366648 373286 399281 377053 469920 427494 365960 352022
## [61] 343873 339524 370996 336398 370640 362451 463218 372519 349046 371776
## [71] 357993 427485 363804 347960 339749 371953 361696 341155 391849 424072
## [81] 344381 355720 342114 393752 491961 360312 404278 359952 351174 359272
## [91] 374378 479297 340751 343440 348975 390689 362952 424557 340400 433936
## [101] 381920 348458 481001 337500 398987 398245 361706 342639 346772 349731
## [111] 360667 361417 355848 348714 341124 396831 362683 437319 377184 359347
## [121] 399868 368833 339657 355229 352530 354798 426508 340052 336820 409087
## [131] 348943 353696 372525 409432 391867 433326 345743 347910 369468 419917
## [141] 485924 382492 355800 416606 340923 344511 377825 389456 371776 393009
## [151] 363362 370852 390114 394428 343163 444749 379879 339206 350600 429011
## [161] 420340 368297 389913 356408 342111 396783 475795 451314 363424 337244
## [171] 367599 386469 373335 370302 415659 392474 378853 376956 411656 373606
## [181] 353880
summary(gr_train$QS_OVERALL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.000 3.130 3.500 3.503 3.890 4.970 48
# Bin the overall quality score into grades 2-5 (cut points 2.4, 3.5 and 4.4,
# each upper bound inclusive) and store the grade as a factor. Missing scores
# stay NA.
gr_train[["QS_OVERALL"]] <- as.factor(
  findInterval(gr_train[["QS_OVERALL"]], c(2.4, 3.5, 4.4), left.open = TRUE) + 2
)
colnames(gr_train)
## [1] "PRT_ID" "AREA" "INT_SQFT" "DATE_SALE"
## [5] "DIST_MAINROAD" "N_BEDROOM" "N_BATHROOM" "N_ROOM"
## [9] "SALE_COND" "PARK_FACIL" "DATE_BUILD" "BUILDTYPE"
## [13] "UTILITY_AVAIL" "STREET" "MZZONE" "QS_ROOMS"
## [17] "QS_BATHROOM" "QS_BEDROOM" "QS_OVERALL" "REG_FEE"
## [21] "COMMIS" "SALES_PRICE" "houseage"
# Drop the identifier and the two raw date columns (formerly selected by the
# positions 1, 4 and 11). Selecting by name keeps this correct if the column
# order ever changes.
gr_train <- gr_train[, !(names(gr_train) %in% c("PRT_ID", "DATE_SALE", "DATE_BUILD")),
                     drop = FALSE]
colSums(is.na(gr_train))
## AREA INT_SQFT DIST_MAINROAD N_BEDROOM N_BATHROOM
## 0 0 0 1 5
## N_ROOM SALE_COND PARK_FACIL BUILDTYPE UTILITY_AVAIL
## 0 0 0 0 0
## STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 0 0 0 0 0
## QS_OVERALL REG_FEE COMMIS SALES_PRICE houseage
## 48 0 0 0 0
library("missForest")
## Warning: package 'missForest' was built under R version 3.4.3
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## Loading required package: foreach
## Loading required package: itertools
## Warning: package 'itertools' was built under R version 3.4.2
## Loading required package: iterators
# Impute the remaining NAs (N_BEDROOM, N_BATHROOM, QS_OVERALL) with missForest.
# The seed makes the random-forest imputation reproducible. The fit is kept in
# its own object so the out-of-bag error can be read before the imputed data
# frame replaces gr_train; previously gr_train was overwritten first, so
# gr_train$OOBerror was always NULL.
set.seed(123)
gr_imputed <- missForest(gr_train, verbose = TRUE)
# (The log below comes from the original, unseeded run.)
## missForest iteration 1 in progress...done!
## estimated error(s): 0 0.01105671
## difference(s): 0 0.0002511906
## time: 2.82 seconds
##
## missForest iteration 2 in progress...done!
## estimated error(s): 0 0.01110729
## difference(s): 0 0
## time: 2.09 seconds
##
## missForest iteration 3 in progress...done!
## estimated error(s): 0 0.01101625
## difference(s): 0 2.009525e-05
## time: 2.08 seconds
class(gr_imputed)
## [1] "missForest"
# Out-of-bag imputation error estimate of the fit.
gr_imputed$OOBerror
# Continue with the imputed data frame only.
gr_train <- data.frame(gr_imputed$ximp)
colSums(is.na(gr_train))
## AREA INT_SQFT DIST_MAINROAD N_BEDROOM N_BATHROOM
## 0 0 0 0 0
## N_ROOM SALE_COND PARK_FACIL BUILDTYPE UTILITY_AVAIL
## 0 0 0 0 0
## STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 0 0 0 0 0
## QS_OVERALL REG_FEE COMMIS SALES_PRICE houseage
## 0 0 0 0 0
# One data frame per build type, used for the per-type analysis that follows.
House <- gr_train[gr_train[["BUILDTYPE"]] == "House", , drop = FALSE]
BHK_2 <- gr_train[gr_train[["BUILDTYPE"]] == "2BHK APTS", , drop = FALSE]
BHK_3 <- gr_train[gr_train[["BUILDTYPE"]] == "3BHK APTS", , drop = FALSE]
summary(House$SALES_PRICE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2640250 7407715 8985370 9098847 10844520 15880930
ggplot(data = House,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")
ggplot(data = BHK_2,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")
ggplot(data = BHK_3,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")
ggplot()+geom_density(data = House,mapping = aes(SALES_PRICE),colour="red")+
geom_density(data = BHK_2,mapping = aes(SALES_PRICE),colour="blue")+
geom_density(data = BHK_3,mapping = aes(SALES_PRICE),colour="green")
library("car")
boxplot.stats(House$SALES_PRICE)$out
## numeric(0)
boxplot.stats(BHK_2$SALES_PRICE)$out
## [1] 2156875
boxplot.stats(BHK_3$SALES_PRICE)$out
## numeric(0)
#1 AREA
# Counts per AREA and mean SALES_PRICE per AREA, for each building type.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers below are fed the raw rows, so
# many bars overlap per category, and the text layer places every group mean
# at one shared height (the mean of the group means). Consider plotting the
# aggregated frame instead if a mean-price bar chart is intended.
levels(House$AREA)
## [1] "Adyar" "Anna Nagar" "Chrompet" "Karapakkam" "KK Nagar"
## [6] "T Nagar" "Velachery"
table(House$AREA)
##
## Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## 263 276 591 486 348 156
## Velachery
## 324
ggplot(House, aes(x = AREA, fill = AREA)) + geom_bar()
aggr1 <- aggregate(SALES_PRICE ~ AREA, data = House, FUN = mean, na.rm = TRUE)
aggr1$SALES_PRICE
## [1] 7973683 12405999 8553965 6266440 10666748 12664535 9036630
aggr1$AREA
## [1] Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## [7] Velachery
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr1$SALES_PRICE)
## [1] "numeric"
library("ggplot2")
ggplot(House, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr1,
            aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(House, aes(x = AREA, y = SALES_PRICE, fill = AREA)) + geom_boxplot()
levels(BHK_2$AREA)
## [1] "Adyar" "Anna Nagar" "Chrompet" "Karapakkam" "KK Nagar"
## [6] "T Nagar" "Velachery"
table(BHK_2$AREA)
##
## Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## 264 260 549 447 327 167
## Velachery
## 322
ggplot(BHK_2, aes(x = AREA, fill = AREA)) + geom_bar()
aggr2 <- aggregate(SALES_PRICE ~ AREA, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr2$SALES_PRICE
## [1] 8209260 13473524 9154363 6737757 11634699 13575771 9726395
aggr2$AREA
## [1] Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## [7] Velachery
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr2$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr2,
            aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = AREA, y = SALES_PRICE, fill = AREA)) + geom_boxplot()
levels(BHK_3$AREA)
## [1] "Adyar" "Anna Nagar" "Chrompet" "Karapakkam" "KK Nagar"
## [6] "T Nagar" "Velachery"
table(BHK_3$AREA)
##
## Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## 247 252 562 433 322 178
## Velachery
## 335
ggplot(BHK_3, aes(x = AREA, fill = AREA)) + geom_bar()
aggr3 <- aggregate(SALES_PRICE ~ AREA, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr3$SALES_PRICE
## [1] 11519773 19941948 12388551 9166997 15967594 20118915 14261749
aggr3$AREA
## [1] Adyar Anna Nagar Chrompet Karapakkam KK Nagar T Nagar
## [7] Velachery
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr3$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr3,
            aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = AREA, y = SALES_PRICE, fill = AREA)) + geom_boxplot()
#2 INT_SQFT
# For each building type: histogram of INT_SQFT, scatter against SALES_PRICE,
# Pearson correlation with SALES_PRICE, boxplot-rule outliers and a summary.
ggplot(House, aes(x = INT_SQFT)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(House, aes(x = INT_SQFT, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
cor.test(House$INT_SQFT, House$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: House$INT_SQFT and House$SALES_PRICE
## t = 49.022, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6837107 0.7237090
## sample estimates:
## cor
## 0.7042683
boxplot.stats(House$INT_SQFT)[["out"]]
## numeric(0)
summary(House$INT_SQFT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500.0 987.8 1366.5 1377.7 1733.0 2500.0
ggplot(BHK_2, aes(x = INT_SQFT)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BHK_2, aes(x = INT_SQFT, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
cor.test(BHK_2$INT_SQFT, BHK_2$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_2$INT_SQFT and BHK_2$SALES_PRICE
## t = 53.881, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7258993 0.7620809
## sample estimates:
## cor
## 0.7445364
boxplot.stats(BHK_2$INT_SQFT)[["out"]]
## numeric(0)
summary(BHK_2$INT_SQFT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 996 1353 1381 1749 2499
ggplot(BHK_3, aes(x = INT_SQFT)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BHK_3, aes(x = INT_SQFT, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
cor.test(BHK_3$INT_SQFT, BHK_3$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_3$INT_SQFT and BHK_3$SALES_PRICE
## t = 60.981, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7681326 0.7994314
## sample estimates:
## cor
## 0.7842805
boxplot.stats(BHK_3$INT_SQFT)[["out"]]
## numeric(0)
summary(BHK_3$INT_SQFT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 1000 1414 1388 1749 2498
# Houses split by bedroom count (bed0 = 1 bedroom ... bed3 = 4 bedrooms).
bed0 <- House[House$N_BEDROOM == 1, ]
bed1 <- House[House$N_BEDROOM == 2, ]
bed2 <- House[House$N_BEDROOM == 3, ]
bed3 <- House[House$N_BEDROOM == 4, ]
# Area vs. price for the two-bedroom houses only.
ggplot(bed1, aes(x = INT_SQFT, y = SALES_PRICE)) +
  geom_point()
#3 DIST_MAINROAD
# For each building type: histogram of DIST_MAINROAD, scatter against
# SALES_PRICE, boxplot-rule outliers and a summary.
# The former fill = SALES_PRICE mapping was dropped from the histograms: the
# layer sets a constant fill = "steelblue", which overrides a mapped fill.
ggplot(House, aes(x = DIST_MAINROAD)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(House, aes(x = DIST_MAINROAD, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
boxplot.stats(House$DIST_MAINROAD)$out
## numeric(0)
summary(House$DIST_MAINROAD)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 47.75 96.50 98.06 149.00 200.00
ggplot(BHK_2, aes(x = DIST_MAINROAD)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BHK_2, aes(x = DIST_MAINROAD, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
boxplot.stats(BHK_2$DIST_MAINROAD)$out
## numeric(0)
summary(BHK_2$DIST_MAINROAD)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 51.0 100.0 99.4 147.0 200.0
ggplot(BHK_3, aes(x = DIST_MAINROAD)) + geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BHK_3, aes(x = DIST_MAINROAD, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
boxplot.stats(BHK_3$DIST_MAINROAD)$out
## numeric(0)
summary(BHK_3$DIST_MAINROAD)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 52.0 102.0 101.4 148.0 200.0
#4 N_BEDROOM
# Mean SALES_PRICE per N_BEDROOM for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr4 <- aggregate(SALES_PRICE ~ N_BEDROOM, data = House, FUN = mean,
                   na.rm = TRUE)
aggr4$SALES_PRICE
## [1] 8286822 9702640 10269939 12139722
aggr4$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr4$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = N_BEDROOM, fill = N_BEDROOM)) + geom_bar()
ggplot(House, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr4,
            aes(x = N_BEDROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_boxplot()
aggr5 <- aggregate(SALES_PRICE ~ N_BEDROOM, data = BHK_2, FUN = mean,
                   na.rm = TRUE)
aggr5$SALES_PRICE
## [1] 8862535 10607800 11087235 13231643
aggr5$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr5$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = N_BEDROOM, fill = N_BEDROOM)) + geom_bar()
ggplot(BHK_2, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr5,
            aes(x = N_BEDROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_boxplot()
aggr6 <- aggregate(SALES_PRICE ~ N_BEDROOM, data = BHK_3, FUN = mean,
                   na.rm = TRUE)
aggr6$SALES_PRICE
## [1] 12321601 15192929 16015603 18775575
aggr6$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr6$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = N_BEDROOM, fill = N_BEDROOM)) + geom_bar()
ggplot(BHK_3, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr6,
            aes(x = N_BEDROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_boxplot()
# Houses split by bedroom count (bed0 = 1 bedroom ... bed3 = 4 bedrooms).
bed0 <- House[House$N_BEDROOM == 1, ]
bed1 <- House[House$N_BEDROOM == 2, ]
bed2 <- House[House$N_BEDROOM == 3, ]
bed3 <- House[House$N_BEDROOM == 4, ]
ggplot(bed1, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(bed2, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(bed3, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(bed1, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity", position = "dodge")
# 2BHK apartments split by bedroom count (bed4 = 1 bedroom ... bed7 = 4).
bed4 <- BHK_2[BHK_2$N_BEDROOM == 1, ]
bed5 <- BHK_2[BHK_2$N_BEDROOM == 2, ]
bed6 <- BHK_2[BHK_2$N_BEDROOM == 3, ]
bed7 <- BHK_2[BHK_2$N_BEDROOM == 4, ]
ggplot(bed4, aes(x = AREA, fill = AREA)) + geom_bar()
#5 N_BATHROOM
# Mean SALES_PRICE per N_BATHROOM for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr7 <- aggregate(SALES_PRICE ~ N_BATHROOM, data = House, FUN = mean,
                   na.rm = TRUE)
aggr7$SALES_PRICE
## [1] 8948484 9621536
aggr7$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr7$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = N_BATHROOM, fill = N_BATHROOM)) + geom_bar()
ggplot(House, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr7,
            aes(x = N_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_boxplot()
# Houses split by bathroom count; only the two-bathroom subset is plotted.
bath1 <- House[House$N_BATHROOM == 1, ]
bath2 <- House[House$N_BATHROOM == 2, ]
ggplot(bath2, aes(x = AREA, fill = AREA)) + geom_bar()
aggr8 <- aggregate(SALES_PRICE ~ N_BATHROOM, data = BHK_2, FUN = mean,
                   na.rm = TRUE)
aggr8$SALES_PRICE
## [1] 9588267 10655400
aggr8$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr8$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = N_BATHROOM, fill = N_BATHROOM)) + geom_bar()
ggplot(BHK_2, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr8,
            aes(x = N_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_boxplot()
aggr9 <- aggregate(SALES_PRICE ~ N_BATHROOM, data = BHK_3, FUN = mean,
                   na.rm = TRUE)
aggr9$SALES_PRICE
## [1] 13569649 15002305
aggr9$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr9$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = N_BATHROOM, fill = N_BATHROOM)) + geom_bar()
ggplot(BHK_3, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr9,
            aes(x = N_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_boxplot()
#6 N_ROOM
# Mean SALES_PRICE per N_ROOM for each building type, with count bars, price
# bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr10 <- aggregate(SALES_PRICE ~ N_ROOM, data = House, FUN = mean,
                    na.rm = TRUE)
aggr10$SALES_PRICE
## [1] 6209357 7966188 9633542 11535305 12139722
aggr10$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr10$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = N_ROOM, fill = N_ROOM)) + geom_bar()
ggplot(House, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr10,
            aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(House, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) + geom_boxplot()
# Houses split by room count (room1 = 2 rooms ... room5 = 6 rooms), each
# plotted as counts per AREA.
room1 <- House[House$N_ROOM == 2, ]
room2 <- House[House$N_ROOM == 3, ]
room3 <- House[House$N_ROOM == 4, ]
room4 <- House[House$N_ROOM == 5, ]
room5 <- House[House$N_ROOM == 6, ]
ggplot(room1, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(room2, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(room3, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(room4, aes(x = AREA, fill = AREA)) + geom_bar()
ggplot(room5, aes(x = AREA, fill = AREA)) + geom_bar()
aggr11 <- aggregate(SALES_PRICE ~ N_ROOM, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr11$SALES_PRICE
## [1] 6481595 8390607 10592620 12445687 13231643
aggr11$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr11$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = N_ROOM, fill = N_ROOM)) + geom_bar()
ggplot(BHK_2, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr11,
            aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) + geom_boxplot()
aggr12 <- aggregate(SALES_PRICE ~ N_ROOM, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr12$SALES_PRICE
## [1] 8471806 11523821 14956904 18328314 18775575
aggr12$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr12$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = N_ROOM, fill = N_ROOM)) + geom_bar()
ggplot(BHK_3, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr12,
            aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) + geom_boxplot()
#7 SALE_COND
# Mean SALES_PRICE per SALE_COND for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr13 <- aggregate(SALES_PRICE ~ SALE_COND, data = House, FUN = mean,
                    na.rm = TRUE)
aggr13$SALES_PRICE
## [1] 8975728 9479419 9050138 9125947 8861459
aggr13$SALE_COND
## [1] AbNormal AdjLand Family Normal Sale Partial
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr13$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = SALE_COND, fill = SALE_COND)) + geom_bar()
ggplot(House, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr13,
            aes(x = SALE_COND, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_boxplot()
aggr14 <- aggregate(SALES_PRICE ~ SALE_COND, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr14$SALES_PRICE
## [1] 9893904 10113500 9527528 9807603 9678647
aggr14$SALE_COND
## [1] AbNormal AdjLand Family Normal Sale Partial
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr14$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = SALE_COND, fill = SALE_COND)) + geom_bar()
ggplot(BHK_2, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr14,
            aes(x = SALE_COND, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_boxplot()
aggr15 <- aggregate(SALES_PRICE ~ SALE_COND, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr15$SALES_PRICE
## [1] 13956477 14276746 13632057 14056235 13436253
aggr15$SALE_COND
## [1] AbNormal AdjLand Family Normal Sale Partial
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr15$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = SALE_COND, fill = SALE_COND)) + geom_bar()
ggplot(BHK_3, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr15,
            aes(x = SALE_COND, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_boxplot()
#8 PARK_FACIL
# Mean SALES_PRICE per PARK_FACIL for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr16 <- aggregate(SALES_PRICE ~ PARK_FACIL, data = House, FUN = mean,
                    na.rm = TRUE)
aggr16$SALES_PRICE
## [1] 8598630 9604000
aggr16$PARK_FACIL
## [1] No Yes
## Levels: No Yes
class(aggr16$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = PARK_FACIL, fill = PARK_FACIL)) + geom_bar()
ggplot(House, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr16,
            aes(x = PARK_FACIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_boxplot()
aggr17 <- aggregate(SALES_PRICE ~ PARK_FACIL, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr17$SALES_PRICE
## [1] 9340046 10268005
aggr17$PARK_FACIL
## [1] No Yes
## Levels: No Yes
class(aggr17$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = PARK_FACIL, fill = PARK_FACIL)) + geom_bar()
ggplot(BHK_2, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr17,
            aes(x = PARK_FACIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_boxplot()
aggr18 <- aggregate(SALES_PRICE ~ PARK_FACIL, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr18$SALES_PRICE
## [1] 13235672 14473701
aggr18$PARK_FACIL
## [1] No Yes
## Levels: No Yes
class(aggr18$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = PARK_FACIL, fill = PARK_FACIL)) + geom_bar()
ggplot(BHK_3, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr18,
            aes(x = PARK_FACIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_boxplot()
#9 UTILITY_AVAIL
# Mean SALES_PRICE per UTILITY_AVAIL for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr19 <- aggregate(SALES_PRICE ~ UTILITY_AVAIL, data = House, FUN = mean,
                    na.rm = TRUE)
aggr19$SALES_PRICE
## [1] 9248229 8706142 9203174 9174266
aggr19$UTILITY_AVAIL
## [1] AllPub ELO NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr19$UTILITY_AVAIL)
## [1] 4
class(aggr19$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = UTILITY_AVAIL, fill = UTILITY_AVAIL)) + geom_bar()
ggplot(House, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr19,
            aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_boxplot()
aggr20 <- aggregate(SALES_PRICE ~ UTILITY_AVAIL, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr20$SALES_PRICE
## [1] 10029917 9370981 9727401 10061315
aggr20$UTILITY_AVAIL
## [1] AllPub ELO NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr20$UTILITY_AVAIL)
## [1] 4
class(aggr20$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = UTILITY_AVAIL, fill = UTILITY_AVAIL)) + geom_bar()
ggplot(BHK_2, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr20,
            aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_boxplot()
aggr21 <- aggregate(SALES_PRICE ~ UTILITY_AVAIL, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr21$SALES_PRICE
## [1] 14181164 13689766 13777333 13762990
aggr21$UTILITY_AVAIL
## [1] AllPub ELO NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr21$UTILITY_AVAIL)
## [1] 4
class(aggr21$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = UTILITY_AVAIL, fill = UTILITY_AVAIL)) + geom_bar()
ggplot(BHK_3, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr21,
            aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_boxplot()
#10 STREET
# Mean SALES_PRICE per STREET type for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr22 <- aggregate(SALES_PRICE ~ STREET, data = House, FUN = mean,
                    na.rm = TRUE)
aggr22$SALES_PRICE
## [1] 9683562 8259834 9162599
aggr22$STREET
## [1] Gravel No Access Paved
## Levels: Gravel No Access Paved
nlevels(aggr22$STREET)
## [1] 3
class(aggr22$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = STREET, fill = STREET)) + geom_bar()
ggplot(House, aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr22,
            aes(x = STREET, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(House, aes(x = STREET, y = SALES_PRICE, fill = STREET)) + geom_boxplot()
aggr23 <- aggregate(SALES_PRICE ~ STREET, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr23$SALES_PRICE
## [1] 10347012 9019227 9897742
aggr23$STREET
## [1] Gravel No Access Paved
## Levels: Gravel No Access Paved
nlevels(aggr23$STREET)
## [1] 3
class(aggr23$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = STREET, fill = STREET)) + geom_bar()
ggplot(BHK_2, aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr23,
            aes(x = STREET, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = STREET, y = SALES_PRICE, fill = STREET)) + geom_boxplot()
aggr24 <- aggregate(SALES_PRICE ~ STREET, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr24$SALES_PRICE
## [1] 14604557 12808119 14008754
aggr24$STREET
## [1] Gravel No Access Paved
## Levels: Gravel No Access Paved
nlevels(aggr24$STREET)
## [1] 3
class(aggr24$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = STREET, fill = STREET)) + geom_bar()
ggplot(BHK_3, aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr24,
            aes(x = STREET, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = STREET, y = SALES_PRICE, fill = STREET)) + geom_boxplot()
#11 MZZONE
# Mean SALES_PRICE per MZZONE for each building type, with count bars, price
# bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed; the House
# boxplot, previously drawn twice in a row, is drawn once.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr25 <- aggregate(SALES_PRICE ~ MZZONE, data = House, FUN = mean,
                    na.rm = TRUE)
aggr25$SALES_PRICE
## [1] 5949577 6464620 7153590 9159750 9908756 10447506
aggr25$MZZONE
## [1] A C I RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr25$MZZONE)
## [1] 6
class(aggr25$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = MZZONE, fill = MZZONE)) + geom_bar()
ggplot(House, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr25,
            aes(x = MZZONE, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(House, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) + geom_boxplot()
aggr26 <- aggregate(SALES_PRICE ~ MZZONE, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr26$SALES_PRICE
## [1] 6649765 7257514 7686996 9975170 10675261 11211142
aggr26$MZZONE
## [1] A C I RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr26$MZZONE)
## [1] 6
class(aggr26$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = MZZONE, fill = MZZONE)) + geom_bar()
ggplot(BHK_2, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr26,
            aes(x = MZZONE, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) + geom_boxplot()
aggr27 <- aggregate(SALES_PRICE ~ MZZONE, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr27$SALES_PRICE
## [1] 9748694 10506115 11164411 13985734 14932087 15578229
aggr27$MZZONE
## [1] A C I RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr27$MZZONE)
## [1] 6
class(aggr27$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = MZZONE, fill = MZZONE)) + geom_bar()
ggplot(BHK_3, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr27,
            aes(x = MZZONE, y = mean(SALES_PRICE), label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) + geom_boxplot()
#12 QS_ROOMS
# Mean SALES_PRICE per QS_ROOMS level for each building type, with count bars,
# price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr28 <- aggregate(SALES_PRICE ~ QS_ROOMS, data = House, FUN = mean,
                    na.rm = TRUE)
aggr28$SALES_PRICE
## [1] 9058257 8980598 9184492 9223161
aggr28$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr28$QS_ROOMS)
## [1] 4
class(aggr28$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = QS_ROOMS, fill = QS_ROOMS)) + geom_bar()
ggplot(House, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr28,
            aes(x = QS_ROOMS, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()
aggr29 <- aggregate(SALES_PRICE ~ QS_ROOMS, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr29$SALES_PRICE
## [1] 9714444 9717054 9816789 10019719
aggr29$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr29$QS_ROOMS)
## [1] 4
class(aggr29$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = QS_ROOMS, fill = QS_ROOMS)) + geom_bar()
ggplot(BHK_2, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr29,
            aes(x = QS_ROOMS, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()
aggr30 <- aggregate(SALES_PRICE ~ QS_ROOMS, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr30$SALES_PRICE
## [1] 13948484 13806011 13980219 13763028
aggr30$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr30$QS_ROOMS)
## [1] 4
class(aggr30$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = QS_ROOMS, fill = QS_ROOMS)) + geom_bar()
ggplot(BHK_3, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr30,
            aes(x = QS_ROOMS, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()
# 13 QS_BATHROOM
# Mean SALES_PRICE per QS_BATHROOM level for each building type, with count
# bars, price bars labelled with the group means, and boxplots.
# aggregate() already returns the means as numeric, so the former
# factor -> character -> numeric round trip has been removed.
# NOTE(review): the stat = "identity" bar layers are fed the raw rows, so many
# bars overlap per category, and the text layer places every group mean at one
# shared height (the mean of the group means).
aggr31 <- aggregate(SALES_PRICE ~ QS_BATHROOM, data = House, FUN = mean,
                    na.rm = TRUE)
aggr31$SALES_PRICE
## [1] 8942011 9151377 9046837 9190674
aggr31$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr31$QS_BATHROOM)
## [1] 4
class(aggr31$SALES_PRICE)
## [1] "numeric"
ggplot(House, aes(x = QS_BATHROOM, fill = QS_BATHROOM)) + geom_bar()
ggplot(House, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr31,
            aes(x = QS_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(House, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()
aggr32 <- aggregate(SALES_PRICE ~ QS_BATHROOM, data = BHK_2, FUN = mean,
                    na.rm = TRUE)
aggr32$SALES_PRICE
## [1] 9795560 9909563 9748090 9728837
aggr32$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr32$QS_BATHROOM)
## [1] 4
class(aggr32$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_2, aes(x = QS_BATHROOM, fill = QS_BATHROOM)) + geom_bar()
ggplot(BHK_2, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr32,
            aes(x = QS_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_2, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()
aggr33 <- aggregate(SALES_PRICE ~ QS_BATHROOM, data = BHK_3, FUN = mean,
                    na.rm = TRUE)
aggr33$SALES_PRICE
## [1] 13945551 14013604 13665760 13851392
aggr33$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr33$QS_BATHROOM)
## [1] 4
class(aggr33$SALES_PRICE)
## [1] "numeric"
ggplot(BHK_3, aes(x = QS_BATHROOM, fill = QS_BATHROOM)) + geom_bar()
ggplot(BHK_3, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(data = aggr33,
            aes(x = QS_BATHROOM, y = mean(SALES_PRICE),
                label = round(SALES_PRICE)))
ggplot(BHK_3, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()
#15 Houseage
# Houses only: histogram of houseage, then houseage against SALES_PRICE.
ggplot(House, aes(x = houseage)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(House, aes(x = houseage, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
# 16. REG_FEE ----
# Histogram of REG_FEE over all houses (default binning).
ggplot(data = House, mapping = aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter of SALES_PRICE against REG_FEE.
ggplot(data = House, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
# Correlation test between REG_FEE, the variable this section examines, and
# SALES_PRICE. The call used to test INT_SQFT, which does not match the
# histogram, scatter plot and outlier check around it.
# NOTE(review): the output recorded below was produced by the earlier
# INT_SQFT call; re-knit the document to refresh it.
cor.test(House$REG_FEE, House$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: House$INT_SQFT and House$SALES_PRICE
## t = 49.022, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6837107 0.7237090
## sample estimates:
## cor
## 0.7042683
# REG_FEE values that boxplot.stats() returns in its `out` component.
boxplot.stats(House$REG_FEE)$out
## [1] 593864 581942 591965 600726 581744 621684 584890 596329 641170 609839
## [11] 699527 620324 612298 621279 619312 609732 574427 622769 576634 651668
## [21] 679450
# Histogram of REG_FEE within the BHK_2 subset (default binning).
ggplot(BHK_2, aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
You can also embed plots, for example:
# Scatter of SALES_PRICE against REG_FEE within the BHK_2 subset.
ggplot(data = BHK_2, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
# Correlation test between REG_FEE, the variable plotted just above, and
# SALES_PRICE. The call used to test INT_SQFT, which does not match the
# REG_FEE scatter plot and outlier check around it.
# NOTE(review): the output recorded below was produced by the earlier
# INT_SQFT call; re-knit the document to refresh it.
cor.test(BHK_2$REG_FEE, BHK_2$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_2$INT_SQFT and BHK_2$SALES_PRICE
## t = 53.881, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7258993 0.7620809
## sample estimates:
## cor
## 0.7445364
# REG_FEE values that boxplot.stats() returns in its `out` component.
boxplot.stats(BHK_2$REG_FEE)$out
## [1] 679249 652659 645265 675762 637030 664728 690074 696797 680743 670804
## [11] 718693 647641 678626 695118 691429 649726 636349 630298
# Histogram of REG_FEE within the BHK_3 subset (default binning).
ggplot(data = BHK_3, mapping = aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter of SALES_PRICE against REG_FEE.
ggplot(data = BHK_3, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")
# Correlation test between REG_FEE, the variable plotted just above, and
# SALES_PRICE. The call used to test INT_SQFT, which does not match the
# REG_FEE histogram, scatter plot and outlier check around it.
# NOTE(review): the output recorded below was produced by the earlier
# INT_SQFT call; re-knit the document to refresh it.
cor.test(BHK_3$REG_FEE, BHK_3$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_3$INT_SQFT and BHK_3$SALES_PRICE
## t = 60.981, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7681326 0.7994314
## sample estimates:
## cor
## 0.7842805
# REG_FEE values that boxplot.stats() returns in its `out` component.
boxplot.stats(BHK_3$REG_FEE)$out
## [1] 917550 923026 963029 913666 940813 941567 909160 983922 931224 947124
## [11] 929714 904779 952411 936314 942859 981117
# 17. COMMIS ----
# Histogram of COMMIS over all houses (default binning).
ggplot(House, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter of SALES_PRICE against COMMIS.
ggplot(House, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
# Correlation test between COMMIS and SALES_PRICE.
cor.test(House$COMMIS, House$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: House$COMMIS and House$SALES_PRICE
## t = 31.664, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5107821 0.5670254
## sample estimates:
## cor
## 0.5395053
# COMMIS values that boxplot.stats() returns in its `out` component.
boxplot.stats(House$COMMIS)[["out"]]
## [1] 284560 285318 299188 283893 308495 289172 306149 309656 304670 284544
## [11] 305086 303563 313231 286763 304759 318205 287802 286822 319085 284351
## [21] 301789 353880
# Histogram of COMMIS within the BHK_2 subset (default binning).
ggplot(BHK_2, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter of SALES_PRICE against COMMIS.
ggplot(BHK_2, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
# Correlation test between COMMIS and SALES_PRICE.
cor.test(BHK_2$COMMIS, BHK_2$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_2$COMMIS and BHK_2$SALES_PRICE
## t = 31.138, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5124544 0.5697878
## sample estimates:
## cor
## 0.5417509
# COMMIS values that boxplot.stats() returns in its `out` component.
boxplot.stats(BHK_2$COMMIS)[["out"]]
## [1] 313674 304011 343858 304556 301790 334948 312407 318615 339749 312347
## [11] 317913 311007 333425 313993 359347 328306 318539 313966 301723 310315
## [21] 318936
# Histogram of COMMIS within the BHK_3 subset (default binning).
ggplot(BHK_3, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter of SALES_PRICE against COMMIS.
ggplot(BHK_3, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(colour = "darkblue")
# Correlation test between COMMIS and SALES_PRICE.
cor.test(BHK_3$COMMIS, BHK_3$SALES_PRICE)
##
## Pearson's product-moment correlation
##
## data: BHK_3$COMMIS and BHK_3$SALES_PRICE
## t = 33.563, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5431129 0.5978782
## sample estimates:
## cor
## 0.5711308
# COMMIS values that boxplot.stats() returns in its `out` component.
boxplot.stats(BHK_3$COMMIS)[["out"]]
## [1] 471247 456247 495405 421452 443349 442485 418432 466156 422406 455034
## [11] 470784 416414 469920 427494 463218 427485 424072 491961 479297 424557
## [21] 433936 481001 437319 426508 433326 419917 485924 416606 444749 429011
## [31] 420340 475795 451314 415659
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.