R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the training data; blank and whitespace-only cells are read as NA.
# stringsAsFactors is set explicitly because its default became FALSE in
# R 4.0.0, while the levels()-based cleanup further down expects factors.
gr_train <- read.csv(
  "D:/Great Lakes Sales price/gr_train.csv",
  na.strings = c(" ", "", "NA"),
  stringsAsFactors = TRUE
)
head(gr_train)
##   PRT_ID       AREA INT_SQFT  DATE_SALE DIST_MAINROAD N_BEDROOM N_BATHROOM
## 1 P03210 Karapakkam     1004 04-05-2011           131         1          1
## 2 P09411 Anna Nagar     1986 19-12-2006            26         2          1
## 3 P01812      Adyar      909 04-02-2012            70         1          1
## 4 P05346  Velachery     1855 13-03-2010            14         3          2
## 5 P06210 Karapakkam     1226 05-10-2009            84         1          1
## 6 P00219   Chrompet     1220 11-09-2014            36         2          1
##   N_ROOM SALE_COND PARK_FACIL DATE_BUILD  BUILDTYPE UTILITY_AVAIL
## 1      3  AbNormal        Yes 15-05-1967 Commercial        AllPub
## 2      5  AbNormal         No 22-12-1995 Commercial        AllPub
## 3      3  AbNormal        Yes 09-02-1992 Commercial           ELO
## 4      5    Family         No 18-03-1988     Others       NoSewr 
## 5      3  AbNormal        Yes 13-10-1979     Others        AllPub
## 6      4   Partial         No 12-09-2009 Commercial        NoSeWa
##      STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM QS_OVERALL REG_FEE
## 1     Paved      A      4.0         3.9        4.9      4.330  380000
## 2    Gravel     RH      4.9         4.2        2.5      3.765  760122
## 3    Gravel     RL      4.1         3.8        2.2      3.090  421094
## 4     Paved      I      4.7         3.9        3.6      4.010  356321
## 5    Gravel      C      3.0         2.5        4.1      3.290  237000
## 6 No Access     RH      4.5         2.6        3.1      3.320  409027
##   COMMIS SALES_PRICE
## 1 144400     7600000
## 2 304049    21717770
## 3  92114    13159200
## 4  77042     9630290
## 5  74063     7406250
## 6 198316    12394750
dim(gr_train)
## [1] 7109   22
summary(gr_train)
##      PRT_ID             AREA         INT_SQFT         DATE_SALE   
##  P00001 :   1   Chrompet  :1681   Min.   : 500   06-10-2009:  12  
##  P00002 :   1   Karapakkam:1363   1st Qu.: 993   06-01-2009:  10  
##  P00004 :   1   KK Nagar  : 996   Median :1373   12-04-2011:  10  
##  P00005 :   1   Velachery : 979   Mean   :1382   15-03-2012:  10  
##  P00006 :   1   Anna Nagar: 783   3rd Qu.:1744   17-11-2010:  10  
##  P00007 :   1   Adyar     : 773   Max.   :2500   26-02-2012:  10  
##  (Other):7103   (Other)   : 534                  (Other)   :7047  
##  DIST_MAINROAD     N_BEDROOM       N_BATHROOM        N_ROOM     
##  Min.   :  0.0   Min.   :1.000   Min.   :1.000   Min.   :2.000  
##  1st Qu.: 50.0   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:3.000  
##  Median : 99.0   Median :1.000   Median :1.000   Median :4.000  
##  Mean   : 99.6   Mean   :1.637   Mean   :1.213   Mean   :3.689  
##  3rd Qu.:148.0   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:4.000  
##  Max.   :200.0   Max.   :4.000   Max.   :2.000   Max.   :6.000  
##                  NA's   :1       NA's   :5                      
##        SALE_COND    PARK_FACIL      DATE_BUILD        BUILDTYPE   
##  AdjLand    :1433   No :3520   02-07-1987:   6   Comercial :   4  
##  Partial    :1429   Noo:   2   04-04-1999:   5   Commercial:2325  
##  Normal Sale:1423   Yes:3587   02-10-1990:   4   House     :2444  
##  AbNormal   :1406              02-12-1982:   4   Other     :  26  
##  Family     :1403              03-01-1979:   4   Others    :2310  
##  Adj Land   :   6              03-10-1999:   4                    
##  (Other)    :   9              (Other)   :7082                    
##  UTILITY_AVAIL        STREET     MZZONE       QS_ROOMS      QS_BATHROOM   
##  All Pub:   1   Gravel   :2520   A : 537   Min.   :2.000   Min.   :2.000  
##  AllPub :1886   No Access:2010   C : 550   1st Qu.:2.700   1st Qu.:2.700  
##  ELO    :1522   NoAccess :   7   I : 525   Median :3.500   Median :3.500  
##  NoSeWa :1871   Pavd     :  12   RH:1822   Mean   :3.517   Mean   :3.507  
##  NoSewr :1829   Paved    :2560   RL:1858   3rd Qu.:4.300   3rd Qu.:4.300  
##                                  RM:1817   Max.   :5.000   Max.   :5.000  
##                                                                           
##    QS_BEDROOM      QS_OVERALL       REG_FEE           COMMIS      
##  Min.   :2.000   Min.   :2.000   Min.   : 71177   Min.   :  5055  
##  1st Qu.:2.700   1st Qu.:3.130   1st Qu.:272406   1st Qu.: 84219  
##  Median :3.500   Median :3.500   Median :349486   Median :127628  
##  Mean   :3.485   Mean   :3.503   Mean   :376938   Mean   :141006  
##  3rd Qu.:4.300   3rd Qu.:3.890   3rd Qu.:451562   3rd Qu.:184506  
##  Max.   :5.000   Max.   :4.970   Max.   :983922   Max.   :495405  
##                  NA's   :48                                       
##   SALES_PRICE      
##  Min.   : 2156875  
##  1st Qu.: 8272100  
##  Median :10335050  
##  Mean   :10894910  
##  3rd Qu.:12993900  
##  Max.   :23667340  
## 
colSums(is.na(gr_train))
##        PRT_ID          AREA      INT_SQFT     DATE_SALE DIST_MAINROAD 
##             0             0             0             0             0 
##     N_BEDROOM    N_BATHROOM        N_ROOM     SALE_COND    PARK_FACIL 
##             1             5             0             0             0 
##    DATE_BUILD     BUILDTYPE UTILITY_AVAIL        STREET        MZZONE 
##             0             0             0             0             0 
##      QS_ROOMS   QS_BATHROOM    QS_BEDROOM    QS_OVERALL       REG_FEE 
##             0             0             0            48             0 
##        COMMIS   SALES_PRICE 
##             0             0
str(gr_train)
## 'data.frame':    7109 obs. of  22 variables:
##  $ PRT_ID       : Factor w/ 7109 levels "P00001","P00002",..: 2267 6665 1271 3756 4394 157 6448 6859 2383 6821 ...
##  $ AREA         : Factor w/ 17 levels "Adyar","Adyr",..: 11 5 1 16 11 8 8 16 8 16 ...
##  $ INT_SQFT     : int  1004 1986 909 1855 1226 1220 1167 1847 771 1635 ...
##  $ DATE_SALE    : Factor w/ 2798 levels "01-01-2005","01-01-2007",..: 311 1746 289 1118 443 985 396 1114 489 1985 ...
##  $ DIST_MAINROAD: int  131 26 70 14 84 36 137 176 175 74 ...
##  $ N_BEDROOM    : int  1 2 1 3 1 2 1 3 1 2 ...
##  $ N_BATHROOM   : int  1 1 1 2 1 1 1 2 1 1 ...
##  $ N_ROOM       : int  3 5 3 5 3 4 3 5 2 4 ...
##  $ SALE_COND    : Factor w/ 9 levels "Ab Normal","AbNormal",..: 2 2 2 5 2 7 7 5 4 2 ...
##  $ PARK_FACIL   : Factor w/ 3 levels "No","Noo","Yes": 3 1 3 1 3 1 1 1 1 1 ...
##  $ DATE_BUILD   : Factor w/ 5808 levels "01-01-1967","01-01-1970",..: 2732 4227 1512 3301 2407 2225 2146 2710 2521 4898 ...
##  $ BUILDTYPE    : Factor w/ 5 levels "Comercial","Commercial",..: 2 2 2 5 5 2 4 2 5 5 ...
##  $ UTILITY_AVAIL: Factor w/ 5 levels "All Pub","AllPub",..: 2 2 3 5 2 4 2 2 5 3 ...
##  $ STREET       : Factor w/ 5 levels "Gravel","No Access",..: 5 1 1 5 1 2 2 1 5 2 ...
##  $ MZZONE       : Factor w/ 6 levels "A","C","I","RH",..: 1 4 5 3 2 4 5 6 6 3 ...
##  $ QS_ROOMS     : num  4 4.9 4.1 4.7 3 4.5 3.6 2.4 2.9 3.1 ...
##  $ QS_BATHROOM  : num  3.9 4.2 3.8 3.9 2.5 2.6 2.1 4.5 3.7 3.1 ...
##  $ QS_BEDROOM   : num  4.9 2.5 2.2 3.6 4.1 3.1 2.5 2.1 4 3.3 ...
##  $ QS_OVERALL   : num  4.33 3.77 3.09 4.01 3.29 ...
##  $ REG_FEE      : int  380000 760122 421094 356321 237000 409027 263152 604809 257578 323346 ...
##  $ COMMIS       : int  144400 304049 92114 77042 74063 198316 33955 235204 33236 121255 ...
##  $ SALES_PRICE  : int  7600000 21717770 13159200 9630290 7406250 12394750 8488790 16800250 8308970 8083650 ...
names(gr_train)
##  [1] "PRT_ID"        "AREA"          "INT_SQFT"      "DATE_SALE"    
##  [5] "DIST_MAINROAD" "N_BEDROOM"     "N_BATHROOM"    "N_ROOM"       
##  [9] "SALE_COND"     "PARK_FACIL"    "DATE_BUILD"    "BUILDTYPE"    
## [13] "UTILITY_AVAIL" "STREET"        "MZZONE"        "QS_ROOMS"     
## [17] "QS_BATHROOM"   "QS_BEDROOM"    "QS_OVERALL"    "REG_FEE"      
## [21] "COMMIS"        "SALES_PRICE"
levels(gr_train$AREA)
##  [1] "Adyar"      "Adyr"       "Ana Nagar"  "Ann Nagar"  "Anna Nagar"
##  [6] "Chormpet"   "Chrmpet"    "Chrompet"   "Chrompt"    "Karapakam" 
## [11] "Karapakkam" "KK Nagar"   "KKNagar"    "T Nagar"    "TNagar"    
## [16] "Velachery"  "Velchery"
# Hold AREA as plain text while the misspelt names are merged below.
gr_train[["AREA"]] <- as.character(gr_train[["AREA"]])
table(gr_train$AREA)
## 
##      Adyar       Adyr  Ana Nagar  Ann Nagar Anna Nagar   Chormpet 
##        773          1          3          2        783          6 
##    Chrmpet   Chrompet    Chrompt  Karapakam Karapakkam   KK Nagar 
##          6       1681          9          3       1363        996 
##    KKNagar    T Nagar     TNagar  Velachery   Velchery 
##          1        496          5        979          2
library("car")
## Warning: package 'car' was built under R version 3.4.2
# Merge the misspelt Adyar entry into the canonical name.
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('Adyr')='Adyar'")

# Same for the two Anna Nagar variants.
gr_train$AREA <- car::recode(
  gr_train$AREA,
  recodes = "c('Ana Nagar','Ann Nagar')='Anna Nagar'"
)
#gr_train[gr_train$AREA=="Anna Nagar",]
table(gr_train$AREA)
## 
##      Adyar Anna Nagar   Chormpet    Chrmpet   Chrompet    Chrompt 
##        774        788          6          6       1681          9 
##  Karapakam Karapakkam   KK Nagar    KKNagar    T Nagar     TNagar 
##          3       1363        996          1        496          5 
##  Velachery   Velchery 
##        979          2
# Merge the three misspelt Chrompet variants (the recode spec spans two lines).
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('Chormpet','Chrmpet','Chrompt')
                ='Chrompet'")
table(gr_train$AREA)
## 
##      Adyar Anna Nagar   Chrompet  Karapakam Karapakkam   KK Nagar 
##        774        788       1702          3       1363        996 
##    KKNagar    T Nagar     TNagar  Velachery   Velchery 
##          1        496          5        979          2
# Merge the remaining misspelt area names, one canonical name per call.
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('Karapakam')='Karapakkam'")
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('KKNagar')='KK Nagar'")
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('TNagar')='T Nagar'")
gr_train$AREA <- car::recode(gr_train$AREA, recodes = "c('Velchery')='Velachery'")
# Cleanup done: turn AREA back into a factor and list its levels.
gr_train$AREA <- as.factor(gr_train$AREA)
levels(gr_train$AREA)
## [1] "Adyar"      "Anna Nagar" "Chrompet"   "Karapakkam" "KK Nagar"  
## [6] "T Nagar"    "Velachery"
table(gr_train$AREA)
## 
##      Adyar Anna Nagar   Chrompet Karapakkam   KK Nagar    T Nagar 
##        774        788       1702       1366        997        501 
##  Velachery 
##        981
# Mean SALES_PRICE per AREA. aggregate() already returns numeric means, so
# the old factor -> character -> numeric round-trip (which could only lose
# precision) is removed; T is spelled TRUE.
agg <- aggregate(SALES_PRICE ~ AREA, data = gr_train,
                 FUN = mean, na.rm = TRUE)
agg$SALES_PRICE
## [1]  9185668 15168202 10013810  7340100 12696229 15616743 11047353
agg$AREA
## [1] Adyar      Anna Nagar Chrompet   Karapakkam KK Nagar   T Nagar   
## [7] Velachery 
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(agg$SALES_PRICE)
## [1] "numeric"
library("ggplot2")
# Number of rows per AREA.
ggplot(gr_train, aes(x = AREA, fill = AREA)) +
  geom_bar()

# Sale price by AREA, labelled with the group means held in `agg`. The label
# is mapped from the layer's own SALES_PRICE column; `agg$SALES_PRICE` inside
# aes() bypassed the layer data and is discouraged by ggplot2. Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg,
    mapping = aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

# Spread of sale price within each AREA.
ggplot(gr_train, aes(x = AREA, y = SALES_PRICE, fill = AREA)) +
  geom_boxplot()

# Distribution of INT_SQFT.
ggplot(gr_train, aes(x = INT_SQFT)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# INT_SQFT against sale price.
ggplot(gr_train, aes(x = INT_SQFT, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Pearson correlation between log(INT_SQFT) and sale price.
cor.test(log(gr_train$INT_SQFT), gr_train$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  log(gr_train$INT_SQFT) and gr_train$SALES_PRICE
## t = 64.993, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5957708 0.6249377
## sample estimates:
##       cor 
## 0.6105612
boxplot.stats(gr_train$INT_SQFT)$out
## integer(0)
library("lubridate")
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
class(gr_train$DATE_SALE)
## [1] "factor"
# Parse the day-month-year sale dates with lubridate.
gr_train$DATE_SALE <- dmy(gr_train$DATE_SALE)

class(gr_train$DATE_BUILD)
## [1] "factor"
# Same conversion for the build dates.
gr_train$DATE_BUILD <- dmy(gr_train$DATE_BUILD)
# Distribution of DIST_MAINROAD. The former `fill = SALES_PRICE` mapping is
# removed: the constant fill set on the layer overrode it, so it never
# influenced the plot.
ggplot(gr_train, aes(x = DIST_MAINROAD)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# DIST_MAINROAD against sale price.
ggplot(gr_train, aes(x = DIST_MAINROAD, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Values flagged as outliers by the boxplot rule.
boxplot.stats(gr_train$DIST_MAINROAD)$out
## integer(0)
table(gr_train$N_BEDROOM)
## 
##    1    2    3    4 
## 3795 2352  707  254
class(gr_train$N_BEDROOM)
## [1] "integer"
# Treat the bedroom count as a categorical variable.
gr_train$N_BEDROOM <- as.factor(gr_train$N_BEDROOM)

# Mean SALES_PRICE per N_BEDROOM. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg1 <- aggregate(SALES_PRICE ~ N_BEDROOM, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg1$SALES_PRICE
## [1]  9790184 11817473 12478131 14451305
agg1$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(agg1$SALES_PRICE)
## [1] "numeric"
# Number of rows per N_BEDROOM.
ggplot(gr_train, aes(x = N_BEDROOM, fill = N_BEDROOM)) +
  geom_bar()

# Sale price by N_BEDROOM, labelled with the group means held in `agg1`.
# The label is mapped from the layer's own column instead of
# `agg1$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg1,
    mapping = aes(x = N_BEDROOM, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each N_BEDROOM level.
ggplot(gr_train, aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)) +
  geom_boxplot()

which(is.na(gr_train$N_BEDROOM))
## [1] 4061
gr_train[is.na(gr_train$N_BEDROOM),]
##      PRT_ID       AREA INT_SQFT  DATE_SALE DIST_MAINROAD N_BEDROOM
## 4061 P01066 Anna Nagar     1556 2013-01-16           181      <NA>
##      N_BATHROOM N_ROOM SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE
## 4061          1      4  Partiall         No 1986-01-23     House
##      UTILITY_AVAIL    STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 4061       NoSewr  No Access     RM      4.8         2.7          2
##      QS_OVERALL REG_FEE COMMIS SALES_PRICE
## 4061      3.155  313136 107978    10797790
which(is.na(gr_train$N_BATHROOM))
## [1]   71 5088 6135 6372 6536
gr_train[is.na(gr_train$N_BATHROOM),]
##      PRT_ID       AREA INT_SQFT  DATE_SALE DIST_MAINROAD N_BEDROOM
## 71   P05304 Anna Nagar     1589 2010-03-22            39         1
## 5088 P01333   Chrompet     1016 2012-08-02           105         1
## 6135 P01332   Chrompet      916 2012-08-02           173         1
## 6372 P01189   Chrompet     1035 2012-11-06            90         1
## 6536 P09189 Anna Nagar     1864 2007-03-05           184         2
##      N_BATHROOM N_ROOM   SALE_COND PARK_FACIL DATE_BUILD BUILDTYPE
## 71           NA      4     Partial         No 1966-04-02    Others
## 5088         NA      3    AbNormal        Yes 1980-08-10    Others
## 6135         NA      3 Normal Sale        Yes 1974-08-12    Others
## 6372         NA      3     Partial         No 1981-11-14    Others
## 6536         NA      5      Family        Yes 1997-03-07    Others
##      UTILITY_AVAIL    STREET MZZONE QS_ROOMS QS_BATHROOM QS_BEDROOM
## 71          AllPub    Gravel     RL      2.1         2.5        4.5
## 5088       NoSewr     Gravel     RM      3.2         4.2        2.0
## 6135           ELO     Paved     RL      3.4         3.5        3.0
## 6372        NoSeWa No Access     RM      2.3         3.5        3.2
## 6536       NoSewr      Paved     RM      3.3         3.7        4.7
##      QS_OVERALL REG_FEE COMMIS SALES_PRICE
## 71         3.16  451857  85486    12212350
## 5088       3.24  330086 106479    10647920
## 6135         NA  265423  44237     8847420
## 6372       3.05  223403  24823     8274200
## 6536       3.96  575606 124455    15556920
# Treat the bathroom count as a categorical variable.
gr_train$N_BATHROOM <- as.factor(gr_train$N_BATHROOM)

# Mean SALES_PRICE per N_BATHROOM. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg2 <- aggregate(SALES_PRICE ~ N_BATHROOM, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg2$SALES_PRICE
## [1] 10681096 11682991
agg2$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(agg2$SALES_PRICE)
## [1] "numeric"
# Number of rows per N_BATHROOM.
ggplot(gr_train, aes(x = N_BATHROOM, fill = N_BATHROOM)) +
  geom_bar()

# Sale price by N_BATHROOM, labelled with the group means held in `agg2`.
# The label is mapped from the layer's own column instead of
# `agg2$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg2,
    mapping = aes(x = N_BATHROOM, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each N_BATHROOM level.
ggplot(gr_train, aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)) +
  geom_boxplot()

table(gr_train$N_ROOM)
## 
##    2    3    4    5    6 
##  921 2125 2563 1246  254
class(gr_train$N_ROOM)
## [1] "integer"
# Treat the room count as a categorical variable.
gr_train$N_ROOM <- as.factor(gr_train$N_ROOM)

# Mean SALES_PRICE per N_ROOM. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg3 <- aggregate(SALES_PRICE ~ N_ROOM, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg3$SALES_PRICE
## [1]  6982718  9298812 11691432 14145331 14451305
agg3$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(agg3$SALES_PRICE)
## [1] "numeric"
# Number of rows per N_ROOM.
ggplot(gr_train, aes(x = N_ROOM, fill = N_ROOM)) +
  geom_bar()

# Sale price by N_ROOM, labelled with the group means held in `agg3`.
# The label is mapped from the layer's own column instead of
# `agg3$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg3,
    mapping = aes(x = N_ROOM, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each N_ROOM level.
ggplot(gr_train, aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)) +
  geom_boxplot()

levels(gr_train$SALE_COND)
## [1] "Ab Normal"   "AbNormal"    "Adj Land"    "AdjLand"     "Family"     
## [6] "Normal Sale" "Partial"     "Partiall"    "PartiaLl"
table(gr_train$SALE_COND)
## 
##   Ab Normal    AbNormal    Adj Land     AdjLand      Family Normal Sale 
##           5        1406           6        1433        1403        1423 
##     Partial    Partiall    PartiaLl 
##        1429           3           1
# Fold the misspelt SALE_COND labels into their canonical levels.
gr_train$SALE_COND <- car::recode(
  gr_train$SALE_COND,
  recodes = "c('Ab Normal')='AbNormal'"
)
gr_train$SALE_COND <- car::recode(
  gr_train$SALE_COND,
  recodes = "c('Adj Land')='AdjLand'"
)
gr_train$SALE_COND <- car::recode(
  gr_train$SALE_COND,
  recodes = "c('Partiall','PartiaLl')='Partial'"
)
levels(gr_train$SALE_COND)
## [1] "AbNormal"    "AdjLand"     "Family"      "Normal Sale" "Partial"
table(gr_train$SALE_COND)
## 
##    AbNormal     AdjLand      Family Normal Sale     Partial 
##        1411        1439        1403        1423        1433
gr_train$SALE_COND <- as.factor(gr_train$SALE_COND)

# Mean SALES_PRICE per SALE_COND. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg4 <- aggregate(SALES_PRICE ~ SALE_COND, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg4$SALES_PRICE
## [1] 10914632 11209315 10736415 10994178 10616369
agg4$SALE_COND
## [1] AbNormal    AdjLand     Family      Normal Sale Partial    
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(agg4$SALES_PRICE)
## [1] "numeric"
# Number of rows per SALE_COND.
ggplot(gr_train, aes(x = SALE_COND, fill = SALE_COND)) +
  geom_bar()

# Sale price by SALE_COND, labelled with the group means held in `agg4`.
# The label is mapped from the layer's own column instead of
# `agg4$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg4,
    mapping = aes(x = SALE_COND, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each SALE_COND level.
ggplot(gr_train, aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)) +
  geom_boxplot()

levels(gr_train$PARK_FACIL)
## [1] "No"  "Noo" "Yes"
table(gr_train$PARK_FACIL)
## 
##   No  Noo  Yes 
## 3520    2 3587
# Fold the misspelt 'Noo' label into 'No'.
gr_train$PARK_FACIL <- recode(gr_train$PARK_FACIL, "c('Noo')='No'")
ggplot(gr_train, aes(x = PARK_FACIL, fill = PARK_FACIL)) +
  geom_bar()

gr_train$PARK_FACIL <- as.factor(gr_train$PARK_FACIL)

# Mean SALES_PRICE per PARK_FACIL. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg5 <- aggregate(SALES_PRICE ~ PARK_FACIL, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg5$SALES_PRICE
## [1] 10338050 11441678
agg5$PARK_FACIL
## [1] No  Yes
## Levels: No Yes
class(agg5$SALES_PRICE)
## [1] "numeric"
# Number of rows per PARK_FACIL.
ggplot(gr_train, aes(x = PARK_FACIL, fill = PARK_FACIL)) +
  geom_bar()

# Sale price by PARK_FACIL, labelled with the group means held in `agg5`.
# The label is mapped from the layer's own column instead of
# `agg5$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg5,
    mapping = aes(x = PARK_FACIL, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each PARK_FACIL level.
ggplot(gr_train, aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)) +
  geom_boxplot()

levels(gr_train$BUILDTYPE)
## [1] "Comercial"  "Commercial" "House"      "Other"      "Others"
table(gr_train$BUILDTYPE)
## 
##  Comercial Commercial      House      Other     Others 
##          4       2325       2444         26       2310
# Fold the misspelt BUILDTYPE labels into their canonical levels.
gr_train$BUILDTYPE <- car::recode(
  gr_train$BUILDTYPE,
  recodes = "c('Comercial','Commercil')='Commercial'"
)
gr_train$BUILDTYPE <- car::recode(
  gr_train$BUILDTYPE,
  recodes = "c('Other')='Others'"
)
ggplot(gr_train, aes(x = BUILDTYPE, fill = BUILDTYPE)) +
  geom_bar()

# Switch to plain text so individual values can be overwritten later.
gr_train$BUILDTYPE <- as.character(gr_train$BUILDTYPE)
colSums(is.na(gr_train))
##        PRT_ID          AREA      INT_SQFT     DATE_SALE DIST_MAINROAD 
##             0             0             0             0             0 
##     N_BEDROOM    N_BATHROOM        N_ROOM     SALE_COND    PARK_FACIL 
##             1             5             0             0             0 
##    DATE_BUILD     BUILDTYPE UTILITY_AVAIL        STREET        MZZONE 
##             0             0             0             0             0 
##      QS_ROOMS   QS_BATHROOM    QS_BEDROOM    QS_OVERALL       REG_FEE 
##             0             0             0            48             0 
##        COMMIS   SALES_PRICE 
##             0             0
# NOTE(review): Commercial and Others are relabelled as apartment types here;
# the rationale is not shown in this document -- confirm it is intended.
gr_train$BUILDTYPE[gr_train$BUILDTYPE == "Commercial"] <- "3BHK APTS"
gr_train$BUILDTYPE[gr_train$BUILDTYPE == "Others"] <- "2BHK APTS"

gr_train$BUILDTYPE <- as.factor(gr_train$BUILDTYPE)

# Mean SALES_PRICE per BUILDTYPE. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg6 <- aggregate(SALES_PRICE ~ BUILDTYPE, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg6$SALES_PRICE
## [1]  9807998 13869836  9098847
agg6$BUILDTYPE
## [1] 2BHK APTS 3BHK APTS House    
## Levels: 2BHK APTS 3BHK APTS House
nlevels(agg6$BUILDTYPE)
## [1] 3
class(agg6$SALES_PRICE)
## [1] "numeric"
# Number of rows per BUILDTYPE.
ggplot(gr_train, aes(x = BUILDTYPE, fill = BUILDTYPE)) +
  geom_bar()

# Sale price by BUILDTYPE, labelled with the group means held in `agg6`.
# The label is mapped from the layer's own column instead of
# `agg6$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = BUILDTYPE, y = SALES_PRICE, fill = BUILDTYPE)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg6,
    mapping = aes(x = BUILDTYPE, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each BUILDTYPE level.
ggplot(gr_train, aes(x = BUILDTYPE, y = SALES_PRICE, fill = BUILDTYPE)) +
  geom_boxplot()

levels(gr_train$UTILITY_AVAIL)
## [1] "All Pub" "AllPub"  "ELO"     "NoSeWa"  "NoSewr "
table(gr_train$UTILITY_AVAIL)
## 
## All Pub  AllPub     ELO  NoSeWa NoSewr  
##       1    1886    1522    1871    1829
# Fold the trailing-space and spaced variants into their canonical levels.
gr_train$UTILITY_AVAIL <- recode(gr_train$UTILITY_AVAIL, "c('NoSewr ')='NoSewr'")
gr_train$UTILITY_AVAIL <- recode(gr_train$UTILITY_AVAIL, "c('All Pub')='AllPub'")

gr_train$UTILITY_AVAIL <- as.factor(gr_train$UTILITY_AVAIL)

# Mean SALES_PRICE per UTILITY_AVAIL. The means are already numeric, so the
# old factor -> character -> numeric round-trip (which could only lose
# precision) is removed; T is spelled TRUE.
agg7 <- aggregate(SALES_PRICE ~ UTILITY_AVAIL, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg7$SALES_PRICE
## [1] 11210016 10469415 10893079 10925759
agg7$UTILITY_AVAIL
## [1] AllPub ELO    NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(agg7$UTILITY_AVAIL)
## [1] 4
class(agg7$SALES_PRICE)
## [1] "numeric"
# Number of rows per UTILITY_AVAIL.
ggplot(gr_train, aes(x = UTILITY_AVAIL, fill = UTILITY_AVAIL)) +
  geom_bar()

# Sale price by UTILITY_AVAIL, labelled with the group means held in `agg7`.
# The label is mapped from the layer's own column instead of
# `agg7$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train,
       aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg7,
    mapping = aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each UTILITY_AVAIL level.
ggplot(gr_train,
       aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)) +
  geom_boxplot()

levels(gr_train$STREET)
## [1] "Gravel"    "No Access" "NoAccess"  "Pavd"      "Paved"
table(gr_train$STREET)
## 
##    Gravel No Access  NoAccess      Pavd     Paved 
##      2520      2010         7        12      2560
# Fold the misspelt STREET labels into their canonical levels.
gr_train$STREET <- recode(gr_train$STREET, "c('NoAccess')='No Access'")
gr_train$STREET <- recode(gr_train$STREET, "c('Pavd')='Paved'")

gr_train$STREET <- as.factor(gr_train$STREET)

# Mean SALES_PRICE per STREET. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg8 <- aggregate(SALES_PRICE ~ STREET, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg8$SALES_PRICE
## [1] 11444619 10000738 11057537
agg8$STREET
## [1] Gravel    No Access Paved    
## Levels: Gravel No Access Paved
nlevels(agg8$STREET)
## [1] 3
class(agg8$SALES_PRICE)
## [1] "numeric"
# Number of rows per STREET.
ggplot(gr_train, aes(x = STREET, fill = STREET)) +
  geom_bar()

# Sale price by STREET, labelled with the group means held in `agg8`.
# The label is mapped from the layer's own column instead of
# `agg8$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg8,
    mapping = aes(x = STREET, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each STREET level.
ggplot(gr_train, aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_boxplot()

levels(gr_train$MZZONE)
## [1] "A"  "C"  "I"  "RH" "RL" "RM"
table(gr_train$MZZONE)
## 
##    A    C    I   RH   RL   RM 
##  537  550  525 1822 1858 1817
gr_train$MZZONE <- as.factor(gr_train$MZZONE)

# Mean SALES_PRICE per MZZONE. The means are already numeric, so the old
# factor -> character -> numeric round-trip (which could only lose precision)
# is removed; T is spelled TRUE.
agg9 <- aggregate(SALES_PRICE ~ MZZONE, data = gr_train,
                  FUN = mean, na.rm = TRUE)
agg9$SALES_PRICE
## [1]  7292591  8052270  8738633 11039972 11765763 12407070
agg9$MZZONE
## [1] A  C  I  RH RL RM
## Levels: A C I RH RL RM
nlevels(agg9$MZZONE)
## [1] 6
class(agg9$SALES_PRICE)
## [1] "numeric"
# Number of rows per MZZONE.
ggplot(gr_train, aes(x = MZZONE, fill = MZZONE)) +
  geom_bar()

# Sale price by MZZONE, labelled with the group means held in `agg9`.
# The label is mapped from the layer's own column instead of
# `agg9$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg9,
    mapping = aes(x = MZZONE, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each MZZONE level.
ggplot(gr_train, aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_boxplot()

summary(gr_train$QS_ROOMS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.700   3.500   3.517   4.300   5.000
table(gr_train$QS_ROOMS)
## 
##   2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9   3 3.1 3.2 3.3 3.4 3.5 3.6 3.7 
## 203 236 213 224 208 265 237 200 226 220 228 230 208 239 240 227 255 205 
## 3.8 3.9   4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9   5 
## 259 245 218 222 239 225 219 218 252 239 239 242 228
# Bin the QS_ROOMS score into four bands:
#   <= 2.4 -> 2, (2.4, 3.5] -> 3, (3.5, 4.4] -> 4, > 4.4 -> 5.
# findInterval() with left.open = TRUE uses the same cut points as the
# former three-deep nested ifelse() chain.
gr_train$QS_ROOMS <- findInterval(
  gr_train$QS_ROOMS, c(2.4, 3.5, 4.4),
  left.open = TRUE
) + 2
gr_train$QS_ROOMS <- as.factor(gr_train$QS_ROOMS)

# Mean SALES_PRICE per QS_ROOMS band. The means are already numeric, so the
# old factor -> character -> numeric round-trip (which could only lose
# precision) is removed; T is spelled TRUE.
agg10 <- aggregate(SALES_PRICE ~ QS_ROOMS, data = gr_train,
                   FUN = mean, na.rm = TRUE)
agg10$SALES_PRICE
## [1] 10810950 10802809 10978843 10999236
agg10$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg10$QS_ROOMS)
## [1] 4
class(agg10$SALES_PRICE)
## [1] "numeric"
# Number of rows per QS_ROOMS band.
ggplot(gr_train, aes(x = QS_ROOMS, fill = QS_ROOMS)) +
  geom_bar()

# Sale price by QS_ROOMS, labelled with the group means held in `agg10`.
# The label is mapped from the layer's own column instead of
# `agg10$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg10,
    mapping = aes(x = QS_ROOMS, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each QS_ROOMS band.
ggplot(gr_train, aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()

table(gr_train$QS_BATHROOM)
## 
##   2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9   3 3.1 3.2 3.3 3.4 3.5 3.6 3.7 
## 222 224 234 220 230 233 226 256 206 228 241 232 226 227 234 225 221 251 
## 3.8 3.9   4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9   5 
## 209 211 232 210 237 224 219 231 234 247 255 245 219
# Bin the QS_BATHROOM score into four bands:
#   <= 2.4 -> 2, (2.4, 3.5] -> 3, (3.5, 4.4] -> 4, > 4.4 -> 5.
# findInterval() with left.open = TRUE uses the same cut points as the
# former three-deep nested ifelse() chain.
gr_train$QS_BATHROOM <- findInterval(
  gr_train$QS_BATHROOM, c(2.4, 3.5, 4.4),
  left.open = TRUE
) + 2
gr_train$QS_BATHROOM <- as.factor(gr_train$QS_BATHROOM)

# Mean SALES_PRICE per QS_BATHROOM band. The means are already numeric, so
# the old factor -> character -> numeric round-trip (which could only lose
# precision) is removed; T is spelled TRUE.
agg11 <- aggregate(SALES_PRICE ~ QS_BATHROOM, data = gr_train,
                   FUN = mean, na.rm = TRUE)
agg11$SALES_PRICE
## [1] 10894591 10958150 10825820 10880414
agg11$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg11$QS_BATHROOM)
## [1] 4
class(agg11$SALES_PRICE)
## [1] "numeric"
# Number of rows per QS_BATHROOM band.
ggplot(gr_train, aes(x = QS_BATHROOM, fill = QS_BATHROOM)) +
  geom_bar()

# Sale price by QS_BATHROOM, labelled with the group means held in `agg11`.
# The label is mapped from the layer's own column instead of
# `agg11$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg11,
    mapping = aes(x = QS_BATHROOM, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each QS_BATHROOM band.
ggplot(gr_train, aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()

table(gr_train$QS_BEDROOM)
## 
##   2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9   3 3.1 3.2 3.3 3.4 3.5 3.6 3.7 
## 221 242 237 200 244 226 273 222 210 219 241 243 253 234 239 237 225 203 
## 3.8 3.9   4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9   5 
## 244 220 248 223 212 237 237 227 233 228 211 203 217
# Bin the QS_BEDROOM score into four bands:
#   <= 2.4 -> 2, (2.4, 3.5] -> 3, (3.5, 4.4] -> 4, > 4.4 -> 5.
# findInterval() with left.open = TRUE uses the same cut points as the
# former three-deep nested ifelse() chain.
gr_train$QS_BEDROOM <- findInterval(
  gr_train$QS_BEDROOM, c(2.4, 3.5, 4.4),
  left.open = TRUE
) + 2

gr_train$QS_BEDROOM <- as.factor(gr_train$QS_BEDROOM)

# Mean SALES_PRICE per QS_BEDROOM band. The means are already numeric, so
# the old factor -> character -> numeric round-trip (which could only lose
# precision) is removed; T is spelled TRUE.
agg12 <- aggregate(SALES_PRICE ~ QS_BEDROOM, data = gr_train,
                   FUN = mean, na.rm = TRUE)
agg12$SALES_PRICE
## [1] 10670155 10905621 10986054 10927167
agg12$QS_BEDROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(agg12$QS_BEDROOM)
## [1] 4
class(agg12$SALES_PRICE)
## [1] "numeric"
# Number of rows per QS_BEDROOM band.
ggplot(gr_train, aes(x = QS_BEDROOM, fill = QS_BEDROOM)) +
  geom_bar()

# Sale price by QS_BEDROOM, labelled with the group means held in `agg12`.
# The label is mapped from the layer's own column instead of
# `agg12$SALES_PRICE`, which ggplot2 discourages inside aes(). Every label is
# drawn at one height, the mean of the group means.
ggplot(gr_train, aes(x = QS_BEDROOM, y = SALES_PRICE, fill = QS_BEDROOM)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = agg12,
    mapping = aes(x = QS_BEDROOM, y = mean(SALES_PRICE),
                  label = round(SALES_PRICE))
  )

# Spread of sale price within each QS_BEDROOM band.
ggplot(gr_train, aes(x = QS_BEDROOM, y = SALES_PRICE, fill = QS_BEDROOM)) +
  geom_boxplot()

# Age of the house at sale time: sale date minus build date.
# NOTE(review): presumably measured in days, since both columns were parsed
# as dates earlier -- confirm.
gr_train$houseage <- gr_train$DATE_SALE - gr_train$DATE_BUILD
class(gr_train$houseage)
## [1] "difftime"
# Drop the difftime class and keep the plain number.
gr_train$houseage <- as.numeric(gr_train$houseage)

# Distribution of house age.
ggplot(gr_train, aes(x = houseage)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# House age against sale price.
ggplot(gr_train, aes(x = houseage, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Distribution of REG_FEE.
ggplot(gr_train, aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# REG_FEE against sale price.
ggplot(gr_train, aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# This section plotted REG_FEE but only ever tested INT_SQFT, unlike the
# COMMIS section, which tests its own variable. The REG_FEE correlation is
# added here; its output will appear once the document is re-knitted.
cor.test(gr_train$REG_FEE, gr_train$SALES_PRICE)

# Original call, kept so the captured output that follows still applies.
cor.test(gr_train$INT_SQFT, gr_train$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  gr_train$INT_SQFT and gr_train$SALES_PRICE
## t = 65.259, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5973789 0.6264570
## sample estimates:
##       cor 
## 0.6121249
boxplot.stats(gr_train$REG_FEE)$out
##   [1] 760122 742113 917550 808639 816325 885666 792637 809377 839848 792581
##  [11] 850347 807919 804310 722637 725167 817115 725563 726866 858938 820722
##  [21] 751564 740832 762560 747611 732846 923026 963029 851979 823239 880115
##  [31] 752727 775789 913666 797727 836283 765428 893744 940813 782091 756758
##  [41] 759431 891866 778771 800245 777136 729338 769310 769130 731893 786861
##  [51] 809394 740382 787401 746911 941567 752815 800231 781305 752593 790306
##  [61] 780507 745098 840592 746576 816678 812872 807579 808102 781044 823645
##  [71] 777513 765316 850799 855567 726560 727590 770309 723532 767960 735740
##  [81] 724899 826735 808496 844168 728057 909160 787045 783698 759022 802148
##  [91] 738288 773321 870464 769757 731292 729771 802811 787504 775222 983922
## [101] 720625 741521 887450 730011 847638 818342 741809 770779 821501 732764
## [111] 743729 843562 931224 810693 773456 827999 809661 725904 758028 845790
## [121] 731190 755385 732194 742623 829837 838032 865802 792155 753674 883120
## [131] 839975 821637 869438 733135 746538 732043 803373 743923 805571 740159
## [141] 751507 740232 766669 741852 880095 761653 760083 839394 735368 947124
## [151] 821467 853494 929714 904779 845947 765080 733335 815506 853017 839704
## [161] 739547 870024 874696 790990 731930 745579 804292 866652 761292 860513
## [171] 729450 742540 747262 863115 731464 724201 750484 823250 782811 869775
## [181] 798100 952411 761636 826735 727335 745505 760754 839611 722752 753187
## [191] 743552 803512 854535 772422 936314 756744 783738 823071 851325 743023
## [201] 825243 760155 779777 942859 740155 903181 860696 735253 723058 750333
## [211] 883743 783633 757649 752460 852466 981117 751181 756135 790593 888724
## [221] 774637 815534 736659 823312 754472 747457 826242 828486
ggplot(data = gr_train,mapping = aes(COMMIS))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = gr_train,mapping = aes(x=COMMIS,y=SALES_PRICE))+geom_point(color='darkblue')

cor.test(gr_train$COMMIS,gr_train$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  gr_train$COMMIS and gr_train$SALES_PRICE
## t = 67.723, df = 7107, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6119380 0.6402014
## sample estimates:
##       cor 
## 0.6262754
boxplot.stats(gr_train$COMMIS)$out
##   [1] 339934 471247 387484 348491 456247 495405 364434 368162 381199 352277
##  [11] 342558 351886 397115 375431 342437 421452 443349 343422 378677 341073
##  [21] 408950 402279 337717 442485 348080 372260 418432 466156 343858 341883
##  [31] 374390 422406 341707 389978 455034 406315 389863 394030 357180 371273
##  [41] 406974 347700 379683 345860 362422 470784 334948 345888 395932 416414
##  [51] 340569 347505 366648 373286 399281 377053 469920 427494 365960 352022
##  [61] 343873 339524 370996 336398 370640 362451 463218 372519 349046 371776
##  [71] 357993 427485 363804 347960 339749 371953 361696 341155 391849 424072
##  [81] 344381 355720 342114 393752 491961 360312 404278 359952 351174 359272
##  [91] 374378 479297 340751 343440 348975 390689 362952 424557 340400 433936
## [101] 381920 348458 481001 337500 398987 398245 361706 342639 346772 349731
## [111] 360667 361417 355848 348714 341124 396831 362683 437319 377184 359347
## [121] 399868 368833 339657 355229 352530 354798 426508 340052 336820 409087
## [131] 348943 353696 372525 409432 391867 433326 345743 347910 369468 419917
## [141] 485924 382492 355800 416606 340923 344511 377825 389456 371776 393009
## [151] 363362 370852 390114 394428 343163 444749 379879 339206 350600 429011
## [161] 420340 368297 389913 356408 342111 396783 475795 451314 363424 337244
## [171] 367599 386469 373335 370302 415659 392474 378853 376956 411656 373606
## [181] 353880
summary(gr_train$QS_OVERALL)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.000   3.130   3.500   3.503   3.890   4.970      48
# Bucket the continuous QS_OVERALL score into 2/3/4/5 and store it as a factor.
# Buckets are right-closed: <= 2.4 -> 2, (2.4, 3.5] -> 3, (3.5, 4.4] -> 4,
# > 4.4 -> 5. Missing scores stay NA.
gr_train$QS_OVERALL <- as.factor(
  c(2, 3, 4, 5)[
    findInterval(
      gr_train$QS_OVERALL,
      c(-Inf, 2.4, 3.5, 4.4, Inf),
      left.open = TRUE
    )
  ]
)
colnames(gr_train)
##  [1] "PRT_ID"        "AREA"          "INT_SQFT"      "DATE_SALE"    
##  [5] "DIST_MAINROAD" "N_BEDROOM"     "N_BATHROOM"    "N_ROOM"       
##  [9] "SALE_COND"     "PARK_FACIL"    "DATE_BUILD"    "BUILDTYPE"    
## [13] "UTILITY_AVAIL" "STREET"        "MZZONE"        "QS_ROOMS"     
## [17] "QS_BATHROOM"   "QS_BEDROOM"    "QS_OVERALL"    "REG_FEE"      
## [21] "COMMIS"        "SALES_PRICE"   "houseage"
# Drop the property id and the two raw date columns by name instead of by
# position (1, 4, 11), so the selection survives any change in column order.
gr_train <- gr_train[, setdiff(
  names(gr_train),
  c("PRT_ID", "DATE_SALE", "DATE_BUILD")
)]
colSums(is.na(gr_train))
##          AREA      INT_SQFT DIST_MAINROAD     N_BEDROOM    N_BATHROOM 
##             0             0             0             1             5 
##        N_ROOM     SALE_COND    PARK_FACIL     BUILDTYPE UTILITY_AVAIL 
##             0             0             0             0             0 
##        STREET        MZZONE      QS_ROOMS   QS_BATHROOM    QS_BEDROOM 
##             0             0             0             0             0 
##    QS_OVERALL       REG_FEE        COMMIS   SALES_PRICE      houseage 
##            48             0             0             0             0
library("missForest")
## Warning: package 'missForest' was built under R version 3.4.3
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## Loading required package: foreach
## Loading required package: itertools
## Warning: package 'itertools' was built under R version 3.4.2
## Loading required package: iterators
gr_train<-missForest(gr_train,verbose = TRUE)
##   missForest iteration 1 in progress...done!
##     estimated error(s): 0 0.01105671 
##     difference(s): 0 0.0002511906 
##     time: 2.82 seconds
## 
##   missForest iteration 2 in progress...done!
##     estimated error(s): 0 0.01110729 
##     difference(s): 0 0 
##     time: 2.09 seconds
## 
##   missForest iteration 3 in progress...done!
##     estimated error(s): 0 0.01101625 
##     difference(s): 0 2.009525e-05 
##     time: 2.08 seconds
class(gr_train)
## [1] "missForest"
# Read the out-of-bag imputation error while gr_train is still the missForest
# result. Once gr_train is replaced by the imputed data frame there is no
# OOBerror element left, which is why this lookup used to print NULL.
gr_train$OOBerror
# Keep only the imputed data (the ximp element) for the rest of the analysis.
gr_train <- data.frame(gr_train$ximp)
colSums(is.na(gr_train))
##          AREA      INT_SQFT DIST_MAINROAD     N_BEDROOM    N_BATHROOM 
##             0             0             0             0             0 
##        N_ROOM     SALE_COND    PARK_FACIL     BUILDTYPE UTILITY_AVAIL 
##             0             0             0             0             0 
##        STREET        MZZONE      QS_ROOMS   QS_BATHROOM    QS_BEDROOM 
##             0             0             0             0             0 
##    QS_OVERALL       REG_FEE        COMMIS   SALES_PRICE      houseage 
##             0             0             0             0             0
# Split the imputed gr_train by BUILDTYPE: one data frame each for the
# "House", "2BHK APTS" and "3BHK APTS" rows. The per-type analysis below
# works on these three subsets.
House=gr_train[gr_train$BUILDTYPE=="House",]
BHK_2=gr_train[gr_train$BUILDTYPE=="2BHK APTS",]
BHK_3=gr_train[gr_train$BUILDTYPE=="3BHK APTS",]
summary(House$SALES_PRICE)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  2640250  7407715  8985370  9098847 10844520 15880930
ggplot(data = House,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")

ggplot(data = BHK_2,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")

ggplot(data = BHK_3,mapping = aes(SALES_PRICE))+geom_density(fill="steelblue")

# Overlay the SALES_PRICE density of the three building-type subsets on one
# plot: House in red, BHK_2 in blue, BHK_3 in green. The x mapping is shared,
# so it is declared once and inherited by every layer.
ggplot(mapping = aes(SALES_PRICE)) +
  geom_density(data = House, colour = "red") +
  geom_density(data = BHK_2, colour = "blue") +
  geom_density(data = BHK_3, colour = "green")

library("car")
boxplot.stats(House$SALES_PRICE)$out
## numeric(0)
boxplot.stats(BHK_2$SALES_PRICE)$out
## [1] 2156875
boxplot.stats(BHK_3$SALES_PRICE)$out
## numeric(0)
#1 AREA
levels(House$AREA)
## [1] "Adyar"      "Anna Nagar" "Chrompet"   "Karapakkam" "KK Nagar"  
## [6] "T Nagar"    "Velachery"
table(House$AREA)
## 
##      Adyar Anna Nagar   Chrompet Karapakkam   KK Nagar    T Nagar 
##        263        276        591        486        348        156 
##  Velachery 
##        324
ggplot(data = House,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# Mean SALES_PRICE per AREA for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr1 <- aggregate(SALES_PRICE ~ AREA, data = House, FUN = mean, na.rm = TRUE)
aggr1$SALES_PRICE
## [1]  7973683 12405999  8553965  6266440 10666748 12664535  9036630
aggr1$AREA
## [1] Adyar      Anna Nagar Chrompet   Karapakkam KK Nagar   T Nagar   
## [7] Velachery 
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr1$SALES_PRICE)
## [1] "numeric"
library("ggplot2")
ggplot(data = House,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# SALES_PRICE by AREA for House, with the aggr1 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr1$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = AREA, y = SALES_PRICE, fill = AREA)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr1,
    aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=AREA,y=SALES_PRICE,fill=AREA))+geom_boxplot()

levels(BHK_2$AREA)
## [1] "Adyar"      "Anna Nagar" "Chrompet"   "Karapakkam" "KK Nagar"  
## [6] "T Nagar"    "Velachery"
table(BHK_2$AREA)
## 
##      Adyar Anna Nagar   Chrompet Karapakkam   KK Nagar    T Nagar 
##        264        260        549        447        327        167 
##  Velachery 
##        322
ggplot(data = BHK_2,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# Mean SALES_PRICE per AREA for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr2 <- aggregate(SALES_PRICE ~ AREA, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr2$SALES_PRICE
## [1]  8209260 13473524  9154363  6737757 11634699 13575771  9726395
aggr2$AREA
## [1] Adyar      Anna Nagar Chrompet   Karapakkam KK Nagar   T Nagar   
## [7] Velachery 
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr2$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# SALES_PRICE by AREA for BHK_2, with the aggr2 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr2$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = AREA, y = SALES_PRICE, fill = AREA)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr2,
    aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=AREA,y=SALES_PRICE,fill=AREA))+geom_boxplot()

levels(BHK_3$AREA)
## [1] "Adyar"      "Anna Nagar" "Chrompet"   "Karapakkam" "KK Nagar"  
## [6] "T Nagar"    "Velachery"
table(BHK_3$AREA)
## 
##      Adyar Anna Nagar   Chrompet Karapakkam   KK Nagar    T Nagar 
##        247        252        562        433        322        178 
##  Velachery 
##        335
ggplot(data = BHK_3,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# Mean SALES_PRICE per AREA for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr3 <- aggregate(SALES_PRICE ~ AREA, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr3$SALES_PRICE
## [1] 11519773 19941948 12388551  9166997 15967594 20118915 14261749
aggr3$AREA
## [1] Adyar      Anna Nagar Chrompet   Karapakkam KK Nagar   T Nagar   
## [7] Velachery 
## 7 Levels: Adyar Anna Nagar Chrompet Karapakkam KK Nagar ... Velachery
class(aggr3$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(x=AREA,fill=AREA))+geom_bar()

# SALES_PRICE by AREA for BHK_3, with the aggr3 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr3$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = AREA, y = SALES_PRICE, fill = AREA)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr3,
    aes(x = AREA, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=AREA,y=SALES_PRICE,fill=AREA))+geom_boxplot()

#2 INT_SQFT
ggplot(data = House,mapping = aes(INT_SQFT))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = House,mapping = aes(x=INT_SQFT,y=SALES_PRICE))+geom_point(color='darkblue')

cor.test(House$INT_SQFT,House$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  House$INT_SQFT and House$SALES_PRICE
## t = 49.022, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6837107 0.7237090
## sample estimates:
##       cor 
## 0.7042683
boxplot.stats(House$INT_SQFT)$out
## numeric(0)
summary(House$INT_SQFT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   500.0   987.8  1366.5  1377.7  1733.0  2500.0
ggplot(data = BHK_2,mapping = aes(INT_SQFT))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = BHK_2,mapping = aes(x=INT_SQFT,y=SALES_PRICE))+geom_point(color='darkblue')

cor.test(BHK_2$INT_SQFT,BHK_2$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_2$INT_SQFT and BHK_2$SALES_PRICE
## t = 53.881, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7258993 0.7620809
## sample estimates:
##       cor 
## 0.7445364
boxplot.stats(BHK_2$INT_SQFT)$out
## numeric(0)
summary(BHK_2$INT_SQFT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     500     996    1353    1381    1749    2499
ggplot(data = BHK_3,mapping = aes(INT_SQFT))+geom_histogram(fill="steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = BHK_3,mapping = aes(x=INT_SQFT,y=SALES_PRICE))+geom_point(color='darkblue')

cor.test(BHK_3$INT_SQFT,BHK_3$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_3$INT_SQFT and BHK_3$SALES_PRICE
## t = 60.981, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7681326 0.7994314
## sample estimates:
##       cor 
## 0.7842805
boxplot.stats(BHK_3$INT_SQFT)$out
## numeric(0)
summary(BHK_3$INT_SQFT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     500    1000    1414    1388    1749    2498
# Split House by bedroom count. Note the naming offset: bed0 holds the
# N_BEDROOM == 1 rows, bed1 the 2-bedroom rows, and so on up to bed3 (4).
bed0=House[House$N_BEDROOM==1,]
bed1=House[House$N_BEDROOM==2,]
bed2=House[House$N_BEDROOM==3,]
bed3=House[House$N_BEDROOM==4,]
ggplot(data = bed1,mapping = aes(x=INT_SQFT,y=SALES_PRICE))+
  geom_point()

#3 DIST_MAINROAD
# Histogram of DIST_MAINROAD for House. The former fill = SALES_PRICE mapping
# is removed: the constant fill on the layer always overrode it.
ggplot(data = House, mapping = aes(DIST_MAINROAD)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = House,mapping = aes(x=DIST_MAINROAD,y=SALES_PRICE))+geom_point(color='darkblue')

boxplot.stats(House$DIST_MAINROAD)$out
## numeric(0)
summary(House$DIST_MAINROAD)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   47.75   96.50   98.06  149.00  200.00
# Histogram of DIST_MAINROAD for BHK_2. The former fill = SALES_PRICE mapping
# is removed: the constant fill on the layer always overrode it.
ggplot(data = BHK_2, mapping = aes(DIST_MAINROAD)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = BHK_2,mapping = aes(x=DIST_MAINROAD,y=SALES_PRICE))+geom_point(color='darkblue')

boxplot.stats(BHK_2$DIST_MAINROAD)$out
## numeric(0)
summary(BHK_2$DIST_MAINROAD)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    51.0   100.0    99.4   147.0   200.0
# Histogram of DIST_MAINROAD for BHK_3. The former fill = SALES_PRICE mapping
# is removed: the constant fill on the layer always overrode it.
ggplot(data = BHK_3, mapping = aes(DIST_MAINROAD)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = BHK_3,mapping = aes(x=DIST_MAINROAD,y=SALES_PRICE))+geom_point(color='darkblue')

boxplot.stats(BHK_3$DIST_MAINROAD)$out
## numeric(0)
summary(BHK_3$DIST_MAINROAD)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    52.0   102.0   101.4   148.0   200.0
#4 N_BEDROOM

# Mean SALES_PRICE per N_BEDROOM for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr4 <- aggregate(
  SALES_PRICE ~ N_BEDROOM,
  data = House, FUN = mean, na.rm = TRUE
)
aggr4$SALES_PRICE
## [1]  8286822  9702640 10269939 12139722
aggr4$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr4$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(N_BEDROOM,fill=N_BEDROOM))+geom_bar()

# SALES_PRICE by N_BEDROOM for House, with the aggr4 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr4$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr4,
    aes(x = N_BEDROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=N_BEDROOM,y=SALES_PRICE,fill=N_BEDROOM))+geom_boxplot()

# Mean SALES_PRICE per N_BEDROOM for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr5 <- aggregate(
  SALES_PRICE ~ N_BEDROOM,
  data = BHK_2, FUN = mean, na.rm = TRUE
)
aggr5$SALES_PRICE
## [1]  8862535 10607800 11087235 13231643
aggr5$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr5$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(N_BEDROOM,fill=N_BEDROOM))+geom_bar()

# SALES_PRICE by N_BEDROOM for BHK_2, with the aggr5 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr5$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr5,
    aes(x = N_BEDROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=N_BEDROOM,y=SALES_PRICE,fill=N_BEDROOM))+geom_boxplot()

# Mean SALES_PRICE per N_BEDROOM for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr6 <- aggregate(
  SALES_PRICE ~ N_BEDROOM,
  data = BHK_3, FUN = mean, na.rm = TRUE
)
aggr6$SALES_PRICE
## [1] 12321601 15192929 16015603 18775575
aggr6$N_BEDROOM
## [1] 1 2 3 4
## Levels: 1 2 3 4
class(aggr6$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(N_BEDROOM,fill=N_BEDROOM))+geom_bar()

# SALES_PRICE by N_BEDROOM for BHK_3, with the aggr6 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr6$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = N_BEDROOM, y = SALES_PRICE, fill = N_BEDROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr6,
    aes(x = N_BEDROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=N_BEDROOM,y=SALES_PRICE,fill=N_BEDROOM))+geom_boxplot()

# Split House by bedroom count (bed0 = 1 bedroom ... bed3 = 4 bedrooms).
# NOTE(review): these four assignments repeat the ones made in the
# "#2 INT_SQFT" section; House is not reassigned in between, so they
# re-create the same objects and could be dropped.
bed0=House[House$N_BEDROOM==1,]
bed1=House[House$N_BEDROOM==2,]
bed2=House[House$N_BEDROOM==3,]
bed3=House[House$N_BEDROOM==4,]

ggplot(data = bed1,mapping = aes(x=AREA,fill=AREA))+geom_bar()

ggplot(data = bed2,mapping = aes(x=AREA,fill=AREA))+geom_bar()

ggplot(data = bed3,mapping = aes(x=AREA,fill=AREA))+geom_bar()

ggplot(data = bed1,mapping = aes(x=AREA,y=SALES_PRICE,fill=AREA))+
  geom_bar(stat="identity",position = "dodge")

# Split BHK_2 by bedroom count, continuing the bedN numbering used for House:
# bed4 holds the 1-bedroom rows, bed5 the 2-bedroom rows, up to bed7 (4).
bed4=BHK_2[BHK_2$N_BEDROOM==1,]
bed5=BHK_2[BHK_2$N_BEDROOM==2,]
bed6=BHK_2[BHK_2$N_BEDROOM==3,]
bed7=BHK_2[BHK_2$N_BEDROOM==4,]

ggplot(data = bed4,mapping = aes(x=AREA,fill=AREA))+geom_bar()

#5 N_BATHROOM

# Mean SALES_PRICE per N_BATHROOM for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr7 <- aggregate(
  SALES_PRICE ~ N_BATHROOM,
  data = House, FUN = mean, na.rm = TRUE
)
aggr7$SALES_PRICE
## [1] 8948484 9621536
aggr7$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr7$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(N_BATHROOM,fill=N_BATHROOM))+geom_bar()

# SALES_PRICE by N_BATHROOM for House, with the aggr7 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr7$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr7,
    aes(x = N_BATHROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=N_BATHROOM,y=SALES_PRICE,fill=N_BATHROOM))+geom_boxplot()

# Split House by bathroom count: bath1 = one bathroom, bath2 = two bathrooms.
bath1=House[House$N_BATHROOM==1,]
bath2=House[House$N_BATHROOM==2,]

ggplot(data = bath2,mapping = aes(AREA,fill=AREA))+geom_bar()

# Mean SALES_PRICE per N_BATHROOM for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr8 <- aggregate(
  SALES_PRICE ~ N_BATHROOM,
  data = BHK_2, FUN = mean, na.rm = TRUE
)
aggr8$SALES_PRICE
## [1]  9588267 10655400
aggr8$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr8$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(N_BATHROOM,fill=N_BATHROOM))+geom_bar()

# SALES_PRICE by N_BATHROOM for BHK_2, with the aggr8 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr8$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr8,
    aes(x = N_BATHROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=N_BATHROOM,y=SALES_PRICE,fill=N_BATHROOM))+geom_boxplot()

# Mean SALES_PRICE per N_BATHROOM for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr9 <- aggregate(
  SALES_PRICE ~ N_BATHROOM,
  data = BHK_3, FUN = mean, na.rm = TRUE
)
aggr9$SALES_PRICE
## [1] 13569649 15002305
aggr9$N_BATHROOM
## [1] 1 2
## Levels: 1 2
class(aggr9$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(N_BATHROOM,fill=N_BATHROOM))+geom_bar()

# SALES_PRICE by N_BATHROOM for BHK_3, with the aggr9 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr9$ inside aes) and all
# sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = N_BATHROOM, y = SALES_PRICE, fill = N_BATHROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr9,
    aes(x = N_BATHROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=N_BATHROOM,y=SALES_PRICE,fill=N_BATHROOM))+geom_boxplot()

#6 N_ROOM

# Mean SALES_PRICE per N_ROOM for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr10 <- aggregate(SALES_PRICE ~ N_ROOM, data = House, FUN = mean, na.rm = TRUE)
aggr10$SALES_PRICE
## [1]  6209357  7966188  9633542 11535305 12139722
aggr10$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr10$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(N_ROOM,fill=N_ROOM))+geom_bar()

# SALES_PRICE by N_ROOM for House, with the aggr10 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr10$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr10,
    aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=N_ROOM,y=SALES_PRICE,fill=N_ROOM))+geom_boxplot()

# Split House by total room count. Note the naming offset: room1 holds the
# N_ROOM == 2 rows, room2 the 3-room rows, and so on up to room5 (6 rooms).
room1=House[House$N_ROOM==2,]
room2=House[House$N_ROOM==3,]
room3=House[House$N_ROOM==4,]
room4=House[House$N_ROOM==5,]
room5=House[House$N_ROOM==6,]

ggplot(data = room1,mapping = aes(AREA,fill=AREA))+geom_bar()

ggplot(data = room2,mapping = aes(AREA,fill=AREA))+geom_bar()

ggplot(data = room3,mapping = aes(AREA,fill=AREA))+geom_bar()

ggplot(data = room4,mapping = aes(AREA,fill=AREA))+geom_bar()

ggplot(data = room5,mapping = aes(AREA,fill=AREA))+geom_bar()

# Mean SALES_PRICE per N_ROOM for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr11 <- aggregate(SALES_PRICE ~ N_ROOM, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr11$SALES_PRICE
## [1]  6481595  8390607 10592620 12445687 13231643
aggr11$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr11$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(N_ROOM,fill=N_ROOM))+geom_bar()

# SALES_PRICE by N_ROOM for BHK_2, with the aggr11 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr11$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr11,
    aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=N_ROOM,y=SALES_PRICE,fill=N_ROOM))+geom_boxplot()

# Mean SALES_PRICE per N_ROOM for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr12 <- aggregate(SALES_PRICE ~ N_ROOM, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr12$SALES_PRICE
## [1]  8471806 11523821 14956904 18328314 18775575
aggr12$N_ROOM
## [1] 2 3 4 5 6
## Levels: 2 3 4 5 6
class(aggr12$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(N_ROOM,fill=N_ROOM))+geom_bar()

# SALES_PRICE by N_ROOM for BHK_3, with the aggr12 group means as text labels.
# Labels use the layer's own SALES_PRICE column (no aggr12$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = N_ROOM, y = SALES_PRICE, fill = N_ROOM)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr12,
    aes(x = N_ROOM, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=N_ROOM,y=SALES_PRICE,fill=N_ROOM))+geom_boxplot()

#7 SALE_COND

# Mean SALES_PRICE per SALE_COND for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr13 <- aggregate(
  SALES_PRICE ~ SALE_COND,
  data = House, FUN = mean, na.rm = TRUE
)
aggr13$SALES_PRICE
## [1] 8975728 9479419 9050138 9125947 8861459
aggr13$SALE_COND
## [1] AbNormal    AdjLand     Family      Normal Sale Partial    
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr13$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(SALE_COND,fill=SALE_COND))+geom_bar()

# SALES_PRICE by SALE_COND for House, with the aggr13 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr13$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr13,
    aes(x = SALE_COND, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=SALE_COND,y=SALES_PRICE,fill=SALE_COND))+geom_boxplot()

# Mean SALES_PRICE per SALE_COND for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr14 <- aggregate(
  SALES_PRICE ~ SALE_COND,
  data = BHK_2, FUN = mean, na.rm = TRUE
)
aggr14$SALES_PRICE
## [1]  9893904 10113500  9527528  9807603  9678647
aggr14$SALE_COND
## [1] AbNormal    AdjLand     Family      Normal Sale Partial    
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr14$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(SALE_COND,fill=SALE_COND))+geom_bar()

# SALES_PRICE by SALE_COND for BHK_2, with the aggr14 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr14$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr14,
    aes(x = SALE_COND, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=SALE_COND,y=SALES_PRICE,fill=SALE_COND))+geom_boxplot()

# Mean SALES_PRICE per SALE_COND for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr15 <- aggregate(
  SALES_PRICE ~ SALE_COND,
  data = BHK_3, FUN = mean, na.rm = TRUE
)
aggr15$SALES_PRICE
## [1] 13956477 14276746 13632057 14056235 13436253
aggr15$SALE_COND
## [1] AbNormal    AdjLand     Family      Normal Sale Partial    
## Levels: AbNormal AdjLand Family Normal Sale Partial
class(aggr15$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(SALE_COND,fill=SALE_COND))+geom_bar()

# SALES_PRICE by SALE_COND for BHK_3, with the aggr15 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr15$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = SALE_COND, y = SALES_PRICE, fill = SALE_COND)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr15,
    aes(x = SALE_COND, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=SALE_COND,y=SALES_PRICE,fill=SALE_COND))+geom_boxplot()

#8 PARK_FACIL

# Mean SALES_PRICE per PARK_FACIL for the House subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr16 <- aggregate(
  SALES_PRICE ~ PARK_FACIL,
  data = House, FUN = mean, na.rm = TRUE
)
aggr16$SALES_PRICE
## [1] 8598630 9604000
aggr16$PARK_FACIL
## [1] No  Yes
## Levels: No Yes
class(aggr16$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(PARK_FACIL,fill=PARK_FACIL))+geom_bar()

# SALES_PRICE by PARK_FACIL for House, with the aggr16 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr16$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr16,
    aes(x = PARK_FACIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=PARK_FACIL,y=SALES_PRICE,fill=PARK_FACIL))+geom_boxplot()

# Mean SALES_PRICE per PARK_FACIL for the BHK_2 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr17 <- aggregate(
  SALES_PRICE ~ PARK_FACIL,
  data = BHK_2, FUN = mean, na.rm = TRUE
)
aggr17$SALES_PRICE
## [1]  9340046 10268005
aggr17$PARK_FACIL
## [1] No  Yes
## Levels: No Yes
class(aggr17$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(PARK_FACIL,fill=PARK_FACIL))+geom_bar()

# SALES_PRICE by PARK_FACIL for BHK_2, with the aggr17 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr17$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr17,
    aes(x = PARK_FACIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=PARK_FACIL,y=SALES_PRICE,fill=PARK_FACIL))+geom_boxplot()

# Mean SALES_PRICE per PARK_FACIL for the BHK_3 subset. The column is already
# numeric, so no factor/character round trip is applied to it.
aggr18 <- aggregate(
  SALES_PRICE ~ PARK_FACIL,
  data = BHK_3, FUN = mean, na.rm = TRUE
)
aggr18$SALES_PRICE
## [1] 13235672 14473701
aggr18$PARK_FACIL
## [1] No  Yes
## Levels: No Yes
class(aggr18$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(PARK_FACIL,fill=PARK_FACIL))+geom_bar()

# SALES_PRICE by PARK_FACIL for BHK_3, with the aggr18 group means as labels.
# Labels use the layer's own SALES_PRICE column (no aggr18$ inside aes) and
# all sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = PARK_FACIL, y = SALES_PRICE, fill = PARK_FACIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr18,
    aes(x = PARK_FACIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=PARK_FACIL,y=SALES_PRICE,fill=PARK_FACIL))+geom_boxplot()

#9 UTILITY_AVAIL

# Mean SALES_PRICE per UTILITY_AVAIL for the House subset. The column is
# already numeric, so no factor/character round trip is applied to it.
aggr19 <- aggregate(
  SALES_PRICE ~ UTILITY_AVAIL,
  data = House, FUN = mean, na.rm = TRUE
)
aggr19$SALES_PRICE
## [1] 9248229 8706142 9203174 9174266
aggr19$UTILITY_AVAIL
## [1] AllPub ELO    NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr19$UTILITY_AVAIL)
## [1] 4
class(aggr19$SALES_PRICE)
## [1] "numeric"
ggplot(data = House,mapping = aes(UTILITY_AVAIL,fill=UTILITY_AVAIL))+geom_bar()

# SALES_PRICE by UTILITY_AVAIL for House, with the aggr19 group means as
# labels. Labels use the layer's own SALES_PRICE column (no aggr19$ inside
# aes) and all sit at one height, the mean of the group means.
ggplot(
  data = House,
  mapping = aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr19,
    aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = House,mapping = aes(x=UTILITY_AVAIL,y=SALES_PRICE,fill=UTILITY_AVAIL))+geom_boxplot()

# Mean SALES_PRICE per UTILITY_AVAIL for the BHK_2 subset. The column is
# already numeric, so no factor/character round trip is applied to it.
aggr20 <- aggregate(
  SALES_PRICE ~ UTILITY_AVAIL,
  data = BHK_2, FUN = mean, na.rm = TRUE
)
aggr20$SALES_PRICE
## [1] 10029917  9370981  9727401 10061315
aggr20$UTILITY_AVAIL
## [1] AllPub ELO    NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr20$UTILITY_AVAIL)
## [1] 4
class(aggr20$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_2,mapping = aes(UTILITY_AVAIL,fill=UTILITY_AVAIL))+geom_bar()

# SALES_PRICE by UTILITY_AVAIL for BHK_2, with the aggr20 group means as
# labels. Labels use the layer's own SALES_PRICE column (no aggr20$ inside
# aes) and all sit at one height, the mean of the group means.
ggplot(
  data = BHK_2,
  mapping = aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr20,
    aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_2,mapping = aes(x=UTILITY_AVAIL,y=SALES_PRICE,fill=UTILITY_AVAIL))+geom_boxplot()

# Mean SALES_PRICE per UTILITY_AVAIL for the BHK_3 subset. The column is
# already numeric, so no factor/character round trip is applied to it.
aggr21 <- aggregate(
  SALES_PRICE ~ UTILITY_AVAIL,
  data = BHK_3, FUN = mean, na.rm = TRUE
)
aggr21$SALES_PRICE
## [1] 14181164 13689766 13777333 13762990
aggr21$UTILITY_AVAIL
## [1] AllPub ELO    NoSeWa NoSewr
## Levels: AllPub ELO NoSeWa NoSewr
nlevels(aggr21$UTILITY_AVAIL)
## [1] 4
class(aggr21$SALES_PRICE)
## [1] "numeric"
ggplot(data = BHK_3,mapping = aes(UTILITY_AVAIL,fill=UTILITY_AVAIL))+geom_bar()

# SALES_PRICE by UTILITY_AVAIL for BHK_3, with the aggr21 group means as
# labels. Labels use the layer's own SALES_PRICE column (no aggr21$ inside
# aes) and all sit at one height, the mean of the group means.
ggplot(
  data = BHK_3,
  mapping = aes(x = UTILITY_AVAIL, y = SALES_PRICE, fill = UTILITY_AVAIL)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    data = aggr21,
    aes(x = UTILITY_AVAIL, y = mean(SALES_PRICE), label = round(SALES_PRICE))
  )

ggplot(data = BHK_3,mapping = aes(x=UTILITY_AVAIL,y=SALES_PRICE,fill=UTILITY_AVAIL))+geom_boxplot()

#10 STREET

# Mean SALES_PRICE per STREET level in House (na.rm is forwarded to mean()).
aggr22 = aggregate(SALES_PRICE ~ STREET, data = House, FUN = mean, na.rm = TRUE)
aggr22$SALES_PRICE
## [1] 9683562 8259834 9162599
aggr22$STREET
## [1] Gravel    No Access Paved    
## Levels: Gravel No Access Paved
nlevels(aggr22$STREET)
## [1] 3
class(aggr22$SALES_PRICE)
## [1] "numeric"

# Row count per STREET level in House.
ggplot(data = House, mapping = aes(x = STREET, fill = STREET)) +
  geom_bar()

# Mean SALES_PRICE per STREET level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr22, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each STREET level in House.
ggplot(data = House, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_boxplot()

# Mean SALES_PRICE per STREET level in BHK_2 (na.rm is forwarded to mean()).
aggr23 = aggregate(SALES_PRICE ~ STREET, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr23$SALES_PRICE
## [1] 10347012  9019227  9897742
aggr23$STREET
## [1] Gravel    No Access Paved    
## Levels: Gravel No Access Paved
nlevels(aggr23$STREET)
## [1] 3
class(aggr23$SALES_PRICE)
## [1] "numeric"

# Row count per STREET level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = STREET, fill = STREET)) +
  geom_bar()

# Mean SALES_PRICE per STREET level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr23, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each STREET level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_boxplot()

# Mean SALES_PRICE per STREET level in BHK_3 (na.rm is forwarded to mean()).
aggr24 = aggregate(SALES_PRICE ~ STREET, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr24$SALES_PRICE
## [1] 14604557 12808119 14008754
aggr24$STREET
## [1] Gravel    No Access Paved    
## Levels: Gravel No Access Paved
nlevels(aggr24$STREET)
## [1] 3
class(aggr24$SALES_PRICE)
## [1] "numeric"

# Row count per STREET level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = STREET, fill = STREET)) +
  geom_bar()

# Mean SALES_PRICE per STREET level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr24, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each STREET level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = STREET, y = SALES_PRICE, fill = STREET)) +
  geom_boxplot()

#11 MZZONE

# Mean SALES_PRICE per MZZONE level in House (na.rm is forwarded to mean()).
aggr25 = aggregate(SALES_PRICE ~ MZZONE, data = House, FUN = mean, na.rm = TRUE)
aggr25$SALES_PRICE
## [1]  5949577  6464620  7153590  9159750  9908756 10447506
aggr25$MZZONE
## [1] A  C  I  RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr25$MZZONE)
## [1] 6
class(aggr25$SALES_PRICE)
## [1] "numeric"

# Row count per MZZONE level in House.
ggplot(data = House, mapping = aes(x = MZZONE, fill = MZZONE)) +
  geom_bar()

# Mean SALES_PRICE per MZZONE level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr25, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each MZZONE level in House.
# (The original drew this identical boxplot twice; it is drawn once here.)
ggplot(data = House, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_boxplot()

# Mean SALES_PRICE per MZZONE level in BHK_2 (na.rm is forwarded to mean()).
aggr26 = aggregate(SALES_PRICE ~ MZZONE, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr26$SALES_PRICE
## [1]  6649765  7257514  7686996  9975170 10675261 11211142
aggr26$MZZONE
## [1] A  C  I  RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr26$MZZONE)
## [1] 6
class(aggr26$SALES_PRICE)
## [1] "numeric"

# Row count per MZZONE level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = MZZONE, fill = MZZONE)) +
  geom_bar()

# Mean SALES_PRICE per MZZONE level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr26, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each MZZONE level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_boxplot()

# Mean SALES_PRICE per MZZONE level in BHK_3 (na.rm is forwarded to mean()).
aggr27 = aggregate(SALES_PRICE ~ MZZONE, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr27$SALES_PRICE
## [1]  9748694 10506115 11164411 13985734 14932087 15578229
aggr27$MZZONE
## [1] A  C  I  RH RL RM
## Levels: A C I RH RL RM
nlevels(aggr27$MZZONE)
## [1] 6
class(aggr27$SALES_PRICE)
## [1] "numeric"

# Row count per MZZONE level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = MZZONE, fill = MZZONE)) +
  geom_bar()

# Mean SALES_PRICE per MZZONE level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr27, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each MZZONE level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = MZZONE, y = SALES_PRICE, fill = MZZONE)) +
  geom_boxplot()

#12 QS_ROOMS

# Mean SALES_PRICE per QS_ROOMS level in House (na.rm is forwarded to mean()).
aggr28 = aggregate(SALES_PRICE ~ QS_ROOMS, data = House, FUN = mean, na.rm = TRUE)
aggr28$SALES_PRICE
## [1] 9058257 8980598 9184492 9223161
aggr28$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr28$QS_ROOMS)
## [1] 4
class(aggr28$SALES_PRICE)
## [1] "numeric"

# Row count per QS_ROOMS level in House.
ggplot(data = House, mapping = aes(x = QS_ROOMS, fill = QS_ROOMS)) +
  geom_bar()

# Mean SALES_PRICE per QS_ROOMS level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr28, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_ROOMS level in House.
ggplot(data = House, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()

# Mean SALES_PRICE per QS_ROOMS level in BHK_2 (na.rm is forwarded to mean()).
aggr29 = aggregate(SALES_PRICE ~ QS_ROOMS, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr29$SALES_PRICE
## [1]  9714444  9717054  9816789 10019719
aggr29$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr29$QS_ROOMS)
## [1] 4
class(aggr29$SALES_PRICE)
## [1] "numeric"

# Row count per QS_ROOMS level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = QS_ROOMS, fill = QS_ROOMS)) +
  geom_bar()

# Mean SALES_PRICE per QS_ROOMS level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr29, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_ROOMS level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()

# Mean SALES_PRICE per QS_ROOMS level in BHK_3 (na.rm is forwarded to mean()).
aggr30 = aggregate(SALES_PRICE ~ QS_ROOMS, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr30$SALES_PRICE
## [1] 13948484 13806011 13980219 13763028
aggr30$QS_ROOMS
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr30$QS_ROOMS)
## [1] 4
class(aggr30$SALES_PRICE)
## [1] "numeric"

# Row count per QS_ROOMS level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = QS_ROOMS, fill = QS_ROOMS)) +
  geom_bar()

# Mean SALES_PRICE per QS_ROOMS level. The bars are built from the aggregated
# means: stat = "identity" on the raw rows draws one bar per row on top of
# each other, so only the tallest (the maximum price) would be visible under a
# label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr30, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_ROOMS level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = QS_ROOMS, y = SALES_PRICE, fill = QS_ROOMS)) +
  geom_boxplot()

# 13 QS_BATHROOM

# Mean SALES_PRICE per QS_BATHROOM level in House (na.rm is forwarded to mean()).
aggr31 = aggregate(SALES_PRICE ~ QS_BATHROOM, data = House, FUN = mean, na.rm = TRUE)
aggr31$SALES_PRICE
## [1] 8942011 9151377 9046837 9190674
aggr31$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr31$QS_BATHROOM)
## [1] 4
class(aggr31$SALES_PRICE)
## [1] "numeric"

# Row count per QS_BATHROOM level in House.
ggplot(data = House, mapping = aes(x = QS_BATHROOM, fill = QS_BATHROOM)) +
  geom_bar()

# Mean SALES_PRICE per QS_BATHROOM level. The bars are built from the
# aggregated means: stat = "identity" on the raw rows draws one bar per row on
# top of each other, so only the tallest (the maximum price) would be visible
# under a label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr31,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_BATHROOM level in House.
ggplot(data = House,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()

# Mean SALES_PRICE per QS_BATHROOM level in BHK_2 (na.rm is forwarded to mean()).
aggr32 = aggregate(SALES_PRICE ~ QS_BATHROOM, data = BHK_2, FUN = mean, na.rm = TRUE)
aggr32$SALES_PRICE
## [1] 9795560 9909563 9748090 9728837
aggr32$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr32$QS_BATHROOM)
## [1] 4
class(aggr32$SALES_PRICE)
## [1] "numeric"

# Row count per QS_BATHROOM level in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = QS_BATHROOM, fill = QS_BATHROOM)) +
  geom_bar()

# Mean SALES_PRICE per QS_BATHROOM level. The bars are built from the
# aggregated means: stat = "identity" on the raw rows draws one bar per row on
# top of each other, so only the tallest (the maximum price) would be visible
# under a label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr32,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_BATHROOM level in BHK_2.
ggplot(data = BHK_2,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()

# Mean SALES_PRICE per QS_BATHROOM level in BHK_3 (na.rm is forwarded to mean()).
aggr33 = aggregate(SALES_PRICE ~ QS_BATHROOM, data = BHK_3, FUN = mean, na.rm = TRUE)
aggr33$SALES_PRICE
## [1] 13945551 14013604 13665760 13851392
aggr33$QS_BATHROOM
## [1] 2 3 4 5
## Levels: 2 3 4 5
nlevels(aggr33$QS_BATHROOM)
## [1] 4
class(aggr33$SALES_PRICE)
## [1] "numeric"

# Row count per QS_BATHROOM level in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = QS_BATHROOM, fill = QS_BATHROOM)) +
  geom_bar()

# Mean SALES_PRICE per QS_BATHROOM level. The bars are built from the
# aggregated means: stat = "identity" on the raw rows draws one bar per row on
# top of each other, so only the tallest (the maximum price) would be visible
# under a label that shows the mean. Each label sits just inside its own bar.
ggplot(data = aggr33,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(SALES_PRICE)), vjust = 1.5)

# Distribution of SALES_PRICE within each QS_BATHROOM level in BHK_3.
ggplot(data = BHK_3,
       mapping = aes(x = QS_BATHROOM, y = SALES_PRICE, fill = QS_BATHROOM)) +
  geom_boxplot()

#15 Houseage

# Histogram of houseage in House (default binning, hence the message below).
ggplot(House, aes(x = houseage)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against houseage in House.
ggplot(House, aes(x = houseage, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

#16 REG_FEE

# Histogram of REG_FEE in House (default binning, hence the message below).
ggplot(data = House, mapping = aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against REG_FEE in House.
ggplot(data = House, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Correlation test for the variable this section is about. The call recorded
# below tests INT_SQFT instead, so REG_FEE vs SALES_PRICE was never tested.
# No output is recorded for this call yet; re-knit the document to capture it.
cor.test(House$REG_FEE, House$SALES_PRICE)

# NOTE(review): INT_SQFT test looks like a copy-paste from another section;
# it is kept because the recorded output below belongs to it.
cor.test(House$INT_SQFT, House$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  House$INT_SQFT and House$SALES_PRICE
## t = 49.022, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6837107 0.7237090
## sample estimates:
##       cor 
## 0.7042683

# REG_FEE values in House that boxplot.stats() reports as outliers.
boxplot.stats(House$REG_FEE)$out
##  [1] 593864 581942 591965 600726 581744 621684 584890 596329 641170 609839
## [11] 699527 620324 612298 621279 619312 609732 574427 622769 576634 651668
## [21] 679450

# Histogram of REG_FEE in BHK_2 (default binning, hence the message below).
ggplot(data = BHK_2, mapping = aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Including Plots

You can also embed plots, for example:

# Scatter of SALES_PRICE against REG_FEE in BHK_2.
ggplot(data = BHK_2, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Correlation test for the variable this section is about. The call recorded
# below tests INT_SQFT instead, so REG_FEE vs SALES_PRICE was never tested.
# No output is recorded for this call yet; re-knit the document to capture it.
cor.test(BHK_2$REG_FEE, BHK_2$SALES_PRICE)

# NOTE(review): INT_SQFT test looks like a copy-paste from another section;
# it is kept because the recorded output below belongs to it.
cor.test(BHK_2$INT_SQFT, BHK_2$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_2$INT_SQFT and BHK_2$SALES_PRICE
## t = 53.881, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7258993 0.7620809
## sample estimates:
##       cor 
## 0.7445364

# REG_FEE values in BHK_2 that boxplot.stats() reports as outliers.
boxplot.stats(BHK_2$REG_FEE)$out
##  [1] 679249 652659 645265 675762 637030 664728 690074 696797 680743 670804
## [11] 718693 647641 678626 695118 691429 649726 636349 630298
# Histogram of REG_FEE in BHK_3 (default binning, hence the message below).
ggplot(data = BHK_3, mapping = aes(x = REG_FEE)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against REG_FEE in BHK_3.
ggplot(data = BHK_3, mapping = aes(x = REG_FEE, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Correlation test for the variable this section is about. The call recorded
# below tests INT_SQFT instead, so REG_FEE vs SALES_PRICE was never tested.
# No output is recorded for this call yet; re-knit the document to capture it.
cor.test(BHK_3$REG_FEE, BHK_3$SALES_PRICE)

# NOTE(review): INT_SQFT test looks like a copy-paste from another section;
# it is kept because the recorded output below belongs to it.
cor.test(BHK_3$INT_SQFT, BHK_3$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_3$INT_SQFT and BHK_3$SALES_PRICE
## t = 60.981, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7681326 0.7994314
## sample estimates:
##       cor 
## 0.7842805

# REG_FEE values in BHK_3 that boxplot.stats() reports as outliers.
boxplot.stats(BHK_3$REG_FEE)$out
##  [1] 917550 923026 963029 913666 940813 941567 909160 983922 931224 947124
## [11] 929714 904779 952411 936314 942859 981117
#17 COMMIS

# Histogram of COMMIS in House (default binning, hence the message below).
ggplot(House, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against COMMIS in House.
ggplot(House, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Pearson correlation test between COMMIS and SALES_PRICE in House.
cor.test(House$COMMIS, House$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  House$COMMIS and House$SALES_PRICE
## t = 31.664, df = 2442, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5107821 0.5670254
## sample estimates:
##       cor 
## 0.5395053

# COMMIS values in House that boxplot.stats() reports as outliers.
boxplot.stats(House$COMMIS)[["out"]]
##  [1] 284560 285318 299188 283893 308495 289172 306149 309656 304670 284544
## [11] 305086 303563 313231 286763 304759 318205 287802 286822 319085 284351
## [21] 301789 353880
# Histogram of COMMIS in BHK_2 (default binning, hence the message below).
ggplot(BHK_2, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against COMMIS in BHK_2.
ggplot(BHK_2, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Pearson correlation test between COMMIS and SALES_PRICE in BHK_2.
cor.test(BHK_2$COMMIS, BHK_2$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_2$COMMIS and BHK_2$SALES_PRICE
## t = 31.138, df = 2334, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5124544 0.5697878
## sample estimates:
##       cor 
## 0.5417509

# COMMIS values in BHK_2 that boxplot.stats() reports as outliers.
boxplot.stats(BHK_2$COMMIS)[["out"]]
##  [1] 313674 304011 343858 304556 301790 334948 312407 318615 339749 312347
## [11] 317913 311007 333425 313993 359347 328306 318539 313966 301723 310315
## [21] 318936
# Histogram of COMMIS in BHK_3 (default binning, hence the message below).
ggplot(BHK_3, aes(x = COMMIS)) +
  geom_histogram(fill = "steelblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter of SALES_PRICE against COMMIS in BHK_3.
ggplot(BHK_3, aes(x = COMMIS, y = SALES_PRICE)) +
  geom_point(color = "darkblue")

# Pearson correlation test between COMMIS and SALES_PRICE in BHK_3.
cor.test(BHK_3$COMMIS, BHK_3$SALES_PRICE)
## 
##  Pearson's product-moment correlation
## 
## data:  BHK_3$COMMIS and BHK_3$SALES_PRICE
## t = 33.563, df = 2327, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5431129 0.5978782
## sample estimates:
##       cor 
## 0.5711308

# COMMIS values in BHK_3 that boxplot.stats() reports as outliers.
boxplot.stats(BHK_3$COMMIS)[["out"]]
##  [1] 471247 456247 495405 421452 443349 442485 418432 466156 422406 455034
## [11] 470784 416414 469920 427494 463218 427485 424072 491961 479297 424557
## [21] 433936 481001 437319 426508 433326 419917 485924 416606 444749 429011
## [31] 420340 475795 451314 415659

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.