library(plyr)  
require(gdata)
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## 
## The following object is masked from 'package:stats':
## 
##     nobs
## 
## The following object is masked from 'package:utils':
## 
##     object.size
NYC <- read.xls("rollingsales_manhattan.xls",pattern="BOROUGH")
head(NYC)
##   BOROUGH              NEIGHBORHOOD
## 1       1                          
## 2       1                          
## 3       1                          
## 4       1                          
## 5       1                          
## 6       1                          
##                        BUILDING.CLASS.CATEGORY TAX.CLASS.AT.PRESENT BLOCK
## 1 13  CONDOS - ELEVATOR APARTMENTS                                    738
## 2 13  CONDOS - ELEVATOR APARTMENTS                                    738
## 3 13  CONDOS - ELEVATOR APARTMENTS                                    738
## 4 13  CONDOS - ELEVATOR APARTMENTS                                    738
## 5 13  CONDOS - ELEVATOR APARTMENTS                                    738
## 6 13  CONDOS - ELEVATOR APARTMENTS                                    738
##    LOT EASE.MENT BUILDING.CLASS.AT.PRESENT
## 1 1306        NA                          
## 2 1307        NA                          
## 3 1308        NA                          
## 4 1309        NA                          
## 5 1310        NA                          
## 6 1311        NA                          
##                                     ADDRESS APART.MENT.NUMBER ZIP.CODE
## 1 345 WEST 14TH   STREET                                         10014
## 2 345 WEST 14TH   STREET                                         10014
## 3 345 WEST 14TH   STREET                                         10014
## 4 345 WEST 14TH   STREET                                         10014
## 5 345 WEST 14TH   STREET                                         10014
## 6 345 WEST 14TH   STREET                                         10014
##   RESIDENTIAL.UNITS COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET
## 1                 0                0           0                0
## 2                 0                0           0                0
## 3                 0                0           0                0
## 4                 0                0           0                0
## 5                 0                0           0                0
## 6                 0                0           0                0
##   GROSS.SQUARE.FEET YEAR.BUILT TAX.CLASS.AT.TIME.OF.SALE
## 1                 0          0                         2
## 2                 0          0                         2
## 3                 0          0                         2
## 4                 0          0                         2
## 5                 0          0                         2
## 6                 0          0                         2
##   BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE  SALE.DATE
## 1                             R4 $2,214,693 2013-05-20
## 2                             R4 $1,654,656 2013-05-16
## 3                             R4 $1,069,162 2013-05-23
## 4                             R4 $1,374,637 2013-05-15
## 5                             R4 $1,649,565 2013-05-13
## 6                             R4 $1,705,568 2013-05-21
summary(NYC)
##     BOROUGH                     NEIGHBORHOOD  
##  Min.   :1   MIDTOWN WEST             : 6264  
##  1st Qu.:1   UPPER EAST SIDE (59-79)  : 2569  
##  Median :1   UPPER EAST SIDE (79-96)  : 2117  
##  Mean   :1   UPPER WEST SIDE (59-79)  : 2053  
##  3rd Qu.:1   MIDTOWN EAST             : 1357  
##  Max.   :1   UPPER WEST SIDE (79-96)  : 1133  
##              (Other)                  :11902  
##                                  BUILDING.CLASS.CATEGORY
##  13  CONDOS - ELEVATOR APARTMENTS            :8480      
##  10  COOPS - ELEVATOR APARTMENTS             :7530      
##                                              :3454      
##  25  LUXURY HOTELS                           :1644      
##  17  CONDOPS                                 :1083      
##  07  RENTALS - WALKUP APARTMENTS             :1020      
##  (Other)                                     :4184      
##  TAX.CLASS.AT.PRESENT     BLOCK           LOT         EASE.MENT     
##  2      :18394        Min.   :   7   Min.   :   1.0   Mode:logical  
##  4      : 6920        1st Qu.: 877   1st Qu.:  37.0   NA's:27395    
##  2C     :  789        Median :1047   Median :1007.0                 
##  1      :  505        Mean   :1110   Mean   : 741.8                 
##  2B     :  362        3rd Qu.:1411   3rd Qu.:1233.0                 
##  2A     :  264        Max.   :2250   Max.   :9117.0                 
##  (Other):  161                                                      
##  BUILDING.CLASS.AT.PRESENT
##  R4     :8384             
##  D4     :7227             
##  RH     :3515             
##  H2     :1641             
##  R9     :1082             
##  C6     : 804             
##  (Other):4742             
##                                       ADDRESS         APART.MENT.NUMBER
##  870 7 AVENUE                             : 2087               :14570  
##  102 WEST 57TH STREET                     : 1322   TIMES       :  599  
##  200 WEST 56TH   STREET                   :  608   5B          :   85  
##  1335 AVENUE OF THE AMERIC                :  405   3A          :   77  
##  102 WEST 57TH ST                         :  262   6A          :   77  
##  1335 AVENUE OF THE AMER                  :  191   4A          :   76  
##  (Other)                                  :22520   (Other)     :11911  
##     ZIP.CODE     RESIDENTIAL.UNITS COMMERCIAL.UNITS   TOTAL.UNITS   
##  Min.   :    0   0      :16372     Min.   :  0.000   1      :13772  
##  1st Qu.:10016   1      : 9132     1st Qu.:  0.000   0      : 9704  
##  Median :10019   2      :  229     Median :  0.000   2      : 1909  
##  Mean   :10029   3      :  182     Mean   :  0.375   3      :  189  
##  3rd Qu.:10027   8      :  125     3rd Qu.:  0.000   4      :  187  
##  Max.   :10463   4      :  123     Max.   :604.000   10     :  159  
##                  (Other): 1232                       (Other): 1475  
##  LAND.SQUARE.FEET GROSS.SQUARE.FEET   YEAR.BUILT  
##  0      :22906    0      :23069     Min.   :   0  
##  7,532  : 1635    112,850: 1634     1st Qu.:1900  
##  2,500  :   65    3,600  :   15     Median :1928  
##  2,008  :   45    5,400  :   14     Mean   :1494  
##  2,554  :   42    4,000  :   12     3rd Qu.:1973  
##  2,523  :   39    4,976  :   12     Max.   :2013  
##  (Other): 2663    (Other): 2639                   
##  TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE
##  Min.   :1.000             R4     :8480                  
##  1st Qu.:2.000             D4     :7227                  
##  Median :2.000             RH     :2983                  
##  Mean   :2.488             H2     :1641                  
##  3rd Qu.:4.000             R9     :1083                  
##  Max.   :4.000             R5     : 995                  
##                            (Other):4986                  
##       SALE.PRICE         SALE.DATE    
##  $0        : 7593   2012-12-17:  485  
##  $57,900   :  272   2012-12-27:  324  
##  $100      :  217   2012-12-20:  319  
##  $10       :  143   2012-12-28:  308  
##  $18,850   :  143   2012-12-19:  293  
##  $1,100,000:   97   2012-08-23:  274  
##  (Other)   :18930   (Other)   :25392
colnames(NYC)
##  [1] "BOROUGH"                        "NEIGHBORHOOD"                  
##  [3] "BUILDING.CLASS.CATEGORY"        "TAX.CLASS.AT.PRESENT"          
##  [5] "BLOCK"                          "LOT"                           
##  [7] "EASE.MENT"                      "BUILDING.CLASS.AT.PRESENT"     
##  [9] "ADDRESS"                        "APART.MENT.NUMBER"             
## [11] "ZIP.CODE"                       "RESIDENTIAL.UNITS"             
## [13] "COMMERCIAL.UNITS"               "TOTAL.UNITS"                   
## [15] "LAND.SQUARE.FEET"               "GROSS.SQUARE.FEET"             
## [17] "YEAR.BUILT"                     "TAX.CLASS.AT.TIME.OF.SALE"     
## [19] "BUILDING.CLASS.AT.TIME.OF.SALE" "SALE.PRICE"                    
## [21] "SALE.DATE"
NYC <- NYC[,-1]
NYC$SALE.PRICE<-as.numeric(gsub("[^[:digit:]]","",NYC$SALE.PRICE))
count(is.na(NYC$SALE.PRICE))
##       x  freq
## 1 FALSE 27395
names(NYC)<-tolower(names(NYC))
NYC$gross.square.feet<-as.numeric(gsub("[^[:digit:]]","",NYC$gross.square.feet))
NYC$land.square.feet<-as.numeric(gsub("[^[:digit:]]","",NYC$land.square.feet))
NYC$sale.date<-as.Date(NYC$sale.date)
NYC$built<-as.numeric(as.character(NYC$year.built))

Exploratory Graphs

Retain Actual Sales Only

Limit Inquiry to Family Homes (1-3)

NYC.homes<-NYC.sale[which(grepl("FAMILY",
           NYC.sale$building.class.category)),]
plot(log(NYC.homes$gross.square.feet),log(NYC.homes$sale.price))

NYC.homes$outlier<-(log(NYC.homes$sale.price)<=5) + 0
NYC.homes<-NYC.homes[which(NYC.homes$outlier==0),]
plot(log(NYC.homes$gross.square.feet),log(NYC.homes$sale.price))