Realis Part 01 Watertown

Background

The data for this R script comes from the URA Realis web site (www.ura.gov.sg, click on Realis menu) The purpose of the web site
is to provide data and information about the caveats lodged for EVERY property transaction in Singapore.

The URA Realis web site contains a lot of data and we will only look at a small subset for this R script. The files for this
script are as follows:

(1) realis_all_apartment_99_newsub_2012_jan_oct_13_part_xx.csv

Contains information about ONLY apartments in EVERY region, with a tenure of NINETY-NINE(99)-year lease, for uncompleted
properties (New and Sub sales) from January 2012 to today (13 October 2012).

(2) realis_ner_apartment_99_newsub_2012_jan_oct_13_part_xx.csv

Contains information about ONLY apartments in ONLY north-east region, with a tenure of NINETY-NINE(99)-year lease, for
uncompleted properties (New and Sub sales) from January 2012 to today (13 October 2012).

(3) realis_watertown_2000_oct_2012_oct_13.csv

Contains information about ALL apartments in project Watertown ONLY from January 2012 to today (13 October 2012).

library(psych)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: mvtnorm
## Loading required package: polycor
## Loading required package: sfsmisc
## Attaching package: 'polycor'
## The following object(s) are masked from 'package:psych':
## 
## polyserial
## Attaching package: 'ltm'
## The following object(s) are masked from 'package:psych':
## 
## factor.scores
library(gclus)
## Loading required package: cluster
library(RColorBrewer)
library(wordcloud)
## Loading required package: Rcpp

#|------------------------------------------------------------------------------------------|
#|                                I N I T I A L I Z A T I O N                               |
#|------------------------------------------------------------------------------------------|
RealisReadDfr <- function(fileStr, partNum=1, 
                          workDirStr="C:/Users/denbrige/100 FxOption/103 FxOptionVerBack/080 Fx Git/R-nonsource")
{
  #---  Assert THREE (3) arguments:                                                   
  #       fileStr:      name of the file (without the suffix "_part_xx" and extension ".csv"
  #       partNum:      number of parts                                               
  #       workDirStr:   working directory                                             

  #---  Check that partNum is valid (between 1 to 99)                                 
  if( as.numeric(partNum) < 1 || as.numeric(partNum) > 99 ) 
    stop("partNum MUST be between 1 AND 99")
  #---  Set working directory                                                         
  setwd(workDirStr)
  #---  Read data from split parts
  #       Append suffix to the fileStr
  #       Read each part and merge them together

  if( as.numeric(partNum) > 1 )
  {
    retDfr <- read.csv( paste( fileStr, "_part01.csv", sep="" ), colClasses = "character" )

    for( id in 2:partNum )
    {
    #---  rbind() function will bind two data frames with the same header together
      partStr <- paste( fileStr, "_part", sprintf("%02d", as.numeric(id)), ".csv", sep="" )
      tmpDfr <- read.csv( partStr, colClasses = "character")
      retDfr <- rbind( retDfr, tmpDfr )
    }
  }
  else
    retDfr <- read.csv( paste( fileStr, ".csv", sep="" ), colClasses = "character" )

  #---  Return a data frame
  return(retDfr)
}

#|------------------------------------------------------------------------------------------|
#|                            I N T E R N A L   F U N C T I O N S                           |
#|------------------------------------------------------------------------------------------|
RealisAddressSplitDfr <- function( inChr )                                             
{                                                                                   
#---  Assert THREE (3) arguments:                                                   
#       inChr:      vector of addresses                                             

#---  Split address into parts      
  subChr <- substring(inChr, regexpr('#', inChr)) 
  lvlChr <- substr(subChr, 2, 3)
  untChr <- substr(subChr, 5, 6)

#---  Return a data frame         
  return(data.frame(level=lvlChr, unit=untChr))
}                                                                                   

freqVtr <- function(inDfr, orderVtr) 
{
  #---  Assert 'directory' is a character vector of length 1 indicating the location of the
  #       CSV files.
  #     'threshold' is a numeric vector of length 1 indicating the number of completely
  #       observed observations (on all variables) required to compute the correlation 
  #       between nitrate and sulfate; the default is 0.
  #     Return a numeric vector of correlations.

  #---  Assert create an empty numeric vector
  outVtr <- numeric(0)

  for(ord in orderVtr)
  {
    #---  Append numeric vector
    outVtr <- c(outVtr, inDfr[inDfr$name==ord,2])
  }

  #---  Assert return value is a numeric vector
  return(outVtr)
}

#|------------------------------------------------------------------------------------------|
#|                                M A I N   P R O C E D U R E                               |
#|------------------------------------------------------------------------------------------|
#---  Init loading data
rawDfr <- RealisReadDfr("realis_watertown_2000_oct_2012_oct_13")
#---  Count of cols of data
ncol(rawDfr)
## [1] 19
#---  Count of rows of data
nrow(rawDfr)
## [1] 717
#---  Names of header
names(rawDfr)
##  [1] "Project.Name"                "Address"                    
##  [3] "No..of.Units"                "Area..sqm."                 
##  [5] "Type.of.Area"                "Transacted.Price...."       
##  [7] "Unit.Price....psm."          "Unit.Price....psf."         
##  [9] "Contract.Date"               "Property.Type"              
## [11] "Tenure"                      "Completion.Date"            
## [13] "Type.of.Sale"                "Purchaser.Address.Indicator"
## [15] "Postal.District"             "Postal.Sector"              
## [17] "Postal.Code"                 "Planning.Region"            
## [19] "Planning.Area"
#---  Peek at data
head(rawDfr)
##   Project.Name                     Address No..of.Units Area..sqm.
## 1    WATERTOWN 79 Punggol Central  #06-83             1        109
## 2    WATERTOWN 67 Punggol Central  #13-16             1        137
## 3    WATERTOWN 79 Punggol Central  #06-86             1        126
## 4    WATERTOWN 67 Punggol Central  #10-15             1        127
## 5    WATERTOWN 67 Punggol Central  #08-13             1        109
## 6    WATERTOWN 67 Punggol Central  #05-13             1        109
##   Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1       Strata              1741200              15974               1484
## 2       Strata              2417280              17644               1639
## 3       Strata              2069225              16422               1526
## 4       Strata              2116110              16662               1548
## 5       Strata              1748970              16046               1491
## 6       Strata              1571560              14418               1339
##   Contract.Date Property.Type                 Tenure Completion.Date
## 1   20-SEP-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 2   30-AUG-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 3   28-AUG-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 4   06-JUL-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 5   04-JUN-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 6   31-MAY-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
##   Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1     New Sale                     Private              19            82
## 2     New Sale                         HDB              19            82
## 3     New Sale                         HDB              19            82
## 4     New Sale                     Private              19            82
## 5     New Sale                     Private              19            82
## 6     New Sale                         HDB              19            82
##   Postal.Code   Planning.Region Planning.Area
## 1      828759 North East Region       Punggol
## 2      828843 North East Region       Punggol
## 3      828759 North East Region       Punggol
## 4      828843 North East Region       Punggol
## 5      828843 North East Region       Punggol
## 6      828843 North East Region       Punggol

#--- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings( as.numeric( rawDfr[, 4] ) )    # Area sqm
rawDfr[, 6] <- suppressWarnings( as.numeric( rawDfr[, 6] ) )    # Transacted Price
rawDfr[, 7] <- suppressWarnings( as.numeric( rawDfr[, 7] ) )    # Unit Price psm
rawDfr[, 8] <- suppressWarnings( as.numeric( rawDfr[, 8] ) )    # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")

#--- Split address into parts and merge new columns
rawDfr <- cbind( rawDfr, RealisAddressSplitDfr(rawDfr[, 2]) )

#|------------------------------------------------------------------------------------------|
#|                            P A R T   O N E   P R O C E D U R E                           |
#|------------------------------------------------------------------------------------------|
#---  Count of rows of data
nrow(rawDfr)
## [1] 717
describe(rawDfr[, 4])
##   var   n  mean    sd median trimmed   mad min max range skew kurtosis
## 1   1 717 72.56 25.17     56   69.17 10.38  49 147    98 0.93    -0.43
##     se
## 1 0.94
describe(rawDfr[, 6])
##   var   n   mean     sd median trimmed    mad    min     max   range skew
## 1   1 717 942905 348324 791607  890471 257746 547057 2417280 1870223 1.32
##   kurtosis    se
## 1     1.62 13008
describe(rawDfr[, 8])
##   var   n mean  sd median trimmed   mad min  max range skew kurtosis   se
## 1   1 717 1208 125   1195    1202 131.9 960 1639   679 0.41    -0.34 4.67
#--- Plot a simple histogram 
par( mfrow = c(3,2), las=2, mar=c(5.1,5.1,4.1,2.1) )
hist( rawDfr[, 4], xlab="Area Sqm", main="Distribution of Floor Area" )
boxplot( rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist( rawDfr[, 6], xlab="", main="Distribution of Purchase Price" )
boxplot( rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist( rawDfr[, 8], xlab="Unit Price Psf", main="Distribution of Unit Price Psf" )
boxplot( rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-1

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (rawDfr[, 4]/pi)
pal <- colorRampPalette(brewer.pal(3, "Blues"))
symbols(rawDfr[, 8], rawDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
# text( rawDfr[, 8], rawDfr[, 6], rawDfr[, 4], cex=0.6, col='red' )
text(1195, 791607, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-2

# --- Plot a timeline
par(mfrow = c(3, 1))
tempDfr <- rawDfr
tempDfr <- tempDfr[complete.cases(tempDfr[, 8]), ]
tempDfr <- tempDfr[complete.cases(tempDfr[, 9]), ]
plot(tempDfr[, 6] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12), 
    xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")), 
    xlab = "Date", ylab = "Transacted Price", main = "Timeline of Transacted Price")
plot(tempDfr[, 8] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12), 
    xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")), 
    xlab = "Date", ylab = "Unit Price Psf", main = "Timeline of Unit Price Psf")
plot(tempDfr[, 4] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12), 
    xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")), 
    xlab = "Date", ylab = "Area Sqm", main = "Timeline of Area Sqm")

plot of chunk unnamed-chunk-3


#
# |------------------------------------------------------------------------------------------|
# | P A R T T W O P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 63
describe(subDfr[, 4])
##   var  n  mean    sd median trimmed   mad min max range skew kurtosis  se
## 1   1 63 72.92 26.19     56   69.59 10.38  49 138    89 0.87    -0.69 3.3
describe(subDfr[, 6])
##   var  n   mean     sd median trimmed    mad    min     max   range skew
## 1   1 63 985759 372858 818400  929870 245747 614991 2314452 1699461 1.33
##   kurtosis    se
## 1     1.56 46976
describe(subDfr[, 8])
##   var  n mean    sd median trimmed   mad  min  max range skew kurtosis
## 1   1 63 1258 119.4   1244    1255 137.9 1040 1558   518 0.23    -0.64
##      se
## 1 15.04
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-4

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1244, 818400, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-5


#
# |------------------------------------------------------------------------------------------|
# | P A R T T H R E E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr[, 4] == 104, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 21
describe(subDfr[, 4])
##   var  n mean sd median trimmed mad min max range skew kurtosis se
## 1   1 21  104  0    104     104   0 104 104     0  NaN      NaN  0
describe(subDfr[, 6])
##   var  n    mean     sd  median trimmed    mad     min     max  range skew
## 1   1 21 1291946 135040 1288954 1291082 168314 1089215 1523625 434410 0.07
##   kurtosis    se
## 1    -1.36 29468
describe(subDfr[, 8])
##   var  n mean    sd median trimmed   mad min  max range skew kurtosis
## 1   1 21 1154 120.6   1151    1153 149.7 973 1361   388 0.07    -1.36
##      se
## 1 26.31
# --- Plot a simple histogram
par(mfrow = c(2, 2), las = 2)
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 104 sqm)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 104 sqm)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-6

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 104 sqm)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1151, 1288954, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-7


#
# |------------------------------------------------------------------------------------------|
# | P A R T F O U R P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Init loading data
rawDfr <- RealisReadDfr("realis_ner_apartment_99_newsub_2012_jan_oct_13", 2)
# --- Count of cols of data
ncol(rawDfr)
## [1] 19
# --- Count of rows of data
nrow(rawDfr)
## [1] 1071
nrow(rawDfr[rawDfr$Project.Name == "WATERTOWN", ])
## [1] 717
# --- Names of header
names(rawDfr)
##  [1] "Project.Name"                "Address"                    
##  [3] "No..of.Units"                "Area..sqm."                 
##  [5] "Type.of.Area"                "Transacted.Price...."       
##  [7] "Unit.Price....psm."          "Unit.Price....psf."         
##  [9] "Contract.Date"               "Property.Type"              
## [11] "Tenure"                      "Completion.Date"            
## [13] "Type.of.Sale"                "Purchaser.Address.Indicator"
## [15] "Postal.District"             "Postal.Sector"              
## [17] "Postal.Code"                 "Planning.Region"            
## [19] "Planning.Area"
# --- Peek at data
head(rawDfr)
##         Project.Name                     Address No..of.Units Area..sqm.
## 1 BARTLEY RESIDENCES  1A Lorong How Sun  #02-05             1         85
## 2 BARTLEY RESIDENCES   3 Lorong How Sun  #04-13             1        102
## 3 BARTLEY RESIDENCES   7 Lorong How Sun  #05-37             1         74
## 4          WATERTOWN 79 Punggol Central  #06-83             1        109
## 5 BARTLEY RESIDENCES  3A Lorong How Sun  #13-22             1         84
## 6 BARTLEY RESIDENCES  3A Lorong How Sun  #07-19             1         99
##   Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1       Strata              1131000              13306               1236
## 2       Strata              1316000              12902               1199
## 3       Strata              1020000              13784               1281
## 4       Strata              1741200              15974               1484
## 5       Strata              1146000              13643               1267
## 6       Strata              1283000              12960               1204
##   Contract.Date Property.Type                 Tenure Completion.Date
## 1   01-OCT-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
## 2   20-SEP-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
## 3   20-SEP-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
## 4   20-SEP-2012     Apartment 99 Yrs From 18/05/2011     Uncompleted
## 5   19-SEP-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
## 6   17-SEP-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
##   Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1     New Sale                     Private              19            53
## 2     New Sale                         HDB              19            53
## 3     New Sale                         HDB              19            53
## 4     New Sale                     Private              19            82
## 5     New Sale                     Private              19            53
## 6     New Sale                         HDB              19            53
##   Postal.Code   Planning.Region Planning.Area
## 1      536559 North East Region     Serangoon
## 2      536560 North East Region     Serangoon
## 3      536564 North East Region     Serangoon
## 4      828759 North East Region       Punggol
## 5      536561 North East Region     Serangoon
## 6      536561 North East Region     Serangoon

# --- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings(as.numeric(rawDfr[, 4]))  # Area sqm
rawDfr[, 6] <- suppressWarnings(as.numeric(rawDfr[, 6]))  # Transacted Price
rawDfr[, 7] <- suppressWarnings(as.numeric(rawDfr[, 7]))  # Unit Price psm
rawDfr[, 8] <- suppressWarnings(as.numeric(rawDfr[, 8]))  # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")

# --- Split address into parts and merge new columns
rawDfr <- cbind(rawDfr, RealisAddressSplitDfr(rawDfr[, 2]))
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(rawDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(rawDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(rawDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-8


subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 93
describe(subDfr[, 4])
##   var  n  mean    sd median trimmed   mad min max range skew kurtosis   se
## 1   1 93 74.75 26.71     74   72.55 31.13  43 149   106 0.62    -0.71 2.77
describe(subDfr[, 6])
##   var  n    mean     sd  median trimmed    mad    min     max   range skew
## 1   1 93 1017030 360184 1001000  975188 436553 614991 2314452 1699461    1
##   kurtosis    se
## 1     0.85 37349
describe(subDfr[, 8])
##   var  n mean    sd median trimmed   mad  min  max range skew kurtosis
## 1   1 93 1274 113.6   1256    1273 120.1 1040 1558   518 0.17    -0.55
##      se
## 1 11.78

# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-8

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1256, 1001000, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-9


subDfr <- rawDfr[rawDfr[, 4] > 102, ]
subDfr <- subDfr[subDfr[, 4] < 106, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 75
describe(subDfr[, 4])
##   var  n  mean   sd median trimmed  mad min max range skew kurtosis  se
## 1   1 75 103.9 0.85    104   103.9 1.48 103 105     2  0.2     -1.6 0.1
describe(subDfr[, 6])
##   var  n    mean     sd  median trimmed    mad     min     max  range skew
## 1   1 75 1240368 116583 1207281 1231266 114248 1079891 1523625 443734 0.65
##   kurtosis    se
## 1     -0.7 13462
describe(subDfr[, 8])
##   var  n mean   sd median trimmed   mad min  max range skew kurtosis    se
## 1   1 75 1109 99.3   1083    1101 100.8 973 1361   388 0.69    -0.55 11.47
# --- Plot a simple histogram
par(mfrow = c(2, 2), las = 2)
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 102<sqm<106)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 102<sqm<106)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-10

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 102<sqm<106)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1083, 1207281, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-11


#
# |------------------------------------------------------------------------------------------|
# | P A R T F I V E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Init loading data
rawDfr <- RealisReadDfr("realis_all_apartment_99_newsub_2012_jan_oct_13", 2)
# --- Count of cols of data
ncol(rawDfr)
## [1] 19
# --- Count of rows of data
nrow(rawDfr)
## [1] 1819
nrow(rawDfr[rawDfr$Project.Name == "WATERTOWN", ])
## [1] 717
# --- Names of header
names(rawDfr)
##  [1] "Project.Name"                "Address"                    
##  [3] "No..of.Units"                "Area..sqm."                 
##  [5] "Type.of.Area"                "Transacted.Price...."       
##  [7] "Unit.Price....psm."          "Unit.Price....psf."         
##  [9] "Contract.Date"               "Property.Type"              
## [11] "Tenure"                      "Completion.Date"            
## [13] "Type.of.Sale"                "Purchaser.Address.Indicator"
## [15] "Postal.District"             "Postal.Sector"              
## [17] "Postal.Code"                 "Planning.Region"            
## [19] "Planning.Area"
# --- Peek at data
head(rawDfr)
##         Project.Name                       Address No..of.Units Area..sqm.
## 1 BARTLEY RESIDENCES    1A Lorong How Sun  #02-05             1         85
## 2      NIN RESIDENCE 85 Pheng Geck Avenue  #17-13             1         84
## 3  UP@ROBERTSON QUAY    92 Robertson Quay  #07-06             1         49
## 4       V ON SHENTON       5A Shenton Way  #18-12             1         96
## 5       V ON SHENTON       5A Shenton Way  #32-14             1         44
## 6 BARTLEY RESIDENCES     3 Lorong How Sun  #04-13             1        102
##   Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1       Strata              1131000              13306               1236
## 2       Strata              1152000              13714               1274
## 3       Strata              1490000              30408               2825
## 4       Strata              1885000              19635               1824
## 5       Strata              1153000              26205               2434
## 6       Strata              1316000              12902               1199
##   Contract.Date Property.Type                 Tenure Completion.Date
## 1   01-OCT-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
## 2   26-SEP-2012     Apartment 99 Yrs From 13/09/2010     Uncompleted
## 3   24-SEP-2012     Apartment 99 Yrs From 07/06/2011     Uncompleted
## 4   24-SEP-2012     Apartment 99 Yrs From 29/11/2011     Uncompleted
## 5   21-SEP-2012     Apartment 99 Yrs From 29/11/2011     Uncompleted
## 6   20-SEP-2012     Apartment 99 Yrs From 29/06/2011     Uncompleted
##   Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1     New Sale                     Private              19            53
## 2     New Sale                     Private              13            34
## 3     New Sale                     Private              09            23
## 4     New Sale                     Private              01            06
## 5     New Sale                     Private              01            06
## 6     New Sale                         HDB              19            53
##   Postal.Code   Planning.Region   Planning.Area
## 1      536559 North East Region       Serangoon
## 2      348271    Central Region       Toa Payoh
## 3      238260    Central Region Singapore River
## 4      068814    Central Region   Downtown Core
## 5      068814    Central Region   Downtown Core
## 6      536560 North East Region       Serangoon

# --- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings(as.numeric(rawDfr[, 4]))  # Area sqm
rawDfr[, 6] <- suppressWarnings(as.numeric(rawDfr[, 6]))  # Transacted Price
rawDfr[, 7] <- suppressWarnings(as.numeric(rawDfr[, 7]))  # Unit Price psm
rawDfr[, 8] <- suppressWarnings(as.numeric(rawDfr[, 8]))  # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")

# --- Split address into parts and merge new columns
rawDfr <- cbind(rawDfr, RealisAddressSplitDfr(rawDfr[, 2]))

Part FIVE (5)B: Which is the most popular project in Singapore?

This part answers a really simple question: Which project has sold the MOST units in Singapore?

The project that sold the MOST units in Singapore is Watertown with SEVEN HUNDRED AND SEVENTEEN (717) sales.

#
# |------------------------------------------------------------------------------------------|
# | P A R T C O U N T P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Plot a wordcloud
nameDfr <- rawDfr$Project.Name
if (length(unique(rawDfr$Project.Name)) > 6) {

    par(mfrow = c(1, 1), mar = c(2.1, 2.1, 2.1, 2.1))
    wordcloud(gsub(" ", ".", rawDfr$Project.Name), colors = brewer.pal(6, "Dark2"), 
        scale = c(7, 0.7), random.order = FALSE)
}
## Loading required package: tm

plot of chunk unnamed-chunk-12

Part FIVE (5)C: Which project is the MOST expensive in Singapore?

This part answers a really simple question: Which project has the highest MEDIAN psf in Singapore?

The MOST expensive project in Singapore is Up@Robertson Quay, with a median psf of about TWO THOUSAND FIVE HUNDRED $2,500.


if (length(unique(rawDfr$Project.Name)) > 6) {
    # --- Count of freq by state
    table(nameDfr)
    # --- Create a data frame of freq by state Remove row.names
    tableDfr <- data.frame(name = names(tapply(nameDfr, nameDfr, length)), freq = tapply(nameDfr, 
        nameDfr, length))
    rownames(tableDfr) <- NULL

    # --- Create a subset
    outcome2Dfr <- rawDfr[rawDfr$Project.Name %in% subset(tableDfr$name, tableDfr$freq >= 
        20), ]
    # --- Count of rows of data
    nrow(outcome2Dfr)
    # --- Count of freq by state
    table(outcome2Dfr$Project.Name)

    # --- Plot a simple boxplot of the THIRTY(30)-day death rates by state (a)
    # Add a label to the y-axis '30-day Death Rate' (b) Add a title for the
    # histogram 'Heart Attack 30-day Death Rate by State' (c) Set the x- and
    # y-axis tick labels to be perpendicular to the axes (see las) (d) Order
    # the states by the MEDIAN THIRTY(30)-day death rate and plot the boxplot.
    # (e) Shrink the x-axis tick labels so that the abbreviated state names do
    # NOT overlap EACH other.  (f) Alter the x-axis tick labels so that they
    # include the number of hospitals in that state in parentheses. For
    # example, the label for the state of Connecticut would be CT (32). You
    # will need the axis() function and when you call the boxplot() function
    # you will want to set the option xaxt to be 'n'.
    psf <- outcome2Dfr[, 8]
    name <- reorder(outcome2Dfr$Project.Name, outcome2Dfr[, 8], median, na.rm = TRUE)
    orderVtr <- levels(name["scores"])
    countVtr <- freqVtr(tableDfr, orderVtr)

    par(mfrow = c(1, 1), las = 2, mar = c(15.1, 5.1, 4.1, 2.1))
    boxplot(psf ~ name, ylab = "Unit Price psf", xaxt = "n", main = "Unit Price Psf by Project")
    axis(1, at = seq_along(orderVtr), cex.axis = 0.8, labels = eval(substitute(paste(st, 
        " (", n, ")", sep = ""), list(st = orderVtr, n = countVtr))))
}

plot of chunk unnamed-chunk-13


# --- Remove outlier (Area = 674)
rawDfr <- rawDfr[rawDfr[, 4] < 674, ]
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(rawDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(rawDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(rawDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-14


subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 121
describe(subDfr[, 4])
##   var   n  mean    sd median trimmed   mad min max range skew kurtosis
## 1   1 121 72.88 27.29     58   69.52 22.24  43 173   130 1.07     0.63
##     se
## 1 2.48
describe(subDfr[, 6])
##   var   n    mean     sd median trimmed    mad    min     max   range skew
## 1   1 121 1010103 412163 832118  947439 306723 598718 3134560 2535842 1.96
##   kurtosis    se
## 1      5.8 37469
describe(subDfr[, 8])
##   var   n mean    sd median trimmed   mad  min  max range skew kurtosis
## 1   1 121 1293 179.8   1255    1270 121.6 1040 2178  1138 2.71    10.19
##      se
## 1 16.35

# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-14

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2)
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1255, 832118, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-15


subDfr <- rawDfr[rawDfr[, 4] > 102, ]
subDfr <- subDfr[subDfr[, 4] < 106, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 86
describe(subDfr[, 4])
##   var  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## 1   1 86 103.9 0.85    104   103.9 1.48 103 105     2 0.17    -1.61 0.09
describe(subDfr[, 6])
##   var  n    mean     sd  median trimmed    mad     min     max   range
## 1   1 86 1313100 238331 1231218 1269714 141459 1079891 2299000 1219109
##   skew kurtosis    se
## 1    2     4.37 25700
describe(subDfr[, 8])
##   var  n mean    sd median trimmed   mad min  max range skew kurtosis
## 1   1 86 1174 209.4   1098    1135 111.2 973 2034  1061 2.04     4.44
##      se
## 1 22.58
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Size 102<sqm<106)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 104 sqm)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 102<sqm<106)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")

plot of chunk unnamed-chunk-16

# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white", 
    bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 102<sqm<106)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1098, 1231218, "MEDIAN", cex = 1.2, col = "red")

plot of chunk unnamed-chunk-17