The data for this R script comes from the URA Realis web site (www.ura.gov.sg, click on Realis menu) The purpose of the web site
is to provide data and information about the caveats lodged for EVERY property transaction in Singapore.
The URA Realis web site contains a lot of data and we will only look at a small subset for this R script. The files for this
script are as follows:
(1) realis_terrasse_newsub_2008_jan_oct_15.csv
Contains information about ALL apartments in project Terrassee ONLY from January 2008 to current (13 October 2012).
library(psych)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: mvtnorm
## Loading required package: polycor
## Loading required package: sfsmisc
## Attaching package: 'polycor'
## The following object(s) are masked from 'package:psych':
##
## polyserial
## Attaching package: 'ltm'
## The following object(s) are masked from 'package:psych':
##
## factor.scores
library(gclus)
## Loading required package: cluster
#|------------------------------------------------------------------------------------------|
#| I N I T I A L I Z A T I O N |
#|------------------------------------------------------------------------------------------|
RealisReadDfr <- function(fileStr, partNum=1,
workDirStr="C:/Users/denbrige/100 FxOption/103 FxOptionVerBack/080 Fx Git/R-nonsource")
{
#--- Assert THREE (3) arguments:
# fileStr: name of the file (without the suffix "_part_xx" and extension ".csv"
# partNum: number of parts
# workDirStr: working directory
#--- Check that partNum is valid (between 1 to 99)
if( as.numeric(partNum) < 1 || as.numeric(partNum) > 99 )
stop("partNum MUST be between 1 AND 99")
#--- Set working directory
setwd(workDirStr)
#--- Read data from split parts
# Append suffix to the fileStr
# Read each part and merge them together
if( as.numeric(partNum) > 1 )
{
retDfr <- read.csv( paste( fileStr, "_part01.csv", sep="" ), colClasses = "character" )
for( id in 2:partNum )
{
#--- rbind() function will bind two data frames with the same header together
partStr <- paste( fileStr, "_part", sprintf("%02d", as.numeric(id)), ".csv", sep="" )
tmpDfr <- read.csv( partStr, colClasses = "character")
retDfr <- rbind( retDfr, tmpDfr )
}
}
else
retDfr <- read.csv( paste( fileStr, ".csv", sep="" ), colClasses = "character" )
#--- Return a data frame
return(retDfr)
}
#|------------------------------------------------------------------------------------------|
#| I N T E R N A L F U N C T I O N S |
#|------------------------------------------------------------------------------------------|
RealisAddressSplitDfr <- function( inChr )
{
#--- Assert THREE (3) arguments:
# inChr: vector of addresses
#--- Split address into parts
subChr <- substring(inChr, regexpr('#', inChr))
lvlChr <- substr(subChr, 2, 3)
untChr <- substr(subChr, 5, 6)
#--- Return a data frame
return(data.frame(level=lvlChr, unit=untChr))
}
#|------------------------------------------------------------------------------------------|
#| M A I N P R O C E D U R E |
#|------------------------------------------------------------------------------------------|
#--- Init loading data
rawDfr <- RealisReadDfr("realis_terrasse_newsub_2008_jan_oct_15")
#--- Count of cols of data
ncol(rawDfr)
## [1] 19
#--- Count of rows of data
nrow(rawDfr)
## [1] 380
#--- Names of header
names(rawDfr)
## [1] "Project.Name" "Address"
## [3] "No..of.Units" "Area..sqm."
## [5] "Type.of.Area" "Transacted.Price...."
## [7] "Unit.Price....psm." "Unit.Price....psf."
## [9] "Contract.Date" "Property.Type"
## [11] "Tenure" "Completion.Date"
## [13] "Type.of.Sale" "Purchaser.Address.Indicator"
## [15] "Postal.District" "Postal.Sector"
## [17] "Postal.Code" "Planning.Region"
## [19] "Planning.Area"
#--- Peek at data
head(rawDfr)
## Project.Name Address No..of.Units Area..sqm.
## 1 TERRASSE 25 Terrasse Lane #01-24 1 89
## 2 TERRASSE 35 Terrasse Lane #03-80 1 105
## 3 TERRASSE 33 Terrasse Lane #01-71 1 89
## 4 TERRASSE 35 Terrasse Lane #04-80 1 107
## 5 TERRASSE 35 Terrasse Lane #02-80 1 114
## 6 TERRASSE 35 Terrasse Lane #01-81 1 89
## Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1 Strata 998690 11221 1042
## 2 Strata 1145000 10905 1013
## 3 Strata 970000 10899 1013
## 4 Strata 1150000 10748 998
## 5 Strata 1150000 10088 937
## 6 Strata 950000 10674 992
## Contract.Date Property.Type Tenure Completion.Date
## 1 02-JUL-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## 2 12-JUN-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## 3 08-JUN-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## 4 06-JUN-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## 5 06-JUN-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## 6 04-JUN-2012 Condominium 99 Yrs From 23/08/2010 Uncompleted
## Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1 New Sale HDB 19 54
## 2 New Sale Private 19 54
## 3 New Sale HDB 19 54
## 4 New Sale HDB 19 54
## 5 New Sale Private 19 54
## 6 New Sale HDB 19 54
## Postal.Code Planning.Region Planning.Area
## 1 544776 North East Region Hougang
## 2 544781 North East Region Hougang
## 3 544780 North East Region Hougang
## 4 544781 North East Region Hougang
## 5 544781 North East Region Hougang
## 6 544781 North East Region Hougang
#--- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings( as.numeric( rawDfr[, 4] ) ) # Area sqm
rawDfr[, 6] <- suppressWarnings( as.numeric( rawDfr[, 6] ) ) # Transacted Price
rawDfr[, 7] <- suppressWarnings( as.numeric( rawDfr[, 7] ) ) # Unit Price psm
rawDfr[, 8] <- suppressWarnings( as.numeric( rawDfr[, 8] ) ) # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")
#--- Split address into parts and merge new columns
rawDfr <- cbind( rawDfr, RealisAddressSplitDfr(rawDfr[, 2]) )
There are THREE HUNDRED AND EIGHTY (380) units that are available with a median price of $1.105 million and an area of 103
sqm, at a median psf of $993.
#
# |------------------------------------------------------------------------------------------|
# | P A R T O N E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Count of rows of data
nrow(rawDfr)
## [1] 380
describe(rawDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 380 106.9 41.6 103 103.1 41.51 47 232 185 0.9 0.56 2.13
describe(rawDfr[, 6])
## var n mean sd median trimmed mad min max range
## 1 1 380 1103604 332439 1105800 1088157 355824 584400 1954600 1370200
## skew kurtosis se
## 1 0.37 -0.56 17054
describe(rawDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 380 988.9 92.44 993 990.9 96.37 709 1204 495 -0.35 0.24
## se
## 1 4.74
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
hist(rawDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(rawDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(rawDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (rawDfr[, 4]/pi)
symbols(rawDfr[, 8], rawDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = "lightblue", ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
text(rawDfr[, 8], rawDfr[, 6], rawDfr[, 4], cex = 0.6, col = "red")
text(993, 1105000, "MEDIAN", cex = 1.2, col = "red")
The busiest period appeared to be at the launch last year, but there were significant activities this year.
# --- Plot a timeline
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
tempDfr <- rawDfr
tempDfr <- tempDfr[complete.cases(tempDfr[, 8]), ]
tempDfr <- tempDfr[complete.cases(tempDfr[, 9]), ]
plot(tempDfr[, 6] ~ tempDfr[, 9], xlim = c(as.Date("01-06-11", "%d-%m-%y"),
as.Date("30-07-12", "%d-%m-%y")), xlab = "Date", ylab = "", main = "Timeline of Transacted Price")
There are ONLY FIVE (5) levels maximum per block. Therefore, we assume a LEVEL FIVE (5) unit for a good view.
There are SEVENTY-EIGHT (78) units that are available with a median price of $1.400 million at a median psf of $891.
#
# |------------------------------------------------------------------------------------------|
# | P A R T T W O P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr$level == "05", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 78
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 78 148 39.43 144 152.7 34.1 50 206 156 -1.23 1.35 4.46
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range
## 1 1 78 1418265 328798 1400150 1454329 221056 599700 1954600 1354900
## skew kurtosis se
## 1 -1.17 1.37 37229
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 78 908.8 77.54 891.5 893.2 34.84 823 1148 325 2.08 3.34
## se
## 1 8.78
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = "lightblue", ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(891, 1400000, "MEDIAN", cex = 1.2, col = "red")
There are THIRTY-SIX(36) units that are available with a median price of $1.149 million at a median psf of $1,019.
#
# |------------------------------------------------------------------------------------------|
# | P A R T T H R E E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr[, 4] > 102, ]
subDfr <- subDfr[subDfr[, 4] < 106, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 36
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 36 104.1 1.01 105 104.1 0 103 105 2 -0.11 -2.04 0.17
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 36 1157541 66802 1149200 1158135 86065 1001800 1260000 258200 -0.07
## kurtosis se
## 1 -1.15 11134
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 36 1033 52.32 1019 1034 52.63 886 1115 229 -0.29 -0.31
## se
## 1 8.72
# --- Plot a simple histogram
par(mfrow = c(2, 2), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = "lightblue", ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1019, 1149000, "MEDIAN", cex = 1.2, col = "red")
There are FIFTEEN (15) units that are available with a median area of 144 sqm which we can purchase for a median psf of $916.
#
# |------------------------------------------------------------------------------------------|
# | P A R T F O U R P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr[, 6] > 1400000, ]
subDfr <- subDfr[subDfr[, 6] < 1500000, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 15
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 15 144.1 15.27 144 143.5 16.31 127 169 42 0.56 -1.26 3.94
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 15 1436541 32482 1436300 1434647 36472 1400300 1497400 97100 0.53
## kurtosis se
## 1 -1.17 8387
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 15 933.9 82.58 916 933.8 103.8 813 1056 243 -0.05 -1.47
## se
## 1 21.32
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = "lightblue", ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(916, 1436300, "MEDIAN", cex = 1.2, col = "red")
The variable 'area' is an EXCELLENT (94.4%) predictor of price for the project.
Note: the same variable only explains 87.3% of price for Watertown project, with other unknown variables accounting for the rest of price.
#
# |------------------------------------------------------------------------------------------|
# | P A R T F I V E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Scatterplot and Correlation Analysis (library gclus and ltm)
# Scatterplot
subDfr <- data.frame(price = rawDfr[, 6], area = rawDfr[, 4], psf = rawDfr[,
8])
par(mfrow = c(1, 1), las = 1)
cpairs(subDfr, gap = 0.5, panel.colors = dmat.color(abs(cor(subDfr))), main = "RAW Variables Ordered and Colored by Correlations")
# --- Correlation matrix
cor(subDfr)
## price area psf
## price 1.0000 0.9718 -0.7037
## area 0.9718 1.0000 -0.8242
## psf -0.7037 -0.8242 1.0000
# --- Perform correlation test for matrix (library ltm) Correlation null
# hypothesis is that the correlation is zero (not correlated) If the
# p-value is less than the alpha level, then the null hypothesis is
# rejected Check for correlation p<0.05 is correlated
rcor.test(subDfr)
##
## price area psf
## price ***** 0.972 -0.704
## area <0.001 ***** -0.824
## psf <0.001 <0.001 *****
##
## upper diagonal part contains correlation coefficient estimates
## lower diagonal part contains corresponding p-values
# --- Simple Regression (unstandardized) Y = price; X = area;
raw1Lm <- lm(subDfr$price ~ subDfr$area)
summary(raw1Lm)
##
## Call:
## lm(formula = subDfr$price ~ subDfr$area)
##
## Residuals:
## Min 1Q Median 3Q Max
## -303879 -44352 -4812 41550 264047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 273671.3 11104.6 24.6 <2e-16 ***
## subDfr$area 7765.6 96.8 80.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78400 on 378 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.944
## F-statistic: 6.43e+03 on 1 and 378 DF, p-value: <2e-16
#
# |------------------------------------------------------------------------------------------|
# | E N D O F S C R I P T |
# |------------------------------------------------------------------------------------------|