The data for this R script comes from the URA Realis web site (www.ura.gov.sg, click on Realis menu) The purpose of the web site
is to provide data and information about the caveats lodged for EVERY property transaction in Singapore.
The URA Realis web site contains a lot of data and we will only look at a small subset for this R script. The files for this
script are as follows:
(1) realis_all_apartment_99_newsub_2012_jan_oct_13_part_xx.csv
Contains information about ONLY apartments in EVERY region, with a tenure of NINETY-NINE(99)-year lease, for uncompleted
properties (New and Sub sales) from January 2012 to today (13 October 2012).
(2) realis_ner_apartment_99_newsub_2012_jan_oct_13_part_xx.csv
Contains information about ONLY apartments in ONLY north-east region, with a tenure of NINETY-NINE(99)-year lease, for
uncompleted properties (New and Sub sales) from January 2012 to today (13 October 2012).
(3) realis_watertown_2000_oct_2012_oct_13.csv
Contains information about ALL apartments in project Watertown ONLY from January 2012 to today (13 October 2012).
library(psych)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: mvtnorm
## Loading required package: polycor
## Loading required package: sfsmisc
## Attaching package: 'polycor'
## The following object(s) are masked from 'package:psych':
##
## polyserial
## Attaching package: 'ltm'
## The following object(s) are masked from 'package:psych':
##
## factor.scores
library(gclus)
## Loading required package: cluster
library(RColorBrewer)
library(wordcloud)
## Loading required package: Rcpp
#|------------------------------------------------------------------------------------------|
#| I N I T I A L I Z A T I O N |
#|------------------------------------------------------------------------------------------|
RealisReadDfr <- function(fileStr, partNum=1,
workDirStr="C:/Users/denbrige/100 FxOption/103 FxOptionVerBack/080 Fx Git/R-nonsource")
{
#--- Assert THREE (3) arguments:
# fileStr: name of the file (without the suffix "_part_xx" and extension ".csv"
# partNum: number of parts
# workDirStr: working directory
#--- Check that partNum is valid (between 1 to 99)
if( as.numeric(partNum) < 1 || as.numeric(partNum) > 99 )
stop("partNum MUST be between 1 AND 99")
#--- Set working directory
setwd(workDirStr)
#--- Read data from split parts
# Append suffix to the fileStr
# Read each part and merge them together
if( as.numeric(partNum) > 1 )
{
retDfr <- read.csv( paste( fileStr, "_part01.csv", sep="" ), colClasses = "character" )
for( id in 2:partNum )
{
#--- rbind() function will bind two data frames with the same header together
partStr <- paste( fileStr, "_part", sprintf("%02d", as.numeric(id)), ".csv", sep="" )
tmpDfr <- read.csv( partStr, colClasses = "character")
retDfr <- rbind( retDfr, tmpDfr )
}
}
else
retDfr <- read.csv( paste( fileStr, ".csv", sep="" ), colClasses = "character" )
#--- Return a data frame
return(retDfr)
}
#|------------------------------------------------------------------------------------------|
#| I N T E R N A L F U N C T I O N S |
#|------------------------------------------------------------------------------------------|
RealisAddressSplitDfr <- function( inChr )
{
#--- Assert THREE (3) arguments:
# inChr: vector of addresses
#--- Split address into parts
subChr <- substring(inChr, regexpr('#', inChr))
lvlChr <- substr(subChr, 2, 3)
untChr <- substr(subChr, 5, 6)
#--- Return a data frame
return(data.frame(level=lvlChr, unit=untChr))
}
freqVtr <- function(inDfr, orderVtr)
{
#--- Assert 'directory' is a character vector of length 1 indicating the location of the
# CSV files.
# 'threshold' is a numeric vector of length 1 indicating the number of completely
# observed observations (on all variables) required to compute the correlation
# between nitrate and sulfate; the default is 0.
# Return a numeric vector of correlations.
#--- Assert create an empty numeric vector
outVtr <- numeric(0)
for(ord in orderVtr)
{
#--- Append numeric vector
outVtr <- c(outVtr, inDfr[inDfr$name==ord,2])
}
#--- Assert return value is a numeric vector
return(outVtr)
}
#|------------------------------------------------------------------------------------------|
#| M A I N P R O C E D U R E |
#|------------------------------------------------------------------------------------------|
#--- Init loading data
rawDfr <- RealisReadDfr("realis_watertown_2000_oct_2012_oct_13")
#--- Count of cols of data
ncol(rawDfr)
## [1] 19
#--- Count of rows of data
nrow(rawDfr)
## [1] 717
#--- Names of header
names(rawDfr)
## [1] "Project.Name" "Address"
## [3] "No..of.Units" "Area..sqm."
## [5] "Type.of.Area" "Transacted.Price...."
## [7] "Unit.Price....psm." "Unit.Price....psf."
## [9] "Contract.Date" "Property.Type"
## [11] "Tenure" "Completion.Date"
## [13] "Type.of.Sale" "Purchaser.Address.Indicator"
## [15] "Postal.District" "Postal.Sector"
## [17] "Postal.Code" "Planning.Region"
## [19] "Planning.Area"
#--- Peek at data
head(rawDfr)
## Project.Name Address No..of.Units Area..sqm.
## 1 WATERTOWN 79 Punggol Central #06-83 1 109
## 2 WATERTOWN 67 Punggol Central #13-16 1 137
## 3 WATERTOWN 79 Punggol Central #06-86 1 126
## 4 WATERTOWN 67 Punggol Central #10-15 1 127
## 5 WATERTOWN 67 Punggol Central #08-13 1 109
## 6 WATERTOWN 67 Punggol Central #05-13 1 109
## Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1 Strata 1741200 15974 1484
## 2 Strata 2417280 17644 1639
## 3 Strata 2069225 16422 1526
## 4 Strata 2116110 16662 1548
## 5 Strata 1748970 16046 1491
## 6 Strata 1571560 14418 1339
## Contract.Date Property.Type Tenure Completion.Date
## 1 20-SEP-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 2 30-AUG-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 3 28-AUG-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 4 06-JUL-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 5 04-JUN-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 6 31-MAY-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1 New Sale Private 19 82
## 2 New Sale HDB 19 82
## 3 New Sale HDB 19 82
## 4 New Sale Private 19 82
## 5 New Sale Private 19 82
## 6 New Sale HDB 19 82
## Postal.Code Planning.Region Planning.Area
## 1 828759 North East Region Punggol
## 2 828843 North East Region Punggol
## 3 828759 North East Region Punggol
## 4 828843 North East Region Punggol
## 5 828843 North East Region Punggol
## 6 828843 North East Region Punggol
#--- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings( as.numeric( rawDfr[, 4] ) ) # Area sqm
rawDfr[, 6] <- suppressWarnings( as.numeric( rawDfr[, 6] ) ) # Transacted Price
rawDfr[, 7] <- suppressWarnings( as.numeric( rawDfr[, 7] ) ) # Unit Price psm
rawDfr[, 8] <- suppressWarnings( as.numeric( rawDfr[, 8] ) ) # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")
#--- Split address into parts and merge new columns
rawDfr <- cbind( rawDfr, RealisAddressSplitDfr(rawDfr[, 2]) )
#|------------------------------------------------------------------------------------------|
#| P A R T O N E P R O C E D U R E |
#|------------------------------------------------------------------------------------------|
#--- Count of rows of data
nrow(rawDfr)
## [1] 717
describe(rawDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 717 72.56 25.17 56 69.17 10.38 49 147 98 0.93 -0.43
## se
## 1 0.94
describe(rawDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 717 942905 348324 791607 890471 257746 547057 2417280 1870223 1.32
## kurtosis se
## 1 1.62 13008
describe(rawDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 717 1208 125 1195 1202 131.9 960 1639 679 0.41 -0.34 4.67
#--- Plot a simple histogram
par( mfrow = c(3,2), las=2, mar=c(5.1,5.1,4.1,2.1) )
hist( rawDfr[, 4], xlab="Area Sqm", main="Distribution of Floor Area" )
boxplot( rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist( rawDfr[, 6], xlab="", main="Distribution of Purchase Price" )
boxplot( rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist( rawDfr[, 8], xlab="Unit Price Psf", main="Distribution of Unit Price Psf" )
boxplot( rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (rawDfr[, 4]/pi)
pal <- colorRampPalette(brewer.pal(3, "Blues"))
symbols(rawDfr[, 8], rawDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf")
# text( rawDfr[, 8], rawDfr[, 6], rawDfr[, 4], cex=0.6, col='red' )
text(1195, 791607, "MEDIAN", cex = 1.2, col = "red")
# --- Plot a timeline
par(mfrow = c(3, 1))
tempDfr <- rawDfr
tempDfr <- tempDfr[complete.cases(tempDfr[, 8]), ]
tempDfr <- tempDfr[complete.cases(tempDfr[, 9]), ]
plot(tempDfr[, 6] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12),
xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")),
xlab = "Date", ylab = "Transacted Price", main = "Timeline of Transacted Price")
plot(tempDfr[, 8] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12),
xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")),
xlab = "Date", ylab = "Unit Price Psf", main = "Timeline of Unit Price Psf")
plot(tempDfr[, 4] ~ tempDfr[, 9], col = rgb(red = 0, green = 0, blue = 0, alpha = 0.12),
xlim = c(as.Date("01-01-12", "%d-%m-%y"), as.Date("30-09-12", "%d-%m-%y")),
xlab = "Date", ylab = "Area Sqm", main = "Timeline of Area Sqm")
#
# |------------------------------------------------------------------------------------------|
# | P A R T T W O P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 63
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 63 72.92 26.19 56 69.59 10.38 49 138 89 0.87 -0.69 3.3
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 63 985759 372858 818400 929870 245747 614991 2314452 1699461 1.33
## kurtosis se
## 1 1.56 46976
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 63 1258 119.4 1244 1255 137.9 1040 1558 518 0.23 -0.64
## se
## 1 15.04
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1244, 818400, "MEDIAN", cex = 1.2, col = "red")
#
# |------------------------------------------------------------------------------------------|
# | P A R T T H R E E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
subDfr <- rawDfr[rawDfr[, 4] == 104, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 21
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 21 104 0 104 104 0 104 104 0 NaN NaN 0
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 21 1291946 135040 1288954 1291082 168314 1089215 1523625 434410 0.07
## kurtosis se
## 1 -1.36 29468
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 21 1154 120.6 1151 1153 149.7 973 1361 388 0.07 -1.36
## se
## 1 26.31
# --- Plot a simple histogram
par(mfrow = c(2, 2), las = 2)
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 104 sqm)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 104 sqm)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 104 sqm)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1151, 1288954, "MEDIAN", cex = 1.2, col = "red")
#
# |------------------------------------------------------------------------------------------|
# | P A R T F O U R P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Init loading data
rawDfr <- RealisReadDfr("realis_ner_apartment_99_newsub_2012_jan_oct_13", 2)
# --- Count of cols of data
ncol(rawDfr)
## [1] 19
# --- Count of rows of data
nrow(rawDfr)
## [1] 1071
nrow(rawDfr[rawDfr$Project.Name == "WATERTOWN", ])
## [1] 717
# --- Names of header
names(rawDfr)
## [1] "Project.Name" "Address"
## [3] "No..of.Units" "Area..sqm."
## [5] "Type.of.Area" "Transacted.Price...."
## [7] "Unit.Price....psm." "Unit.Price....psf."
## [9] "Contract.Date" "Property.Type"
## [11] "Tenure" "Completion.Date"
## [13] "Type.of.Sale" "Purchaser.Address.Indicator"
## [15] "Postal.District" "Postal.Sector"
## [17] "Postal.Code" "Planning.Region"
## [19] "Planning.Area"
# --- Peek at data
head(rawDfr)
## Project.Name Address No..of.Units Area..sqm.
## 1 BARTLEY RESIDENCES 1A Lorong How Sun #02-05 1 85
## 2 BARTLEY RESIDENCES 3 Lorong How Sun #04-13 1 102
## 3 BARTLEY RESIDENCES 7 Lorong How Sun #05-37 1 74
## 4 WATERTOWN 79 Punggol Central #06-83 1 109
## 5 BARTLEY RESIDENCES 3A Lorong How Sun #13-22 1 84
## 6 BARTLEY RESIDENCES 3A Lorong How Sun #07-19 1 99
## Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1 Strata 1131000 13306 1236
## 2 Strata 1316000 12902 1199
## 3 Strata 1020000 13784 1281
## 4 Strata 1741200 15974 1484
## 5 Strata 1146000 13643 1267
## 6 Strata 1283000 12960 1204
## Contract.Date Property.Type Tenure Completion.Date
## 1 01-OCT-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## 2 20-SEP-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## 3 20-SEP-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## 4 20-SEP-2012 Apartment 99 Yrs From 18/05/2011 Uncompleted
## 5 19-SEP-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## 6 17-SEP-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1 New Sale Private 19 53
## 2 New Sale HDB 19 53
## 3 New Sale HDB 19 53
## 4 New Sale Private 19 82
## 5 New Sale Private 19 53
## 6 New Sale HDB 19 53
## Postal.Code Planning.Region Planning.Area
## 1 536559 North East Region Serangoon
## 2 536560 North East Region Serangoon
## 3 536564 North East Region Serangoon
## 4 828759 North East Region Punggol
## 5 536561 North East Region Serangoon
## 6 536561 North East Region Serangoon
# --- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings(as.numeric(rawDfr[, 4])) # Area sqm
rawDfr[, 6] <- suppressWarnings(as.numeric(rawDfr[, 6])) # Transacted Price
rawDfr[, 7] <- suppressWarnings(as.numeric(rawDfr[, 7])) # Unit Price psm
rawDfr[, 8] <- suppressWarnings(as.numeric(rawDfr[, 8])) # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")
# --- Split address into parts and merge new columns
rawDfr <- cbind(rawDfr, RealisAddressSplitDfr(rawDfr[, 2]))
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(rawDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(rawDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(rawDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 93
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 93 74.75 26.71 74 72.55 31.13 43 149 106 0.62 -0.71 2.77
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 93 1017030 360184 1001000 975188 436553 614991 2314452 1699461 1
## kurtosis se
## 1 0.85 37349
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 93 1274 113.6 1256 1273 120.1 1040 1558 518 0.17 -0.55
## se
## 1 11.78
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1256, 1001000, "MEDIAN", cex = 1.2, col = "red")
subDfr <- rawDfr[rawDfr[, 4] > 102, ]
subDfr <- subDfr[subDfr[, 4] < 106, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 75
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 75 103.9 0.85 104 103.9 1.48 103 105 2 0.2 -1.6 0.1
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 75 1240368 116583 1207281 1231266 114248 1079891 1523625 443734 0.65
## kurtosis se
## 1 -0.7 13462
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 75 1109 99.3 1083 1101 100.8 973 1361 388 0.69 -0.55 11.47
# --- Plot a simple histogram
par(mfrow = c(2, 2), las = 2)
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 102<sqm<106)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 102<sqm<106)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 102<sqm<106)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1083, 1207281, "MEDIAN", cex = 1.2, col = "red")
#
# |------------------------------------------------------------------------------------------|
# | P A R T F I V E P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Init loading data
rawDfr <- RealisReadDfr("realis_all_apartment_99_newsub_2012_jan_oct_13", 2)
# --- Count of cols of data
ncol(rawDfr)
## [1] 19
# --- Count of rows of data
nrow(rawDfr)
## [1] 1819
nrow(rawDfr[rawDfr$Project.Name == "WATERTOWN", ])
## [1] 717
# --- Names of header
names(rawDfr)
## [1] "Project.Name" "Address"
## [3] "No..of.Units" "Area..sqm."
## [5] "Type.of.Area" "Transacted.Price...."
## [7] "Unit.Price....psm." "Unit.Price....psf."
## [9] "Contract.Date" "Property.Type"
## [11] "Tenure" "Completion.Date"
## [13] "Type.of.Sale" "Purchaser.Address.Indicator"
## [15] "Postal.District" "Postal.Sector"
## [17] "Postal.Code" "Planning.Region"
## [19] "Planning.Area"
# --- Peek at data
head(rawDfr)
## Project.Name Address No..of.Units Area..sqm.
## 1 BARTLEY RESIDENCES 1A Lorong How Sun #02-05 1 85
## 2 NIN RESIDENCE 85 Pheng Geck Avenue #17-13 1 84
## 3 UP@ROBERTSON QUAY 92 Robertson Quay #07-06 1 49
## 4 V ON SHENTON 5A Shenton Way #18-12 1 96
## 5 V ON SHENTON 5A Shenton Way #32-14 1 44
## 6 BARTLEY RESIDENCES 3 Lorong How Sun #04-13 1 102
## Type.of.Area Transacted.Price.... Unit.Price....psm. Unit.Price....psf.
## 1 Strata 1131000 13306 1236
## 2 Strata 1152000 13714 1274
## 3 Strata 1490000 30408 2825
## 4 Strata 1885000 19635 1824
## 5 Strata 1153000 26205 2434
## 6 Strata 1316000 12902 1199
## Contract.Date Property.Type Tenure Completion.Date
## 1 01-OCT-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## 2 26-SEP-2012 Apartment 99 Yrs From 13/09/2010 Uncompleted
## 3 24-SEP-2012 Apartment 99 Yrs From 07/06/2011 Uncompleted
## 4 24-SEP-2012 Apartment 99 Yrs From 29/11/2011 Uncompleted
## 5 21-SEP-2012 Apartment 99 Yrs From 29/11/2011 Uncompleted
## 6 20-SEP-2012 Apartment 99 Yrs From 29/06/2011 Uncompleted
## Type.of.Sale Purchaser.Address.Indicator Postal.District Postal.Sector
## 1 New Sale Private 19 53
## 2 New Sale Private 13 34
## 3 New Sale Private 09 23
## 4 New Sale Private 01 06
## 5 New Sale Private 01 06
## 6 New Sale HDB 19 53
## Postal.Code Planning.Region Planning.Area
## 1 536559 North East Region Serangoon
## 2 348271 Central Region Toa Payoh
## 3 238260 Central Region Singapore River
## 4 068814 Central Region Downtown Core
## 5 068814 Central Region Downtown Core
## 6 536560 North East Region Serangoon
# --- Coerce character into numeric or date
rawDfr[, 4] <- suppressWarnings(as.numeric(rawDfr[, 4])) # Area sqm
rawDfr[, 6] <- suppressWarnings(as.numeric(rawDfr[, 6])) # Transacted Price
rawDfr[, 7] <- suppressWarnings(as.numeric(rawDfr[, 7])) # Unit Price psm
rawDfr[, 8] <- suppressWarnings(as.numeric(rawDfr[, 8])) # Unit Price psf
rawDfr[, 9] <- as.Date(rawDfr[, 9], "%d-%b-%Y")
# --- Split address into parts and merge new columns
rawDfr <- cbind(rawDfr, RealisAddressSplitDfr(rawDfr[, 2]))
#
# |------------------------------------------------------------------------------------------|
# | P A R T C O U N T P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Plot a wordcloud
nameDfr <- rawDfr$Project.Name
if (length(unique(rawDfr$Project.Name)) > 6) {
par(mfrow = c(1, 1), mar = c(2.1, 2.1, 2.1, 2.1))
wordcloud(gsub(" ", ".", rawDfr$Project.Name), colors = brewer.pal(6, "Dark2"),
scale = c(7, 0.7), random.order = FALSE)
}
## Loading required package: tm
if (length(unique(rawDfr$Project.Name)) > 6) {
# --- Count of freq by state
table(nameDfr)
# --- Create a data frame of freq by state Remove row.names
tableDfr <- data.frame(name = names(tapply(nameDfr, nameDfr, length)), freq = tapply(nameDfr,
nameDfr, length))
rownames(tableDfr) <- NULL
# --- Create a subset
outcome2Dfr <- rawDfr[rawDfr$Project.Name %in% subset(tableDfr$name, tableDfr$freq >=
20), ]
# --- Count of rows of data
nrow(outcome2Dfr)
# --- Count of freq by state
table(outcome2Dfr$Project.Name)
# --- Plot a simple boxplot of the THIRTY(30)-day death rates by state (a)
# Add a label to the y-axis '30-day Death Rate' (b) Add a title for the
# histogram 'Heart Attack 30-day Death Rate by State' (c) Set the x- and
# y-axis tick labels to be perpendicular to the axes (see las) (d) Order
# the states by the MEDIAN THIRTY(30)-day death rate and plot the boxplot.
# (e) Shrink the x-axis tick labels so that the abbreviated state names do
# NOT overlap EACH other. (f) Alter the x-axis tick labels so that they
# include the number of hospitals in that state in parentheses. For
# example, the label for the state of Connecticut would be CT (32). You
# will need the axis() function and when you call the boxplot() function
# you will want to set the option xaxt to be 'n'.
psf <- outcome2Dfr[, 8]
name <- reorder(outcome2Dfr$Project.Name, outcome2Dfr[, 8], median, na.rm = TRUE)
orderVtr <- levels(name["scores"])
countVtr <- freqVtr(tableDfr, orderVtr)
par(mfrow = c(1, 1), las = 2, mar = c(15.1, 5.1, 4.1, 2.1))
boxplot(psf ~ name, ylab = "Unit Price psf", xaxt = "n", main = "Unit Price Psf by Project")
axis(1, at = seq_along(orderVtr), cex.axis = 0.8, labels = eval(substitute(paste(st,
" (", n, ")", sep = ""), list(st = orderVtr, n = countVtr))))
}
# --- Remove outlier (Area = 674)
rawDfr <- rawDfr[rawDfr[, 4] < 674, ]
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(rawDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area")
boxplot(rawDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(rawDfr[, 6], xlab = "", main = "Distribution of Purchase Price")
boxplot(rawDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(rawDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf")
boxplot(rawDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
subDfr <- rawDfr[rawDfr$level == "11", ]
# --- Count of rows of data
nrow(subDfr)
## [1] 121
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 121 72.88 27.29 58 69.52 22.24 43 173 130 1.07 0.63
## se
## 1 2.48
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range skew
## 1 1 121 1010103 412163 832118 947439 306723 598718 3134560 2535842 1.96
## kurtosis se
## 1 5.8 37469
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 121 1293 179.8 1255 1270 121.6 1040 2178 1138 2.71 10.19
## se
## 1 16.35
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Level 11)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Level 11)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Level 11)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2)
# --- Size the circles
radius <- (subDfr[, 4]/pi)
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Level 11)")
text(subDfr[, 8], subDfr[, 6], subDfr[, 4], cex = 0.6, col = "red")
text(1255, 832118, "MEDIAN", cex = 1.2, col = "red")
subDfr <- rawDfr[rawDfr[, 4] > 102, ]
subDfr <- subDfr[subDfr[, 4] < 106, ]
# --- Count of rows of data
nrow(subDfr)
## [1] 86
describe(subDfr[, 4])
## var n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 86 103.9 0.85 104 103.9 1.48 103 105 2 0.17 -1.61 0.09
describe(subDfr[, 6])
## var n mean sd median trimmed mad min max range
## 1 1 86 1313100 238331 1231218 1269714 141459 1079891 2299000 1219109
## skew kurtosis se
## 1 2 4.37 25700
describe(subDfr[, 8])
## var n mean sd median trimmed mad min max range skew kurtosis
## 1 1 86 1174 209.4 1098 1135 111.2 973 2034 1061 2.04 4.44
## se
## 1 22.58
# --- Plot a simple histogram
par(mfrow = c(3, 2), las = 2)
hist(subDfr[, 4], xlab = "Area Sqm", main = "Distribution of Floor Area (Size 102<sqm<106)")
boxplot(subDfr[, 4], outchar = T, main = "Boxplot of Floor Area", col = "slateblue1")
hist(subDfr[, 6], xlab = "", main = "Distribution of Purchase Price (Size 104 sqm)")
boxplot(subDfr[, 6], outchar = T, main = "Boxplot of Purchase Price", col = "slateblue1")
hist(subDfr[, 8], xlab = "Unit Price Psf", main = "Distribution of Unit Price Psf (Size 102<sqm<106)")
boxplot(subDfr[, 8], outchar = T, main = "Boxplot of Unit Price Psf", col = "slateblue1")
# --- Make a bubble plot
par(mfrow = c(1, 1), las = 2, mar = c(5.1, 5.1, 4.1, 2.1))
# --- Size the circles
radius <- subDfr$level
symbols(subDfr[, 8], subDfr[, 6], circles = radius, inches = 0.3, fg = "white",
bg = pal(12), ylab = "", xlab = "Unit Price Psf", main = "Purchase Price vs Unit Price Psf (Size 102<sqm<106)")
text(subDfr[, 8], subDfr[, 6], subDfr$level, cex = 0.6, col = "red")
text(1098, 1231218, "MEDIAN", cex = 1.2, col = "red")