Notice that several numeric columns are read in as factors, so we need to convert them into numbers. The numbers also contain commas (thousands separators), which makes the conversion quite cumbersome.
# Load ggplot2 for the plots further down.
library(ggplot2)
# Read the raw CSV with base R's read.csv().
pty <- read.csv("pty.csv")
# Inspect the column types; the output below shows SQ.FT, PRICE and PSF
# imported as factors rather than numbers.
str(pty)
## 'data.frame': 1022 obs. of 10 variables:
## $ PROJECT : Factor w/ 386 levels "1 KING ALBERT PARK",..: 117 117 117 117 117 220 220 220 220 220 ...
## $ PROPERTY.TYPE : Factor w/ 6 levels "Apartment","Condominium",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ TENURE : Factor w/ 5 levels "103 years","956 years",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ SALE.DATE : Factor w/ 15 levels "1-Aug-17","18-Jul-17",..: 6 6 5 2 2 6 6 6 6 4 ...
## $ SQ.FT : Factor w/ 295 levels "1,001","1,012",..: 86 245 267 288 288 275 275 275 242 273 ...
## $ PRICE : Factor w/ 804 levels "1,000,000","1,005,000",..: 366 95 238 337 321 209 185 163 709 143 ...
## $ PSF : Factor w/ 687 levels "1,000","1,002",..: 325 452 422 405 388 339 323 308 335 306 ...
## $ COMPLETION.DATE: Factor w/ 48 levels "1963","1967",..: 47 47 47 47 47 47 47 47 47 47 ...
## $ TYPE.OF.SALE : Factor w/ 3 levels "New Sale","Resale",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ DISTRICT : int 3 3 3 3 3 3 3 3 3 3 ...
Reading the file with readr's read_csv() parses the numbers with commas as plain numeric values (without commas). I then convert the result into a data frame for visualization purposes.
# Load readr for read_csv().
library(readr)
# Re-read the file with read_csv() and wrap the result in data.frame().
# The str() output further down shows the resulting column names with dots
# in place of spaces (e.g. PROPERTY.TYPE), which the plotting code relies on.
pty <- data.frame(read_csv("pty.csv")) # read_csv(file) = read_delim(file, delim = ",")
## Parsed with column specification:
## cols(
## PROJECT = col_character(),
## `PROPERTY TYPE` = col_character(),
## TENURE = col_character(),
## `SALE DATE` = col_character(),
## `SQ FT` = col_number(),
## PRICE = col_number(),
## PSF = col_number(),
## `COMPLETION DATE` = col_character(),
## `TYPE OF SALE` = col_character(),
## DISTRICT = col_integer()
## )
# Re-inspect the column types after re-reading the file with read_csv().
str(pty)
## 'data.frame': 1022 obs. of 10 variables:
## $ PROJECT : chr "HIGHLINE RESIDENCES" "HIGHLINE RESIDENCES" "HIGHLINE RESIDENCES" "HIGHLINE RESIDENCES" ...
## $ PROPERTY.TYPE : chr "Condominium" "Condominium" "Condominium" "Condominium" ...
## $ TENURE : chr "99 years" "99 years" "99 years" "99 years" ...
## $ SALE.DATE : chr "22-Jul-17" "22-Jul-17" "21-Jul-17" "18-Jul-17" ...
## $ SQ.FT : num 1152 506 700 915 915 ...
## $ PRICE : num 1961300 1152400 1433600 1830200 1760000 ...
## $ PSF : num 1703 2278 2049 2000 1924 ...
## $ COMPLETION.DATE: chr "Uncompleted" "Uncompleted" "Uncompleted" "Uncompleted" ...
## $ TYPE.OF.SALE : chr "New Sale" "New Sale" "New Sale" "New Sale" ...
## $ DISTRICT : int 3 3 3 3 3 3 3 3 3 3 ...
# These plots use ggplot() instead of qplot(): qplot() is deprecated in
# current ggplot2 and the rest of this file already uses ggplot().

# Number of rows per type of sale, one fill colour per sale type.
ggplot(pty, aes(x = TYPE.OF.SALE, fill = TYPE.OF.SALE)) +
  geom_bar()

# Same counts, with each bar split by property type.
ggplot(pty, aes(x = TYPE.OF.SALE, fill = PROPERTY.TYPE)) +
  geom_bar()

# Distribution of DISTRICT per type of sale. The categorical variable goes on
# x (as in the PSF boxplot below) so that one box is drawn per sale type in
# every ggplot2 version; with the numeric DISTRICT on x, older releases do not
# group the boxes by sale type.
ggplot(pty, aes(x = TYPE.OF.SALE, y = DISTRICT)) +
  geom_boxplot()

# PSF per type of sale: boxplot with the jittered raw points drawn on top.
ggplot(pty, aes(x = TYPE.OF.SALE, y = PSF)) +
  geom_boxplot() +
  geom_jitter()

# PSF (x) against property type (y), one point per row.
ggplot(pty, aes(x = PSF, y = PROPERTY.TYPE)) +
  geom_point()

# Same scatter with point size mapped to TENURE. TENURE is discrete, so
# ggplot2 warns that using size for a discrete variable is not advised.
ggplot(pty, aes(x = PSF, y = PROPERTY.TYPE, size = TENURE)) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.
# Histogram of PSF on a log10 x axis, restricted to rows with PSF above 1000,
# with one facet per property type arranged in two rows.
ggplot(
  data = subset(pty, PSF > 1000),
  mapping = aes(x = PSF)
) +
  geom_histogram() +
  facet_wrap(~PROPERTY.TYPE, nrow = 2) +
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of PSF on a log10 x axis for district 10 only, with one facet per
# property type / type of sale combination, all laid out in a single row.
# The filter must use `==`: with `DISTRICT = 10`, subset() receives DISTRICT as
# an unused named argument and returns every row unfiltered.
ggplot(data = subset(pty, DISTRICT == 10), aes(PSF)) +
  geom_histogram() +
  facet_wrap(~PROPERTY.TYPE + TYPE.OF.SALE, nrow = 1) +
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# Print the hex colour codes of a three-colour hue palette from scales
# (presumably the colours behind the three-level fills above; confirm).
scales::hue_pal()(3) # see the colour codes
## [1] "#F8766D" "#00BA38" "#619CFF"
# Count per type of sale with the property types placed side by side
# ("dodge"); position can also be "fill" or "stack".
ggplot(
  data = pty,
  mapping = aes(x = TYPE.OF.SALE, fill = factor(PROPERTY.TYPE))
) +
  geom_bar(position = "dodge")

# Pie chart by type of sale: a single full-width stacked bar drawn in polar
# coordinates with the angle mapped to y.
ggplot(
  data = pty,
  mapping = aes(x = factor(1), fill = factor(TYPE.OF.SALE))
) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Same pie chart construction, filled by property type.
ggplot(
  data = pty,
  mapping = aes(x = factor(1), fill = factor(PROPERTY.TYPE))
) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# One full-width bar per property type, drawn in polar coordinates with the
# angle mapped to x.
ggplot(
  data = pty,
  mapping = aes(x = PROPERTY.TYPE, fill = factor(PROPERTY.TYPE))
) +
  geom_bar(width = 1) +
  coord_polar(theta = "x")

# One full-width bar per type of sale, filled by property type, drawn in
# polar coordinates with the angle mapped to x.
ggplot(
  data = pty,
  mapping = aes(x = TYPE.OF.SALE, fill = factor(PROPERTY.TYPE))
) +
  geom_bar(width = 1) +
  coord_polar(theta = "x")