R Markdown

This is an R Markdown document to keep all necessary code to wrangle.

Installing Packages

if(!require('dplyr')) {
  install.packages("dplyr")
}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if(!require('tidyverse')) {
  install.packages('tidyverse')
}
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   0.3.5
## ✔ tibble  3.1.8     ✔ stringr 1.5.0
## ✔ tidyr   1.2.1     ✔ forcats 0.5.2
## ✔ readr   2.1.3     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
if(!require('knitr')){
  install.packages('knitr')
}
## Loading required package: knitr
if(!require('readr')){
  install.packages('readr')
}

if(!require('tidyverse')){
  install.packages("tidyverse")
}

if(!require('readxl')){
  install.packages("readxl")
}
## Loading required package: readxl
if(!require('ggplot2')){
  install.packages("ggplot2")
}

Loading Packages

library(dplyr)
library(knitr)
library(readr)
library(tidyverse)
library(readxl)
library(ggplot2)

Importing Data

The data is imported from multiple sources and formats

df_housing <- read_csv("C:/Users/user/Downloads/Data Analytics/Data/capitals_index.csv",show_col_types = FALSE)
df_housing_data_list <- read_csv("C:/Users/user/Downloads/Data Analytics/Data/vic_house_parameters_list.csv",show_col_types = FALSE)
df_population <- read_xlsx("C:/Users/user/Downloads/Data Analytics/Data/310104_population.xlsx",sheet=2,na="", guess_max=2000, skip = 9)
df_property_price_index <- read_xlsx("C:/Users/user/Downloads/Data Analytics/Data/641601_property_price_index.xlsx",sheet=2,na="", guess_max=2000, skip = 9)

Testing Imported Data

The imported data should be tested

head(df_housing,50)
## # A tibble: 50 × 8
##     Year City      HouseValueK HouseNumberK PriceIndexHo…¹   CPI Total…² `Pop-M`
##    <dbl> <chr>           <dbl>        <dbl>          <dbl> <dbl>   <dbl>   <dbl>
##  1  2011 Sydney            541         2839           98.4  99.8    9049     7.3
##  2  2011 Melbourne         513         2266          100    99.8   22851     5.6
##  3  2011 Brisbane          429         1785          100.   99.8    7327     4.5
##  4  2011 Adelaide          380          722          101.   99.8    5222     1.6
##  5  2011 Perth             525          942           99.4  99.8   12749     2.4
##  6  2011 Hobart            305          231          102.   99.8     827     0.5
##  7  2011 Darwin            523           74           98.2  99.8     471     0.2
##  8  2011 Canberra          548          145          101.   99.8    1766     0.4
##  9  2012 Sydney            545         2863          104.  102.     9240     7.4
## 10  2012 Melbourne         492         2310          100.  102.    18764     5.7
## # … with 40 more rows, and abbreviated variable names ¹​PriceIndexHousing,
## #   ²​TotalBldgPermit
head(df_housing_data_list,16)
## # A tibble: 16 × 4
##     Year HouseAttribute      Value Type  
##    <dbl> <chr>               <dbl> <chr> 
##  1  2012 dwell          2309800    Number
##  2  2012 houses_md          280    K     
##  3  2012 houses_mn          309    K     
##  4  2012 houses_nos       20329    Number
##  5  2012 population           5.6  M     
##  6  2012 rate_const_pi        0.1  Change
##  7  2012 rate_dwell_chg       1.95 Change
##  8  2012 rate_inflation       1.2  Change
##  9  2012 rate_ppl_chg         1.9  Change
## 10  2012 rate_rental_pi       2.6  Change
## 11  2012 units_apts_md      240    K     
## 12  2012 units_apts_mn      264    K     
## 13  2012 units_apts_nos    4242    Number
## 14  2013 dwell          2353000    Number
## 15  2013 houses_md          290    K     
## 16  2013 houses_mn          319    K
#head(df_housing_data,6)
#head(df_Inflation_rate,8)
head(df_population,10)
## # A tibble: 10 × 28
##    `Series ID`         A2060825C A2060…¹ A2060…² A2060…³ A2060…⁴ A2060…⁵ A2060…⁶
##    <dttm>                  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 1981-06-01 00:00:00   2608351 1958717 1178447  653940  657249  212565   65393
##  2 1981-09-01 00:00:00   2616060 1964139 1189946  655136  663047  212862   66716
##  3 1981-12-01 00:00:00   2624579 1969349 1200504  657014  667381  212935   68023
##  4 1982-03-01 00:00:00   2634534 1975617 1210128  658840  672273  213477   69023
##  5 1982-06-01 00:00:00   2643527 1981619 1219369  660066  676892  213679   69388
##  6 1982-09-01 00:00:00   2649615 1986589 1228791  661669  681539  213868   70025
##  7 1982-12-01 00:00:00   2655478 1991532 1235548  663641  684771  214168   70671
##  8 1983-03-01 00:00:00   2663858 1997990 1242336  666073  688002  214543   71620
##  9 1983-06-01 00:00:00   2668049 2003140 1248666  667942  691681  215090   72336
## 10 1983-09-01 00:00:00   2673036 2007981 1254248  669852  694823  215635   73411
## # … with 20 more variables: A2060832A <dbl>, A2060824A <dbl>, A2060834F <dbl>,
## #   A2060835J <dbl>, A2060836K <dbl>, A2060837L <dbl>, A2060838R <dbl>,
## #   A2060839T <dbl>, A2060840A <dbl>, A2060841C <dbl>, A2060833C <dbl>,
## #   A2060843J <dbl>, A2060844K <dbl>, A2060845L <dbl>, A2060846R <dbl>,
## #   A2060847T <dbl>, A2060848V <dbl>, A2060849W <dbl>, A2060850F <dbl>,
## #   A2060842F <dbl>, and abbreviated variable names ¹​A2060826F, ²​A2060827J,
## #   ³​A2060828K, ⁴​A2060829L, ⁵​A2060830W, ⁶​A2060831X
head(df_property_price_index,12)
## # A tibble: 12 × 28
##    `Series ID`         A837283…¹ A8372…² A8372…³ A8372…⁴ A8372…⁵ A8372…⁶ A8372…⁷
##    <dttm>                  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2003-09-01 00:00:00      85.3    60.7    64.2    62.2    48.3    61.2    40.5
##  2 2003-12-01 00:00:00      88.2    62.1    69.4    63.9    50.6    66.5    43.3
##  3 2004-03-01 00:00:00      87      60.8    70.7    64.8    52.5    68.7    45.5
##  4 2004-06-01 00:00:00      84.4    60.9    71.7    65.9    53.7    71.8    45.2
##  5 2004-09-01 00:00:00      84.2    60.9    71.2    67.1    54.7    72.3    46.8
##  6 2004-12-01 00:00:00      84.8    62.4    72      68.6    57.1    74.6    49.7
##  7 2005-03-01 00:00:00      83.2    62.5    72.6    68.6    59.4    75.8    53.6
##  8 2005-06-01 00:00:00      82.6    63.2    73.3    69.3    62.6    76.9    54.7
##  9 2005-09-01 00:00:00      81.4    63.1    73.6    70      65.2    76.7    56.1
## 10 2005-12-01 00:00:00      81.8    64      75      71.6    70      79.1    60.2
## 11 2006-03-01 00:00:00      80.8    65.4    75.6    72.2    75.6    81      63.2
## 12 2006-06-01 00:00:00      82.3    67.2    77.5    73.4    86.4    83.4    67.1
## # … with 20 more variables: A83728446K <dbl>, A83728455L <dbl>,
## #   A83728389A <dbl>, A83728398C <dbl>, A83728407V <dbl>, A83728416W <dbl>,
## #   A83728425X <dbl>, A83728434A <dbl>, A83728443C <dbl>, A83728452F <dbl>,
## #   A83728461J <dbl>, A83728386V <dbl>, A83728395W <dbl>, A83728404L <dbl>,
## #   A83728413R <dbl>, A83728422T <dbl>, A83728431V <dbl>, A83728440W <dbl>,
## #   A83728449T <dbl>, A83728458V <dbl>, and abbreviated variable names
## #   ¹​A83728383L, ²​A83728392R, ³​A83728401F, ⁴​A83728410J, ⁵​A83728419C, …
#head(df_housing_price_index_capitals,14)

Line Graph

df_housing
## # A tibble: 96 × 8
##     Year City      HouseValueK HouseNumberK PriceIndexHo…¹   CPI Total…² `Pop-M`
##    <dbl> <chr>           <dbl>        <dbl>          <dbl> <dbl>   <dbl>   <dbl>
##  1  2011 Sydney            541         2839           98.4  99.8    9049     7.3
##  2  2011 Melbourne         513         2266          100    99.8   22851     5.6
##  3  2011 Brisbane          429         1785          100.   99.8    7327     4.5
##  4  2011 Adelaide          380          722          101.   99.8    5222     1.6
##  5  2011 Perth             525          942           99.4  99.8   12749     2.4
##  6  2011 Hobart            305          231          102.   99.8     827     0.5
##  7  2011 Darwin            523           74           98.2  99.8     471     0.2
##  8  2011 Canberra          548          145          101.   99.8    1766     0.4
##  9  2012 Sydney            545         2863          104.  102.     9240     7.4
## 10  2012 Melbourne         492         2310          100.  102.    18764     5.7
## # … with 86 more rows, and abbreviated variable names ¹​PriceIndexHousing,
## #   ²​TotalBldgPermit
ggplot(df_housing, aes(Year, CPI, fill = City)) +
  geom_col()

#df_housing_price_index_capitals
ggplot(df_housing, aes(Year, TotalBldgPermit, fill = City)) +
  geom_col()

ggplot(df_housing, aes(x = Year, y = HouseValueK, color = City )) +
  geom_boxplot()

ggplot(df_housing, aes(x = Year, y = HouseNumberK, color = City )) +
  geom_boxplot()

ggplot(df_housing, aes(y = HouseValueK, x = Year, color = City)) +
  geom_point() +
  geom_line() 

ggplot(df_housing, aes(x = Year, y = CPI, color = City)) +
  geom_line() + 
  facet_wrap(~City)

Bar Graph

As a refresher, here is the chart you created before:

# df_housing_price_index_capitals %>%
#   hchart('scatter', hcaes(x = Year, y = Index, color = City))
# 
# # Select The columns
# df_housingp <- select(filter(df_housing, Year == 2014),City, Year, HouseNumberK)
# hchart(df_housingp, type = "pie", hcaes(City, HouseNumberK))

ggplot(df_housing, aes(x="", y=HouseNumberK, fill=City)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0)

### Scatter Graph Housing Values

As a refresher, here is the chart you created before:

glimpse(df_housing_data_list)
## Rows: 143
## Columns: 4
## $ Year           <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2…
## $ HouseAttribute <chr> "dwell", "houses_md", "houses_mn", "houses_nos", "popul…
## $ Value          <dbl> 2309800.00, 280.00, 309.00, 20329.00, 5.60, 0.10, 1.95,…
## $ Type           <chr> "Number", "K", "K", "Number", "M", "Change", "Change", …
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
hchart(
  select(df_housing,Year,City,HouseValueK), "scatter", hcaes(x = Year, y = HouseValueK, group = City))