IDA 1
This report uses the Australian Bureau of Statistics’ (ABS) dataset on patterns of housing arrangements as recorded in the 2011 census and released in 2013. It is a large dataset from a respectable and reliable source, and the data acquired was done so in an ethically unobjectionable way, as participation in the census is not compulsory.
However, this places it at risk of excluding some sources of data through volunteer bias. Beyond that, it only takes into account those households which were both occupied on the night of the census, and occupied in a calssifiable way. It is grouped by Local Government Areas (LGA), which results in the exclusion of potential data from unicorporated areas and from the Australian Capital territory. The totals for each LGA include those households where tenure type was not recorded - thus, proportions of stated tenure type compared to that total may not accurately reflect genuine proportions in the population.
###importing libraries and dataset
library(readr)
library(ggplot2)
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(plyr))
library(tidyr)
data <- read_csv("housing.csv")
## Parsed with column specification:
## cols(
## `Local Government Area` = col_character(),
## `Owned - outright` = col_double(),
## `Owned - with a mortgage` = col_double(),
## `Rented - Real estate agent` = col_double(),
## `Rented - State or territory housing authority` = col_double(),
## `Rented - Person not in same household` = col_double(),
## `Rented - Housing co-operative, community or church group` = col_double(),
## `Rented - Other landlord type` = col_double(),
## `Rented - Landlord type not stated` = col_double(),
## `Other tenure type` = col_double(),
## `Tenure type not stated` = col_double(),
## Total = col_double(),
## `Median mortgage repayment ($/monthly)` = col_double(),
## State = col_character()
## )
# data head
head(data)
## # A tibble: 6 x 14
## `Local Governme… `Owned - outrig… `Owned - with a… `Rented - Real …
## <chr> <dbl> <dbl> <dbl>
## 1 LGA10050 Albury… 5440 6403 3783
## 2 LGA10110 Armida… 2888 2322 1803
## 3 LGA10150 Ashfie… 4408 4240 4624
## 4 LGA10200 Auburn… 5505 7104 5824
## 5 LGA10250 Ballin… 6274 4169 2415
## 6 LGA10300 Balran… 327 216 44
## # … with 10 more variables: `Rented - State or territory housing
## # authority` <dbl>, `Rented - Person not in same household` <dbl>,
## # `Rented - Housing co-operative, community or church group` <dbl>,
## # `Rented - Other landlord type` <dbl>, `Rented - Landlord type not
## # stated` <dbl>, `Other tenure type` <dbl>, `Tenure type not
## # stated` <dbl>, Total <dbl>, `Median mortgage repayment
## # ($/monthly)` <dbl>, State <chr>
#data dimensions
dim(data)
## [1] 559 14
#data classification
class(data)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
#variable classification
str(data)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 559 obs. of 14 variables:
## $ Local Government Area : chr "LGA10050 Albury (C)" "LGA10110 Armidale Dumaresq (A)" "LGA10150 Ashfield (A)" "LGA10200 Auburn (C)" ...
## $ Owned - outright : num 5440 2888 4408 5505 6274 ...
## $ Owned - with a mortgage : num 6403 2322 4240 7104 4169 ...
## $ Rented - Real estate agent : num 3783 1803 4624 5824 2415 ...
## $ Rented - State or territory housing authority : num 827 451 231 836 602 ...
## $ Rented - Person not in same household : num 1067 519 972 1432 1065 ...
## $ Rented - Housing co-operative, community or church group: num 144 103 158 130 135 20 469 72 102 31 ...
## $ Rented - Other landlord type : num 170 123 93 236 241 47 508 106 149 70 ...
## $ Rented - Landlord type not stated : num 88 76 62 128 93 23 305 99 111 53 ...
## $ Other tenure type : num 149 71 125 135 199 11 394 71 114 52 ...
## $ Tenure type not stated : num 545 233 384 751 404 ...
## $ Total : num 18616 8589 15297 22081 15597 ...
## $ Median mortgage repayment ($/monthly) : num 1450 1430 2167 2000 1733 ...
## $ State : chr "New South Wales" "New South Wales" "New South Wales" "New South Wales" ...
## - attr(*, "spec")=
## .. cols(
## .. `Local Government Area` = col_character(),
## .. `Owned - outright` = col_double(),
## .. `Owned - with a mortgage` = col_double(),
## .. `Rented - Real estate agent` = col_double(),
## .. `Rented - State or territory housing authority` = col_double(),
## .. `Rented - Person not in same household` = col_double(),
## .. `Rented - Housing co-operative, community or church group` = col_double(),
## .. `Rented - Other landlord type` = col_double(),
## .. `Rented - Landlord type not stated` = col_double(),
## .. `Other tenure type` = col_double(),
## .. `Tenure type not stated` = col_double(),
## .. Total = col_double(),
## .. `Median mortgage repayment ($/monthly)` = col_double(),
## .. State = col_character()
## .. )
sapply(data, class)
## Local Government Area
## "character"
## Owned - outright
## "numeric"
## Owned - with a mortgage
## "numeric"
## Rented - Real estate agent
## "numeric"
## Rented - State or territory housing authority
## "numeric"
## Rented - Person not in same household
## "numeric"
## Rented - Housing co-operative, community or church group
## "numeric"
## Rented - Other landlord type
## "numeric"
## Rented - Landlord type not stated
## "numeric"
## Other tenure type
## "numeric"
## Tenure type not stated
## "numeric"
## Total
## "numeric"
## Median mortgage repayment ($/monthly)
## "numeric"
## State
## "character"
###subsetting and pivoting
area <-data$'Local Government Area'
owned <- data$`Owned - outright`
ownedm <- data$`Owned - with a mortgage`
all_owned <- owned + ownedm
rent1 <- data$`Rented - Real estate agent`
rent2 <- data$`Rented - State or territory housing authority`
rent3 <- data$`Rented - Person not in same household`
rent4 <- data$`Rented - Housing co-operative, community or church group`
rent5 <- data$`Rented - Other landlord type`
rent6 <- data$`Rented - Landlord type not stated`
other <- data$`Other tenure type`
n_s <- data$`Tenure type not stated`
total <- data$Total
state <- data$State
all_rent <- rent1 + rent2 + rent3 + rent4 + rent5 + rent6
all_other <- other + n_s
all_not_owned <- all_other + all_rent
tot_ownedm <- sum(ownedm)
tot_owned <- sum(owned)
tot_all_owned <- sum(all_owned)
tot_nowned <- sum(all_not_owned)
tot_total <- sum(total)
nsw_owned <- subset(owned, state=="New South Wales")
nsw_ownedm <- subset(ownedm, state=="New South Wales")
nsw_all_owned <-subset(all_owned, state=="New South Wales")
nsw_nowned <- subset(all_not_owned, state=="New South Wales")
nsw_tot <- subset(total, state=="New South Wales")
vic_owned <-subset(owned, state=="Victoria")
vic_ownedm <- subset(ownedm, state=="Victoria")
vic_all_owned <-subset(all_owned, state=="Victoria")
vic_nowned <- subset(all_not_owned, state=="Victoria")
vic_tot <- subset(total, state=="Victoria")
qld_owned <-subset(owned, state=="Queensland")
qld_ownedm <- subset(ownedm, state=="Queensland")
qld_all_owned <-subset(all_owned, state=="Queensland")
qld_nowned<-subset(all_not_owned, state=="Queensland")
qld_tot <- subset(total, state=="Queensland")
sa_owned<-subset(owned, state=="South Australia")
sa_ownedm<-subset(ownedm, state=="South Australia")
sa_all_owned <-subset(all_owned, state=="South Australia")
sa_nowned<-subset(all_not_owned, state=="South Australia")
sa_tot <- subset(total, state=="South Australia")
wa_owned<-subset(owned, state=="Western Australia")
wa_ownedm<-subset(ownedm, state=="Western Australia")
wa_all_owned <-subset(all_owned, state=="Western Australia")
wa_nowned<-subset(all_not_owned, state=="Western Australia")
wa_tot <- subset(total, state=="Western Australia")
tas_owned<-subset(owned, state=="Tasmania")
tas_ownedm<-subset(ownedm, state=="Tasmania")
tas_all_owned <-subset(all_owned, state=="Tasmania")
tas_nowned<-subset(all_not_owned, state=="Tasmania")
tas_tot <- subset(total, state=="Tasmania")
nt_owned<-subset(owned, state=="Northern Territory")
nt_ownedm<-subset(ownedm, state=="Northern Territory")
nt_all_owned <-subset(all_owned, state=="Northern Territory")
nt_nowned<-subset(all_not_owned, state=="Northern Territory")
nt_tot <- subset(total, state=="Northern Territory")
data_2 <- data.frame(Owned_OR = c(sum(nsw_owned), sum(vic_owned), sum(qld_owned), sum(sa_owned), sum(wa_owned), sum(tas_owned), sum(nt_owned)), Owned_M = c(sum(nsw_ownedm), sum(vic_ownedm), sum(qld_ownedm), sum(sa_ownedm), sum(wa_ownedm), sum(tas_ownedm), sum(nt_ownedm)), Not_Owned = c(sum(nsw_nowned), sum(vic_nowned), sum(qld_nowned), sum(sa_nowned), sum(wa_nowned), sum(tas_nowned), sum(nt_nowned)), State = as.factor(c("NSW", "Vic", "QLD", "SA", "WA", "Tas", "NT")))
data_2_pivot <- data_2 %>% gather("Tenure", "Count", -State)
nsw_perc_owned <- (sum(nsw_owned)/sum(nsw_tot))*100
vic_perc_owned <-(sum(vic_owned)/sum(vic_tot))*100
qld_perc_owned <-(sum(qld_owned)/sum(qld_tot))*100
sa_perc_owned <-(sum(sa_owned)/sum(sa_tot))*100
wa_perc_owned <-(sum(wa_owned)/sum(wa_tot))*100
tas_perc_owned <-(sum(tas_owned)/sum(tas_tot))*100
nt_perc_owned <-(sum(nt_owned)/sum(nt_tot))*100
tot_perc_owned <-(tot_owned/tot_total)*100
nsw_perc_ownedm <- (sum(nsw_ownedm)/sum(nsw_tot))*100
vic_perc_ownedm <-(sum(vic_ownedm)/sum(vic_tot))*100
qld_perc_ownedm <-(sum(qld_ownedm)/sum(qld_tot))*100
sa_perc_ownedm <-(sum(sa_ownedm)/sum(sa_tot))*100
wa_perc_ownedm <-(sum(wa_ownedm)/sum(wa_tot))*100
tas_perc_ownedm <-(sum(tas_ownedm)/sum(tas_tot))*100
nt_perc_ownedm <-(sum(nt_ownedm)/sum(nt_tot))*100
tot_perc_ownedm <-(tot_ownedm/tot_total)*100
nsw_perc_all_owned <- (sum(nsw_all_owned)/sum(nsw_tot))*100
vic_perc_all_owned <-(sum(vic_all_owned)/sum(vic_tot))*100
qld_perc_all_owned <-(sum(qld_all_owned)/sum(qld_tot))*100
sa_perc_all_owned <-(sum(sa_all_owned)/sum(sa_tot))*100
wa_perc_all_owned <-(sum(wa_all_owned)/sum(wa_tot))*100
tas_perc_all_owned <-(sum(tas_all_owned)/sum(tas_tot))*100
nt_perc_all_owned <-(sum(nt_all_owned)/sum(nt_tot))*100
tot_perc_all_owned <-(tot_all_owned/tot_total)*100
data_3 <- data.frame(Perc_Owned_OR = c(nsw_perc_owned, vic_perc_owned, qld_perc_owned, sa_perc_owned, wa_perc_owned, tas_perc_owned, nt_perc_owned, tot_perc_owned), Perc_Owned_M = c(nsw_perc_ownedm, vic_perc_ownedm, qld_perc_ownedm, sa_perc_ownedm, wa_perc_ownedm, tas_perc_ownedm, nt_perc_ownedm, tot_perc_ownedm), Perc_Owned_All = c(nsw_perc_all_owned, vic_perc_all_owned, qld_perc_all_owned, sa_perc_all_owned, wa_perc_all_owned, tas_perc_all_owned, nt_perc_all_owned, tot_perc_all_owned), State = as.factor(c("NSW", "Vic", "QLD", "SA", "WA", "Tas", "NT", "Overall")))
data_3_pivot <- data_3 %>% gather("Tenure", "Percent", -State)
perc_ownedm <-(ownedm/total)*100
perc_all_owned <-(all_owned/total)*100
data_4 <-data.frame("Owned_mortgage" = c(perc_ownedm), "Owned_all" = c(perc_all_owned), "Area" = c(area))
data_4_pivot <-data_4%>%gather("Tenure", "Percent", -"Area")
nsw_area_perc_ownedm <-(subset(ownedm, state=="New South Wales")/subset(total, state=="New South Wales"))*100
nsw_area_perc_all_owned <-(subset(all_owned, state=="New South Wales")/subset(total, state=="New South Wales"))*100
nsw_area <- subset(area, state=="New South Wales")
data_5 <-data.frame("Owned_mortgage" = c(nsw_area_perc_ownedm), "Owned_all" = c(nsw_area_perc_all_owned), "Area" = c(nsw_area))
data_5_pivot <-data_5%>%gather("Tenure", "Percent", -"Area")
The most obvious stakeholder in this dataset is the ABS itself, given that it elicited, compiled, and curated the data. Beyond that, the ABS data has implications for policymakers and actors in the public and private sectors.