# Code
# Load packages
library(ggplot2) # Simple, versatile plots
library(tidyr) # Restructuring original datasets (from wide to long format)
library(dplyr) # Data manipulation (factor levels, labels etc.)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer) # Colour-blind friendly palettes
library(xlsx) # for reading .xlsx file
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Data
# Folder that holds the annual household workbooks, one file per state.
# Kept in a single place so the location only has to be changed once.
data_dir <- "C:/Data/RMIT/Master of Data Science/Data Visualization - MATH2270/Assignment 2/Data"

# Read the second worksheet of one workbook stored in `data_dir`.
#
# file_name: workbook file name, relative to `data_dir`
#
# Returns the data frame produced by read.xlsx() for that sheet.
read_household_sheet <- function(file_name) {
  read.xlsx(file.path(data_dir, file_name), sheetIndex = 2)
}

TGIp_NSW <- read_household_sheet("5220012_Annual_Household_NSW.xls")
TGIp_VIC <- read_household_sheet("5220013_Annual_Household_VIC.xls")
TGIp_QLD <- read_household_sheet("5220014_Annual_Household_QLD.xls")
TGIp_SA <- read_household_sheet("5220015_Annual_Household_SA.xls")
TGIp_WA <- read_household_sheet("5220016_Annual_Household_WA.xls")
TGIp_TAS <- read_household_sheet("5220017_Annual_Household_TAS.xls")
TGIp_NT <- read_household_sheet("5220018_Annual_Household_NT.xls")
TGIp_ACT <- read_household_sheet("5220019_Annual_Household_ACT.xls")
# Convert Year and calculate Net Savings & Gross Disposable Income as % of
# Total Gross Income, once per state
#-------------------------------------------------------------------------------------------------

# Convert a worksheet column (factor or character) to numeric.
# Going through as.character() first avoids reading factor level codes
# instead of the printed values.
to_number <- function(x) {
  as.numeric(as.character(x))
}

# Build the table of income ratios for one state from its raw worksheet.
#
# raw:       data frame as returned by read.xlsx() for one state
# state:     state code; names the Net Savings ratio column and prefixes the
#            "<state>_GDIp" column
# skip_rows: number of leading non-data rows to drop
# cols:      positions of the year, total gross income, gross disposable
#            income and net savings columns, in that order
#
# Returns a data frame with columns Year, <state> (Net Savings / TGI) and
# <state>_GDIp (Gross Disposable Income / TGI).
compute_state_ratios <- function(raw, state, skip_rows = 9,
                                 cols = c(1, 12, 24, 26)) {
  stopifnot(is.data.frame(raw), ncol(raw) >= max(cols))

  # A logical row mask also behaves correctly when skip_rows is 0
  values <- raw[seq_len(nrow(raw)) > skip_rows, cols]
  colnames(values) <- c("Year", "TGI", "GDI", "NetSavings")

  # The year column holds Excel serial day numbers. For the Windows 1900 date
  # system the matching R origin is 1899-12-30 (Excel counts a non-existent
  # 29 Feb 1900); using 1900-01-01 would shift every date by two days.
  Year <- year(as.Date(to_number(values$Year), origin = "1899-12-30"))

  total_income <- to_number(values$TGI)
  ratios <- data.frame(
    Year,
    to_number(values$NetSavings) / total_income,
    to_number(values$GDI) / total_income
  )
  colnames(ratios) <- c("Year", state, paste0(state, "_GDIp"))
  ratios
}

# One ratio table per state, ready to be merged by Year
TGIpNSW <- compute_state_ratios(TGIp_NSW, "NSW")
TGIpVIC <- compute_state_ratios(TGIp_VIC, "VIC")
TGIpQLD <- compute_state_ratios(TGIp_QLD, "QLD")
TGIpSA <- compute_state_ratios(TGIp_SA, "SA")
TGIpWA <- compute_state_ratios(TGIp_WA, "WA")
TGIpTAS <- compute_state_ratios(TGIp_TAS, "TAS")
TGIpNT <- compute_state_ratios(TGIp_NT, "NT")
TGIpACT <- compute_state_ratios(TGIp_ACT, "ACT")
# Join the eight per-state tables into one wide table keyed on Year.
# The fold merges left to right in the listed order, so the state columns
# follow Year in that same order.
TGIp_temp <- Reduce(
  function(joined, next_state) merge(joined, next_state, by = "Year"),
  list(TGIpNSW, TGIpVIC, TGIpQLD, TGIpSA, TGIpWA, TGIpTAS, TGIpNT, TGIpACT)
)
# Stack the wide table into long format for plotting: one row per Year/State
# pair, with the savings ratio in SAVEp and the disposable-income ratio in GDIp.
TGIp <- local({
  # Both vectors list the states in the same order so the pairs line up
  savings_columns <- c("ACT", "NSW", "VIC", "QLD", "SA", "WA", "TAS", "NT")
  gdi_columns <- c(
    "ACT_GDIp", "NSW_GDIp", "VIC_GDIp", "QLD_GDIp",
    "SA_GDIp", "WA_GDIp", "TAS_GDIp", "NT_GDIp"
  )
  reshape(
    data = TGIp_temp,
    varying = list(savings_columns, gdi_columns),
    v.names = c("SAVEp", "GDIp"),
    timevar = "State",
    times = savings_columns,
    idvar = "Year",
    direction = "long"
  )
})
# Extract and present data from 2010 to 2017 only.
# The upper bound is explicit so that a workbook containing later years cannot
# add rows outside the range stated in the chart title. filter() is
# namespace-qualified because stats::filter is masked by dplyr.
TGIp1017 <- dplyr::filter(TGIp, Year > 2009, Year <= 2017)
# Sorted by State, then Year
TGIp1017 <- TGIp1017[order(TGIp1017$State, TGIp1017$Year), ]
# Base plot object: Year on the x-axis, y-axis labelled as percentages
p5 <- ggplot(TGIp1017, aes(x = Year)) +
  scale_y_continuous(labels = scales::percent)

# Line chart of Gross Disposable Income and Net Savings as % of Total Gross
# Income, one panel per State.
# `size` is a fixed setting, so it is passed outside aes(). Inside aes() the
# constant was mapped through the size scale, which picked the drawn width
# itself and created a size legend that then had to be switched off.
p5 +
  geom_line(aes(y = GDIp, color = "GDI"), size = 2) +
  geom_line(aes(y = SAVEp, color = "NetSavings"), size = 2) +
  # set line colours; labels are matched to the colour keys in sorted order
  scale_color_manual(
    values = c(GDI = "#009E73", NetSavings = "#E69F00"),
    labels = c("Gross Disposable Income", "Net Savings")
  ) +
  # one facet per State, laid out in a single row
  facet_grid(. ~ State) +
  # show the y-axis from 0 to 100% without dropping data outside that range
  coord_cartesian(ylim = c(0, 1)) +
  # x- and y-axis titles
  labs(x = "\nYear", y = "as % of Total Gross Income\n") +
  # main title
  ggtitle("Gross Disposable Income & Net Savings as % of Total Gross Income\nby State (2010 - 2017)\n") +
  # properties of the main title, axis text and axis titles
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 30, face = "bold"),
    axis.text.x = element_text(angle = 90, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 20, face = "bold")
  ) +
  # legend text and key properties; the legend sits in the middle of the plot
  theme(
    legend.text = element_text(size = 20, face = "bold"),
    legend.position = c(0.5, 0.5),
    legend.title = element_blank(),
    legend.key = element_rect(fill = "transparent", colour = "transparent"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(0.7, "cm"),
    legend.background = element_rect(fill = alpha("lightblue", 0.7))
  ) +
  # draw thicker lines in the legend keys
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  # properties of each facet strip
  theme(
    strip.text = element_text(face = "bold", size = 20),
    strip.background = element_rect(fill = "lightblue", color = "black", size = 2)
  )
