Assign 3 Pre Pos

Information is from the site ststed below

http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6523.02015-16?OpenDocument

In particular I want to look at three files from the above site

Household Income and Income Distribution, Victoria

Household Income and Income Distribution, Tasmania

Household Income and Income Distribution, Western Australia

Viewable at ( Published to ) <<== dont forget to update B4 submission

http://rpubs.com/fast_Eddie/428200

What is this ??

a look at three states household incomes for the years 1995 - 2016

please note : NOT all years were recorded for each state, however the years shown were recorded for each state

# Load packages 
library(readr) 
library(tidyr) # Restructuring original datasets (from wide to long format) 
library(dplyr) # Data manipulation (factor levels, labels etc.)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse) # the whole shebeng)

## -- Attaching packages ---------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v stringr 1.3.1
## v ggplot2 3.0.0     v forcats 0.3.0

## -- Conflicts ------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
library(readxl)

Comparisum of Income between Victoria, Tasmania and Western Australia

Please NOTE , I had to rename the data sets to :

 TasHouse .xlsx  and  
 VicHouse .xlsx
 WestHouse .xlxs
 
 Because they had the SAME name, and importing them caused probs 
 as one would simply overwrite the other

Import Section

#library(readxl)

# TasHouse <- read_excel("C:/Users/dan/Desktop/Pre Pro 3/TasHouse.xlsx")
# #View(TasHouse)
# VicHouse <- read_excel("C:/Users/dan/Desktop/Pre Pro 3/VicHouse.xlsx")
# #View(VivHouse)
# WestHouse <- read_excel("C:/Users/dan/Desktop/Pre Pro 3/WestHouse.xlsx")
# #View(WestHouse)

#library(readxl)
TasHouse <- read_excel("TasHouse.xlsx")
#View(TasHouse)
#library(readxl)
VicHouse <- read_excel("VicHouse.xlsx")
#View(VicHouse)
#library(readxl)
WestHouse <- read_excel("WestHouse.xlsx")
#View(WestHouse)

extract certain row and / or columns

All three Sections below will prepare each of the imported data sets

in readiness for analysis

Section 1 ######## Preparing Data for Victoria

All section 1 for Victorian household income <<<==== added

Sort out what i wish to look at in the Victoria data frame

#need rows
vic94a <- VicHouse[,1:16]
vic94b <- VicHouse[ 7:13 , 1:16]  
vic94c <- VicHouse[ 16:24 , 1:16]

vic <- rbind(as.data.frame(vic94a, vic94b, vic94c))

vic1 <- vic[-c(25:117),]
#View(vic1)
vic1 <- vic1[-c(1:3),]
#View(vic1)
vic1 <- vic1[-c(2),]
#View(vic1)
vic1 <- vic1[-c(10),]
#View(vic1)

rownames(vic1) <- NULL  ##  Renumber rows
#View(vic1)
#head(vic1)

##-------------I'm using the row names in this section to 
##-------------Identify which rows are what later in the coding


vic1[1, 1] <- "Years"  ## Change the name of the rows
vic1[1, 2] <- "Dollars"

rownames(vic1) <- NULL##  Now remove row 2

 
vic2 <-  vic1[-c(2),]    ##############           <<<<<===========   Change to vic2 here
rownames(vic2) <- NULL##  Now remove row 2        ##     because this makes it easier to 
#View(vic2)                                       ##     know which works from what dosent 

vic2 <-  vic2[-c(9),]
rownames(vic2) <- NULL##  Now remove row 2
#View(vic2)

## NOTE rows 1 - 8 are Wages , and Rows 9 - 17 are Percentiles <<===== For the Victorian data  ##########


vic2 <- vic2[-c(2:6),]
rownames(vic2) <- NULL##  Now remove row 2
#View(vic2)

vic2 <- vic2[-c(3:12),]     ###                  
rownames(vic2) <- NULL##  Now remove row 

vic2[,2]<- NULL   ## Remove column 2           <<<<<==========    Reduce to 2 columns here
#str(vic2)

##  Rename columns


names(vic2) <- c("Years", "1995", "1996", "1997", "1998",
                          "2000", "2001", "2003", "2004",  
                          "2006", "2008", "2010", "2012",
                          "2014", "2016")

vic2 <- vic2[-1,]   #   c(3:12)
rownames(vic2) <- NULL##  Now remove row 2
#str(vic2)
#View(vic2)

##  Used gather here to Transform wide to long    <<<=============

vic2 <- gather(vic2)   ##  used gather here       <<==============
#str(vic2)
colnames(vic2)[1] <- 'Years' 
colnames(vic2)[2] <- 'VicHouseholds'
## Now remove row 1, then change all to numeric
vic2 <- vic2[-1,]
## Now renumber rows 
rownames(vic2) <- NULL##  Now remove row 2

vic2 <- as.data.frame(vic2)   #, dimnames = NULL


vic2$Years <- as.numeric(vic2$Years)
vic2$VicHouseholds <- as.numeric(vic2$VicHouseholds)

str(vic2)          ## Doing a structure check here check here   <<===============

## 'data.frame':    14 obs. of  2 variables:
##  $ Years        : num  1995 1996 1997 1998 2000 ...
##  $ VicHouseholds: num  1340 1297 1342 1400 1534 ...

## Checks and Winsoring  <<==========   Also imputational if needed

##----------------------------------------------------   KEEP  -------Check functions

##  This looks for NaN, Inf, -Inf , and then tells me if they are 
##  present in this particular column

is.special <- function(vic2){
if (is.numeric(vic2)) !is.finite(vic2) else is.na(vic2)
}

sum (sapply(vic2, is.special))  ## <<===     check here

## [1] 0

# Use Imputational method below ##  Not needed , so no variable changed   <<============

#surveys_combined$hindfoot_length <- impute(surveys_combined$hindfoot_length, fun = mean)

#View(vic2)

Section 2 ######## Preparing Data for Tasmania

Redo all below for Tasmanian household income <<<==== added

———————————

Sort out what i wish to look at in the Victoria data frame

#need rows
t94a <- TasHouse[,1:16]
t94b <- TasHouse[ 7:13 , 1:16]  
t94c <- TasHouse[ 16:24 , 1:16]

tas <- rbind(as.data.frame(t94a, t94b, t94c))

tas1 <- tas[-c(25:117),]
#View(vic1)
tas1 <- tas1[-c(1:3),]
#View(vic1)
tas1 <- tas1[-c(2),]
#View(vic1)
tas1 <- tas1[-c(10),]
#View(vic1)

rownames(vic1) <- NULL  ##  Renumber rows
#View(vic1)
#head(vic1)

##-------------I'm using the row names in this section to 
##-------------Identify which rows are what later in the coding


tas1[1, 1] <- "Years"  ## Change the name of the rows
tas1[1, 2] <- "Dollars"

rownames(tas1) <- NULL##  Now remove row 2

## NOTE rows 1 - 8 are Wages , and Rows 9 - 17 are Percentiles <<===== For the Victorian data  ##########

tas2 <-  tas1[-c(2),]    ##############           <<<<<===========   Change to vic2 here
rownames(tas2) <- NULL##  Now remove row 2        ##     because this makes it easier to 
#View(vic2)                                       ##     know which works from what dosent 

tas2 <-  tas2[-c(9),]
rownames(tas2) <- NULL##  Now remove row 2
View(tas2)

tas2 <- tas2[-c(2:6),]
rownames(tas2) <- NULL##  Now remove row 2
#View(vic2)

tas2 <- tas2[-c(3:12),]     ###                  
rownames(tas2) <- NULL##  Now remove row 

tas2[,2]<- NULL   ## Remove column 2           <<<<<==========    Reduce to 2 columns here
#str(vic2)


##  Rename columns


names(tas2) <- c("Years", "1995", "1996", "1997", "1998",
                          "2000", "2001", "2003", "2004",  
                          "2006", "2008", "2010", "2012",
                          "2014", "2016")

tas2 <- tas2[-1,]   #   c(3:12)
rownames(tas2) <- NULL##  Now remove row 2
#str(vic2)
#View(vic2)


##  Used gather here to Transform wide to long    <<<=============

tas2 <- gather(tas2)
#str(vic2)
colnames(tas2)[1] <- 'Years' 
colnames(tas2)[2] <- 'TasHouseholds'
## Now remove row 1, then change all to numeric
tas2 <- tas2[-1,]
## Now renumber rows 
rownames(tas) <- NULL##  Now remove row 2
## Change column 2 of this data frame to numeric for analysis and NA check

tas2 <- as.data.frame(tas2)   #, dimnames = NULL


tas2$Years <- as.numeric(tas2$Years)
tas2$TasHouseholds <- as.numeric(tas2$TasHouseholds)

str(tas2)          ## Doing a structure check here check here   <<===============

## 'data.frame':    14 obs. of  2 variables:
##  $ Years        : num  1995 1996 1997 1998 2000 ...
##  $ TasHouseholds: num  1146 1138 1122 1107 1134 ...

## Checks and Winsoring  <<==========   Also imputational if needed

##----------------------------------------------------   KEEP  -------Check functions

##  This looks for NaN, Inf, -Inf , and then tells me if they are 
##  present in this particular column

is.special <- function(tas2){
if (is.numeric(tas2)) !is.finite(tas2) else is.na(tas2)
}

sum (sapply(tas2, is.special))  ## <<===     check here

## [1] 0

# Use Imputational method below ##  Not needed , so no variable changed   <<============

#surveys_combined$hindfoot_length <- impute(surveys_combined$hindfoot_length, fun = mean)

#View(tas2)

Section 3 ######## Preparing Data for Western Australia

Redo all below for Western Aus household income <<<==== added

———————————

Sort out what i wish to look at in the Victoria data frame

#need rows
w94a <- WestHouse[,1:16]
w94b <- WestHouse[ 7:13 , 1:16]  
w94c <- WestHouse[ 16:24 , 1:16]

wes <- rbind(as.data.frame(w94a, w94b, w94c))

wes1 <- wes[-c(25:117),]
#View(vic1)
wes2 <- wes1[-c(1:3),]
#View(vic1)
wes2 <- wes2[-c(2),]
#View(vic1)
wes2 <- wes2[-c(10),]
#View(vic1)

rownames(wes2) <- NULL  ##  Renumber rows
#View(vic1)
#head(vic1)

##-------------I'm using the row names in this section to 
##-------------Identify which rows are what later in the coding


wes2[1, 1] <- "Years"  ## Change the name of the rows
wes2[1, 2] <- "Dollars"

rownames(wes2) <- NULL##  Now remove row 2

 
wes2 <-  wes2[-c(2),]    ##############           <<<<<===========   Change to vic2 here
rownames(wes2) <- NULL##  Now remove row 2        ##     because this makes it easier to 
#View(vic2)                                       ##     know which works from what dosent

## NOTE rows 1 - 8 are Wages , and Rows 9 - 17 are Percentiles <<===== For the Victorian data  ##########

wes2 <-  wes2[-c(9),]
rownames(wes2) <- NULL##  Now remove row 2
View(wes2)

wes2 <- wes2[-c(2:6),]
rownames(tas2) <- NULL##  Now remove row 2
#View(vic2)

wes2 <- wes2[-c(3:12),]     ###                  
rownames(wes2) <- NULL##  Now remove row 

wes2[,2]<- NULL   ## Remove column 2           <<<<<==========    Reduce to 2 columns here
#str(vic2)

##  Rename columns


names(wes2) <- c("Years", "1995", "1996", "1997", "1998",
                          "2000", "2001", "2003", "2004",  
                          "2006", "2008", "2010", "2012",
                          "2014", "2016")

wes2 <- wes2[-1,]   #   c(3:12)
rownames(wes2) <- NULL##  Now remove row 2
#str(vic2)
#View(vic2)

##  Used gather here to Transform wide to long    <<<=============

wes2<- gather(wes2)
#str(vic2)
colnames(wes2)[1] <- 'Years' 
colnames(wes2)[2] <- 'WesHouseholds'
## Now remove row 1, then change all to numeric
wes2 <- wes2[-1,]
## Now renumber rows 
rownames(wes2) <- NULL##  Now remove row 2

## Change column 2 of this data frame to numeric for analysis and NA check

wes2 <- as.data.frame(wes2)   #, dimnames = NULL


wes2$Years <- as.numeric(wes2$Years)
wes2$WesHouseholds <- as.numeric(wes2$WesHouseholds)

str(wes2)          ## Doing a structure check here check here   <<===============

## 'data.frame':    14 obs. of  2 variables:
##  $ Years        : num  1995 1996 1997 1998 2000 ...
##  $ WesHouseholds: num  1339 1341 1396 1394 1449 ...

## Checks and Winsoring  <<==========   Also imputational if needed

##----------------------------------------------------   KEEP  -------Check functions

##  This looks for NaN, Inf, -Inf , and then tells me if they are 
##  present in this particular column

is.special <- function(wes2){
if (is.numeric(wes2)) !is.finite(wes2) else is.na(wes2)
}

sum (sapply(vic2, is.special))  ## <<===     check here

## [1] 0

#View(wes2)

# Use Imputational method below ##  Not needed , so no variable changed   <<============

#surveys_combined$hindfoot_length <- impute(surveys_combined$hindfoot_length, fun = mean)

Merge all three cleaned data sets into one data set using left_join and inner_join

Combined <- left_join(vic2, tas2 )      ## This is the first join

## Joining, by = "Years"

Combined <- inner_join(Combined, wes2)  ## This is the second join

## Joining, by = "Years"

                                        ## All joined by years            <<=========
#View(Combined)
#head(Combined)
#str(Combined)
#class(Combined)

Use Mutate to add % dif columns in Household incomes between the three states

Combined <- mutate(Combined, VicTasDif =  TasHouseholds / VicHouseholds )  
Combined$VicTasDif =  round(Combined$VicTasDif, 3)

Combined <- mutate(Combined, VicWesDif =  WesHouseholds / VicHouseholds )
Combined$VicWesDif =  round(Combined$VicWesDif, 3)

Combined <- mutate(Combined, TasWesDif = TasHouseholds / WesHouseholds)
Combined$TasWesDif =  round(Combined$TasWesDif, 3)

#str(Combined)
#View(Combined)
#head(Combined)

ggplots

ggplot(Combined, aes(Combined$Years)) + 
  geom_line(aes(y = Combined$VicHouseholds, colour = "red")) + 
  geom_line(aes(y = Combined$TasHouseholds, colour = "blue")) +
  geom_line(aes(y = Combined$WesHouseholds, colour = "green")) +
  xlab("Years") + ylab("Household Incomes per State ($)") +
  scale_colour_discrete(name="States", labels=c("Tas","WA","Vic")) +
  theme(panel.border = element_blank())

##     Plot of WA & Tas showing % difference in household to Victoria

ggplot(Combined, aes(Combined$Years)) + 
  geom_line(aes(y = Combined$VicTasDif, colour = "red")) + 
  geom_line(aes(y = Combined$VicWesDif, colour = "blue")) +
  #geom_line(aes(y = Combined$TasWesDif, colour = "green")) +
  xlab("Years") + ylab("As  %  to Victoria Household Income") +
  scale_colour_discrete(name="States", labels=c("WA","Tas","Vic")) +
  theme(panel.border = element_blank())

write.csv(Combined, file = "Combined.csv")  ## Create new file and save back to your folder

Assign 3 Pre Pos

s3686502 Dan Enoka and s3350855 Grace Darlene

8 October 2018

Information is from the site ststed below

Viewable at ( Published to ) <<== dont forget to update B4 submission

What is this ??

Comparisum of Income between Victoria, Tasmania and Western Australia

Please NOTE , I had to rename the data sets to :

Import Section

extract certain row and / or columns

All three Sections below will prepare each of the imported data sets

in readiness for analysis

All section 1 for Victorian household income <<<==== added

Sort out what i wish to look at in the Victoria data frame

Redo all below for Tasmanian household income <<<==== added

———————————

Sort out what i wish to look at in the Victoria data frame

Redo all below for Western Aus household income <<<==== added

———————————

Sort out what i wish to look at in the Victoria data frame

Merge all three cleaned data sets into one data set using left_join and inner_join

Use Mutate to add % dif columns in Household incomes between the three states

ggplots

Citations ??