R Markdown

## Loading required package: xml2

## Loading required package: bitops

## Linking to ImageMagick 6.9.9.14
## Enabled features: cairo, freetype, fftw, ghostscript, lcms, pango, rsvg, webp
## Disabled features: fontconfig, x11

## Loading required package: ggplot2

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggplot2':
## 
##     ggsave

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

webpage <- read_html("http://www.abs.gov.au/AUSSTATS/abs@.nsf/Lookup/6401.0Feature+Article2Mar+2017")

CPIwebpage <- read_html("https://www.rba.gov.au/calculator/annualDecimal.html")  ## CPI webpage

Total increase for these years is 12 % CPI

URL to view Presentation <<=========================

         https://youtu.be/D6uG54Jv-HE

I had a few problems getting this to a see-able thing, unfortunely the graphics aren’t fantastic

but it works , so I’m going with this.

Also of note, this is the shortened version as the original was considered to long by one of

the sites so I’ve chopped a few slides out, bringing it down to this basic version

## Show a better visual of the above Tables
tbls <- html_nodes(webpage, "table")
##head(tbls)   ##  Show No of tables and structure

tbls_1 is CPI EXPENDITURE CLASSES RANKED BY AVERAGE WEEKLY HOUSEHOLD EXPENDITURE, 2011

tbls_2 is CPI EXPENDITURE CLASSES RANKED BY AVERAGE WEEKLY HOUSEHOLD EXPENDITURE, 2017

## Create two Data frames for me to extract Data from
tbls_1 <- html_table(html_nodes(tbls, "table")[[1]],fill = TRUE)
#head(tbls_1, n=8)
#str(tbls_1)    ## chr, chr
#View()

tbls_2 <- html_table(html_nodes(tbls, "table")[[3]],fill = TRUE)
#head(tbls_2, n=8)
#str(tbls_2)   ## chr, chr

#View(tbls_1)
# View(tbls_2)

Put each table into a matching order

#colnames(tbls_1)    
tbls_1 <- tbls_1 %>%  arrange(X1,X2)
#head(tbls_1,n=6)
#str(tbls_1)    ## chr, chr
tbls_2 <- tbls_2 %>%  arrange(X1,X2)
#head(tbls_2,n=6)
#str(tbls_2)    ## chr, chr

# View(tbls_1)
# View(tbls_2)

tbls_1 <- tbls_1[-25,]   ##Note as rows deduct , then u deduct same num from expected, -1, -2 etc
tbls_1 <- tbls_1[-75,]
tbls_1 <- tbls_1[-84,]     ## Delete row 76  table 1 as this distorted 
#View(tbls_1)                        ## everything, appeared to be a typo

tbls_2 <- tbls_2[-25,]
tbls_2 <- tbls_2[-84,]
#View(tbls_2)

tbls_1 <- tbls_1[-1,]   ##Note as rows deduct , then u deduct same num from 2nd expected, -1, -2 etc
tbls_1 <- tbls_1[-1,]
#View(tbls_1)

tbls_2 <- tbls_2[-1,]   ##Note as rows deduct , then u deduct same num from 2nd expected, -1, -2 etc
tbls_2 <- tbls_2[-1,]
#View(tbls_2)

# nrow(tbls_1)
# nrow(tbls_2)   ##  Check they now match
# str(tbls_1)   ## chr, chr
# str(tbls_2)   ## chr, chr

## There was a problem here , so I changed to Factor to match for cbinding
#tbls_1$X2 <- as.factor(tbls_1$X2)
#str(tbls_1)   ## chr, fac
tbls_2$X2 <- as.factor(tbls_2$X2)
#str(tbls_2)   ## chr, fac
# View(tbls_1)
# View(tbls_2)
# View(tbls_1)
# View(tbls_2)

tbls_1 <- tbls_1 %>%  arrange(X1)
#head(tbls_1,n=6)
#str(tbls_1)    ## chr, chr
tbls_2 <- tbls_2 %>%  arrange(X1)

# head(tbls_1, n=30)
# head(tbls_2, n=30)
# 
# View(tbls_1)
# View(tbls_2)
# 
# nrow(tbls_1)
# nrow(tbls_2)     ##  All good and it matches

Bind it all up and assign some new names

tbls_1$X2 <- as.numeric(as.character(tbls_1$X2)) ## Change factor to numeric , 2011
tbls_2$X2 <- as.numeric(as.character(tbls_2$X2)) ## Change factor to numeric , 2017
# names(tbls_1)[2] <- 'Price 2011'
# names(tbls_2)[2] <- 'Price 2017'
# head(tbls_1, n=5)
#head(tbls_2, n=5)

Expected2017 <-  as.numeric (tbls_1$X2 * 1.12)  ## total is 12% increase for 2011-2017
Expected2017 <-  round (Expected2017,2)
#matrix(Expected2017) 
#str(tbls_4)
#nrow(Expected2017)
#ncol(Expected2017)
#head(Expected2017)
#View(Expected2017)

Percent <-   round((((  tbls_2$X2 / tbls_1$X2 ) - 1) * 100),2)
#head(percent)  ## Percentage increase or decrease per item
#nrow(percent)

tbls_3 <- cbind.data.frame(tbls_1$X1, tbls_1$X2,  tbls_2$X2,
                           Expected2017,    Percent)

names(tbls_3)[1] <- 'Product'
names(tbls_3)[2] <- 'Price2011'
names(tbls_3)[3] <- 'Price2017'

#head(tbls_3, n=4)

tbls_3$Product <- as.character(tbls_3$Product) # change Factor to char

tbls_4 <- tbls_3 %>% arrange(-Percent) ## sort in descending order for yrs 2011-2017
# head(tbls_4, n=5)
#str(tbls_3)

Choose top 5 and lowest 5 for display

#View(tbls_4)
Sample_1 <- tbls_4 %>% top_n(5)  ## Taking a subgroup of top 5 items only

## Selecting by Percent

#Sample_1

Sample_2 <- tbls_4 %>% top_n(-5)  ## Taking a subgroup of bottom 5 items only

## Selecting by Percent

#Sample_2

# Sample_2 <- Sample_1 %>% arrange(Percent)   ##    Not used but kept as reminder 
# Sample_2

# max_2 <- max(round(Sample_2$Percent, 0) +5)
# max_2
# min_2 <- min(round(Sample_2$Percent, 0) -5)
# min_2

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2)) 


p1 <- ggplot(Sample_2, aes(x = Product, y = Percent)) 
p1 + geom_bar(stat = "identity",fill = "#5E4FA2")+
  labs(title = "5 Highest Decrease's for Yrs 2011-2017",
       y = "Price % Decrease",
       x = " ")+
  geom_text(aes(label=round(Percent,0)), vjust = -0.05,size = 4, hjust = 1.2)+
  #coord_flip()
  ylim(-50, 0)+      #min_2, max_2
  theme(axis.text.x.bottom = element_text(angle = 0, hjust = 0.65, size = 11,vjust = -0.25))+
  coord_flip()

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2))

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2)) 


p2 <- ggplot(Sample_1, aes(x = Product, y = Percent)) 
p2 + geom_bar(stat = "identity",fill = "#5E4FA2")+
  labs(title = "5 Highest Increase's for Years 2011 - 2017",
       y = "Price % Increase",
       x = " ")+
  geom_text(aes(label=round(Percent,0)), vjust = 0,size = 4.3, hjust = - 1)+
  #coord_flip()+
  #coord_flip()
  ylim(0, 90)+
  coord_flip()

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2))

do a combined

Sample_3 <- rbind(Sample_1, Sample_2)
# nrow(Sample_3)
# head(Sample_3, n=10)

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2)) 

p1 <- ggplot(Sample_3, aes(x = Product, y = Percent)) 
p1 +geom_bar(stat = "identity",fill = "#A6CEE3")+      #CAB2D6     #5E4FA2
  labs(title = "Highest and Lowest Increase's ",
       y = "Price % Change for Years 2011-2017",
       x = " ")+
  geom_text(aes(label=round(Percent,0)), vjust = 0.2,size = 4.3, hjust = 1.3)+
  ylim(-60, 90)+
  coord_flip()

par(mfrow = c(1,1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar=c(5,8,4,2))

Conclusion

A lot of the working has been kept in to show the steps taken in arriving at this final graph the results are to me extraordinary especially for Tobacco with an 84% increase in price from the 2011 officially listed price. I wasn’t expecting anything like these values, I thought some would go up a little and similarly a few might reduce, not like these shown values.

Assign 2

s3686502 Dan Enoka

18 September 2018