## Loading required package: xml2
## Loading required package: bitops
## Linking to ImageMagick 6.9.9.14
## Enabled features: cairo, freetype, fftw, ghostscript, lcms, pango, rsvg, webp
## Disabled features: fontconfig, x11
## Loading required package: ggplot2
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
webpage <- read_html("http://www.abs.gov.au/AUSSTATS/abs@.nsf/Lookup/6401.0Feature+Article2Mar+2017")
CPIwebpage <- read_html("https://www.rba.gov.au/calculator/annualDecimal.html") ## CPI webpage
https://youtu.be/D6uG54Jv-HE
## Show a better visual of the above Tables
tbls <- html_nodes(webpage, "table")
##head(tbls) ## Show No of tables and structure
## Create two Data frames for me to extract Data from
tbls_1 <- html_table(html_nodes(tbls, "table")[[1]],fill = TRUE)
#head(tbls_1, n=8)
#str(tbls_1) ## chr, chr
#View()
tbls_2 <- html_table(html_nodes(tbls, "table")[[3]],fill = TRUE)
#head(tbls_2, n=8)
#str(tbls_2) ## chr, chr
#View(tbls_1)
# View(tbls_2)
#colnames(tbls_1)
tbls_1 <- tbls_1 %>% arrange(X1,X2)
#head(tbls_1,n=6)
#str(tbls_1) ## chr, chr
tbls_2 <- tbls_2 %>% arrange(X1,X2)
#head(tbls_2,n=6)
#str(tbls_2) ## chr, chr
# View(tbls_1)
# View(tbls_2)
tbls_1 <- tbls_1[-25,] ##Note as rows deduct , then u deduct same num from expected, -1, -2 etc
tbls_1 <- tbls_1[-75,]
tbls_1 <- tbls_1[-84,] ## Delete row 76 table 1 as this distorted
#View(tbls_1) ## everything, appeared to be a typo
tbls_2 <- tbls_2[-25,]
tbls_2 <- tbls_2[-84,]
#View(tbls_2)
tbls_1 <- tbls_1[-1,] ##Note as rows deduct , then u deduct same num from 2nd expected, -1, -2 etc
tbls_1 <- tbls_1[-1,]
#View(tbls_1)
tbls_2 <- tbls_2[-1,] ##Note as rows deduct , then u deduct same num from 2nd expected, -1, -2 etc
tbls_2 <- tbls_2[-1,]
#View(tbls_2)
# nrow(tbls_1)
# nrow(tbls_2) ## Check they now match
# str(tbls_1) ## chr, chr
# str(tbls_2) ## chr, chr
## There was a problem here , so I changed to Factor to match for cbinding
#tbls_1$X2 <- as.factor(tbls_1$X2)
#str(tbls_1) ## chr, fac
tbls_2$X2 <- as.factor(tbls_2$X2)
#str(tbls_2) ## chr, fac
# View(tbls_1)
# View(tbls_2)
# View(tbls_1)
# View(tbls_2)
tbls_1 <- tbls_1 %>% arrange(X1)
#head(tbls_1,n=6)
#str(tbls_1) ## chr, chr
tbls_2 <- tbls_2 %>% arrange(X1)
# head(tbls_1, n=30)
# head(tbls_2, n=30)
#
# View(tbls_1)
# View(tbls_2)
#
# nrow(tbls_1)
# nrow(tbls_2) ## All good and it matches
tbls_1$X2 <- as.numeric(as.character(tbls_1$X2)) ## Change factor to numeric , 2011
tbls_2$X2 <- as.numeric(as.character(tbls_2$X2)) ## Change factor to numeric , 2017
# names(tbls_1)[2] <- 'Price 2011'
# names(tbls_2)[2] <- 'Price 2017'
# head(tbls_1, n=5)
#head(tbls_2, n=5)
Expected2017 <- as.numeric (tbls_1$X2 * 1.12) ## total is 12% increase for 2011-2017
Expected2017 <- round (Expected2017,2)
#matrix(Expected2017)
#str(tbls_4)
#nrow(Expected2017)
#ncol(Expected2017)
#head(Expected2017)
#View(Expected2017)
Percent <- round(((( tbls_2$X2 / tbls_1$X2 ) - 1) * 100),2)
#head(percent) ## Percentage increase or decrease per item
#nrow(percent)
tbls_3 <- cbind.data.frame(tbls_1$X1, tbls_1$X2, tbls_2$X2,
Expected2017, Percent)
names(tbls_3)[1] <- 'Product'
names(tbls_3)[2] <- 'Price2011'
names(tbls_3)[3] <- 'Price2017'
#head(tbls_3, n=4)
tbls_3$Product <- as.character(tbls_3$Product) # change Factor to char
tbls_4 <- tbls_3 %>% arrange(-Percent) ## sort in descending order for yrs 2011-2017
# head(tbls_4, n=5)
#str(tbls_3)
#View(tbls_4)
Sample_1 <- tbls_4 %>% top_n(5) ## Taking a subgroup of top 5 items only
## Selecting by Percent
#Sample_1
Sample_2 <- tbls_4 %>% top_n(-5) ## Taking a subgroup of bottom 5 items only
## Selecting by Percent
#Sample_2
# Sample_2 <- Sample_1 %>% arrange(Percent) ## Not used but kept as reminder
# Sample_2
# max_2 <- max(round(Sample_2$Percent, 0) +5)
# max_2
# min_2 <- min(round(Sample_2$Percent, 0) -5)
# min_2
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
p1 <- ggplot(Sample_2, aes(x = Product, y = Percent))
p1 + geom_bar(stat = "identity",fill = "#5E4FA2")+
labs(title = "5 Highest Decrease's for Yrs 2011-2017",
y = "Price % Decrease",
x = " ")+
geom_text(aes(label=round(Percent,0)), vjust = -0.05,size = 4, hjust = 1.2)+
#coord_flip()
ylim(-50, 0)+ #min_2, max_2
theme(axis.text.x.bottom = element_text(angle = 0, hjust = 0.65, size = 11,vjust = -0.25))+
coord_flip()
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
p2 <- ggplot(Sample_1, aes(x = Product, y = Percent))
p2 + geom_bar(stat = "identity",fill = "#5E4FA2")+
labs(title = "5 Highest Increase's for Years 2011 - 2017",
y = "Price % Increase",
x = " ")+
geom_text(aes(label=round(Percent,0)), vjust = 0,size = 4.3, hjust = - 1)+
#coord_flip()+
#coord_flip()
ylim(0, 90)+
coord_flip()
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
Sample_3 <- rbind(Sample_1, Sample_2)
# nrow(Sample_3)
# head(Sample_3, n=10)
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
p1 <- ggplot(Sample_3, aes(x = Product, y = Percent))
p1 +geom_bar(stat = "identity",fill = "#A6CEE3")+ #CAB2D6 #5E4FA2
labs(title = "Highest and Lowest Increase's ",
y = "Price % Change for Years 2011-2017",
x = " ")+
geom_text(aes(label=round(Percent,0)), vjust = 0.2,size = 4.3, hjust = 1.3)+
ylim(-60, 90)+
coord_flip()
par(mfrow = c(1,1)) ## Reset columns to original setting ---------------- KEEP -----------
par(mar=c(5,8,4,2))
A lot of the working has been kept in to show the steps taken in arriving at this final graph the results are to me extraordinary especially for Tobacco with an 84% increase in price from the 2011 officially listed price. I wasn’t expecting anything like these values, I thought some would go up a little and similarly a few might reduce, not like these shown values.