Web Scraping from the Page Stated Below

webpage <- read_html("http://www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0")

There was no downloadable file on this page, hence all stats are taken directly from the page itself.

To be able to complete my Assignment, I needed to access two tables from this web page, assemble the required data into a data frame, wrangle it into a form I could use and then begin my assessments on which parts to use.

Of note: a lot of commented-out function calls were used in the assessment of this data,

but they are intentionally commented out so as not to clutter this Assignment, and have

been left in as references for any further possible assessment.

# Read and parse the HTML page at this URL (ABS catalogue 3310.0); the parsed
# document is stored in `webpage`.
# NOTE(review): read_html() is presumably provided by rvest / xml2, loaded
# outside the visible code — confirm.
webpage <- read_html("http://www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0")

This code below will generate two data frames that I’ll use to extract information from

## Create two data frames to extract data from.
## html_nodes(<page>, "table") collects every <table> node of the parsed page;
## the first and second ones are converted with html_table().
## The tables are read from `webpage` (the object returned by read_html()):
## the name `tbls` used previously is never defined anywhere in this file.
page_tables <- html_nodes(webpage, "table")

Marriage <- html_table(page_tables[[1]], fill = TRUE)

#head(Marriage)
#str(Marriage)
#View(Marriage)

Divorce <- html_table(page_tables[[2]], fill = TRUE)

## Column selection (exploratory only; nothing below reads Col_1)
Col_1 <- Marriage[, 3:11]
#View(Col_1)

## Row selection: one table row per measure, with the leading columns dropped
## (four for the Marriage table, three for the Divorce table) so that only the
## yearly figures remain.
## NOTE(review): the row numbers are positional and assume the page layout has
## not changed since the script was written — confirm against the live tables.
Col_1a <- Marriage[4, c(-1, -2, -3, -4)]   ## No. of Marriages
#View(Col_1a)
Col_2a <- Divorce[4, c(-1, -2, -3)]        ## No. of Divorces
#View(Col_2a)
Col_2b <- Divorce[10, c(-1, -2, -3)]       ## Involving Children
Col_2c <- Divorce[19, c(-1, -2, -3)]       ## Male initiated
Col_2d <- Divorce[20, c(-1, -2, -3)]       ## Female initiated

## The years covered by the figures; also needed as a column a little later.
Years <- c(1996, 2006, 2012, 2013, 2014, 2015, 2016)
year_labels <- as.character(Years)

## Give the first seven columns of a one-row selection the year labels, so that
## all five selections share identical column names before being row-bound.
label_years <- function(selection) {
  colnames(selection)[seq_along(year_labels)] <- year_labels
  selection
}

Col_1a <- label_years(Col_1a)
Col_2a <- label_years(Col_2a)
Col_2b <- label_years(Col_2b)
Col_2c <- label_years(Col_2c)
Col_2d <- label_years(Col_2d)

##  bind it all together then transpose it
Combined <- rbind.data.frame(Col_1a, Col_2a, Col_2b, Col_2c, Col_2d)
#View(Combined)

Combined2 <- t(Combined)  # Transpose: one row per year, one column per measure
#str(Combined2)
#View(Combined2)

## Rename columns. R indexes from 1, so there is no column 0 to call "Year";
## the year column is added by the cbind() below instead.
colnames(Combined2) <- c("Num_Marriages",
                         "Num_Divorces",
                         "Div_Involving_Children",
                         "Male_initiated_Divorce",
                         "Female_initiated_Divorce")
#View(Combined2)
#str(Combined2)

## tidy things up a lil bit more
## cbind() of the numeric Years with the transposed (text) matrix yields a text
## matrix, so every column of the resulting data frame still holds text here.
Combined2 <- as.data.frame(cbind(Years, Combined2))
row.names(Combined2) <- NULL   ##  Remove row names, and renumber as normal
#View(Combined2)

I hit a wall here: the columns couldn't, and wouldn't, be changed to numeric while keeping their original values.

The answer was in doing it the old-school way; the original values had a comma separator, hence

the confusion everywhere – seriously??

## Convert every scraped column from text to real numbers.
## The scraped figures carry comma thousands separators, which as.numeric()
## cannot parse directly. Stripping the separators lets the values come
## straight from the scraped tables instead of being re-typed by hand, so
## Combined3 always reflects what was actually read from the page.
parse_count <- function(x) {
  # Go through as.character() first: calling as.numeric() on a factor would
  # return its internal level codes rather than the displayed values.
  cleaned <- gsub("[,[:space:]]", "", as.character(x))
  as.numeric(cleaned)
}

Combined3 <- as.data.frame(Combined2)
# `[] <-` replaces the column contents while keeping the column names
# (Years, Num_Marriages, Num_Divorces, Div_Involving_Children,
# Male_initiated_Divorce, Female_initiated_Divorce).
Combined3[] <- lapply(Combined3, parse_count)

# lapply(Combined3, list)        ##  inspect each converted column

# head(Combined3)
# str(Combined3)
# View(Combined3)                ##   I use these for checking everything
# attributes(Combined3)
# typeof(Combined3)
# mode(Combined3)

Save the file in case I need it later, as this site had no downloadable .csv, .xlsx or .pdf files.

# write.csv(Combined3, file = "C:/Users/dan/Desktop/a Visualization/Combined3.csv", row.names = FALSE)

Now for the fun stuff

## Base-graphics settings. par() only influences base R plots, not the
## ggplot2 / cowplot figure built below; the calls are kept so the session's
## graphics state is left exactly as before.
par(mfrow = c(1, 1))   ##  Reset columns to original setting   ----------------   KEEP    -----------
par(mar = c(5, 8, 4, 2))

## Shared y-axis limits for both plots, so the two overlaid panels use the
## same scale. a2 is the largest marriage count; the commented value next to
## a1 is the smallest divorce count.
a1 <- 35000         #46498
a2 <- 123243

## Fitted trend line (linear model on x + log(x), no confidence band, no
## legend) in the requested colour. Used for both series.
trend_line <- function(line_colour) {
  geom_smooth(mapping = aes(linetype = "r2"),
              method = "lm",
              formula = y ~ x + log(x), se = FALSE,
              color = line_colour,
              show.legend = FALSE)
}

## Base plot: divorces per year. It carries the visible axes, title and key.
## NOTE(review): the y-axis label mentions marriages only, although this axis
## is shared by the divorce and marriage series — confirm the intended wording.
pc1 <- ggplot(Combined3, aes(x = Years, y = Num_Divorces)) +
  xlab("Years") +
  ylab("Total  Number  of  Marriages") +
  ylim(a1, a2) +
  ggtitle("Australian Divorce Rate for Years 1996 - 2016",
          subtitle = "                    www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0") +
  annotate("text", x = Inf, y = Inf,
           label = "Marriage = Blue Line\n Divorce = Red Line",
           vjust = 4, hjust = 1)

  #caption="Image:www.colourbox.com/preview/11169074-hand-drawing-cartoon-happy-family.jpg ")

## Divorce series: red trend line plus the yearly data points.
## geom_point() belongs here, on a plot that has Combined3 and the x/y mapping;
## added to the ggdraw() canvas it has no access to this data.
pc2 <- pc1 +
  trend_line("red") +
  geom_point()

## Overlay plot: marriages per year on the same y-limits, without labels.
pc3 <- ggplot(Combined3, aes(x = Years, y = Num_Marriages)) +
  xlab("") +
  ylab("") +
  ylim(a1, a2) +
  theme(axis.ticks.y = element_blank())

## Marriage series: blue trend line plus the yearly data points.
pc4 <- pc3 +
  trend_line("blue") +
  geom_point()

## Strip every axis element from the overlay and pad it with hand-tuned
## margins so that its panel lines up with the panel of pc2 underneath.
## NOTE(review): the overlay only works if plot/panel backgrounds are
## transparent; presumably the active theme provides that — confirm.
pc4 <- pc4 + theme(axis.text.y = element_blank(),
                   axis.title.y = element_blank(),
                   axis.ticks.y = element_blank(),
                   axis.text.x = element_blank(),
                   axis.title.x = element_blank(),
                   axis.ticks.x = element_blank(),
                   plot.margin = margin(t = 1, r = 1, b = 45, l = 74))

## Compose the final figure: background image first, then the divorce plot,
## then the marriage overlay on top.
ggdraw() +
  draw_image("https://www.colourbox.com/preview/11169074-hand-drawing-cartoon-happy-family.jpg") +
  draw_plot(pc2) +
  draw_plot(pc4)

par(mfrow = c(1, 1))   ##  Reset columns to original setting
par(mar = c(5, 4, 4, 2))   ##  Reset margins to Original setting

##----------------------------  labs

Conclusion

Findings to date

To be honest, I wish my data set was longer; I almost want to go grab another data set and see if I can make a bigger, better, cool-looking visual of something.

References

All Statistical Data taken from the webpage as stated below

http://www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0

To complete the above visual, information was retrieved from two tables on this webpage.

Table 1 Marriages and Table 2 Divorces.

The Image as used in the visual was accessed from the site

https://www.colourbox.com/preview/11169074-hand-drawing-cartoon-happy-family.jpg

As noted, this image has only been used for demonstration purposes in a University Assignment and will NOT be used for any commercial activity whatsoever.