webpage <- read_html("http://www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0")
This publication provides data and information about marriages registered and divorces granted in Australia in 2016 on a state or territory of registration basis rather than a state or territory of usual residence. The publication presents statistics on the number of marriages registered, crude marriage rates, median age at marriage, age-specific marriage rates, previous marital status, use of marriage celebrants, country of birth of those marrying, and living arrangements for couples prior to marriage. Divorce statistics in this publication provide state, territory and national level data for the number of divorces granted, crude divorce rates, ages at marriage, separation and divorce, age-specific divorce rates, divorces involving children, duration of marriage prior to divorce, and applicants for divorce.
# Download the ABS marriages-and-divorces publication page (cat. no. 3310.0).
webpage <- read_html(x = "http://www.abs.gov.au/ausstats/abs@.nsf/mf/3310.0")
# Collect the <table> nodes of the downloaded page.
tbls <- html_nodes(x = webpage, css = "table")
# Optional interactive checks of what was collected:
# head(tbls)  # number of tables and their structure
# str(tbls)
# View(tbls)
Note that the `echo = FALSE` parameter prevents printing of the R code that generated the plot.
## Build the two data frames (Marriage, Divorce) that all later steps read
## from, then pick out the rows of interest.
# Run the <table> lookup on tbls once and reuse the result for both frames.
table_nodes <- html_nodes(tbls, "table")
Marriage <- html_table(table_nodes[[1]], fill = TRUE)
# head(Marriage)
# str(Marriage)
# View(Marriage)
Divorce <- html_table(table_nodes[[2]], fill = TRUE)

# Column selection: columns 3 to 11 of the Marriage table.
Col_1 <- Marriage[, 3:11]
# View(Col_1)

# Row selection: each pick keeps a single row and drops the leading columns
# (the first four for Marriage, the first three for Divorce).
Col_1a <- Marriage[4, -(1:4)]
# View(Col_1a)
Col_2a <- Divorce[4, -(1:3)]   # No. of Divorces
# View(Col_2a)
Col_2b <- Divorce[10, -(1:3)]  # Involving Children
Col_2c <- Divorce[19, -(1:3)]  # Male initiated
Col_2d <- Divorce[20, -(1:3)]  # Female initiated
## Label the value columns of every extracted row with the year they report,
## then stack the five rows into one data frame.
# The same seven labels apply to each frame; only the first seven column
# names are replaced, any further names are left as they are.
year_labels <- c("1996", "2006", "2012", "2013", "2014", "2015", "2016")
label_idx <- seq_along(year_labels)
colnames(Col_1a)[label_idx] <- year_labels
colnames(Col_2a)[label_idx] <- year_labels
colnames(Col_2b)[label_idx] <- year_labels
colnames(Col_2c)[label_idx] <- year_labels
colnames(Col_2d)[label_idx] <- year_labels
# rbind needs matching column names, which the relabelling above provides.
Combined <- rbind.data.frame(Col_1a, Col_2a, Col_2b, Col_2c, Col_2d)
# View(Combined)
# Years covered by the extracted columns; becomes the first column below.
Years <- c(1996, 2006, 2012, 2013, 2014, 2015, 2016)
# Transpose so that each year is a row and each measure is a column.
Combined2 <- t(Combined)
# str(Combined2)
# View(Combined2)
# Name the five measure columns in one assignment. The year column needs no
# name here: cbind() below names it "Years" after the variable.
colnames(Combined2) <- c(
  "Num_Marriages",
  "Num_Divorces",
  "Div_Involving_Children",
  "Male_initiated_Divorce",
  "Female_initiated_Divorce"
)
# View(Combined2)
# str(Combined2)
Combined2 <- as.data.frame(cbind(Years, Combined2))
# Drop the row names carried over from the transpose; rows renumber from 1.
row.names(Combined2) <- NULL
# View(Combined2)
# Numeric analysis table read by every plot below: one row per year, one
# numeric column per measure.
# NOTE(review): these figures are entered by hand, presumably transcribed from
# the scraped table -- confirm against the source before relying on them.
Combined3 <- data.frame(
  Years = c(1996, 2006, 2012, 2013, 2014, 2015, 2016),
  Num_Marriages = c(106103, 114222, 123243, 118959, 121197, 113595, 118401),
  Num_Divorces = c(52466, 51375, 49917, 47638, 46498, 48517, 46604),
  Div_Involving_Children = c(28138, 25733, 24144, 22590, 21840, 23063, 21864),
  Male_initiated_Divorce = c(17005, 15171, 12958, 12329, 12090, 12178, 11763),
  Female_initiated_Divorce = c(24155, 20574, 17140, 15658, 15127, 15337, 14962)
)
# Interactive checks used while building the table:
# head(Combined3)
# str(Combined3)
# View(Combined3)
# attributes(Combined3)
# typeof(Combined3)
# mode(Combined3)
# write.csv(Combined3, file = "C:/Users/dan/Desktop/a Visualization/Combined3.csv", row.names = FALSE)
# NOTE(review): par() configures base graphics; ggplot2 draws through grid, so
# this call likely does not affect the plot -- confirm before removing.
par(mfrow = c(1, 1))
library(ggplot2)
theme_set(theme_bw())
# Lollipop chart of marriages per year: a point at each value plus a vertical
# segment from 0 up to it. Columns are referenced by bare name inside aes()
# so they are looked up in the data passed to ggplot().
ggplot(Combined3, aes(x = Years, y = Num_Marriages)) +
  geom_point(size = 3) +
  geom_segment(aes(x = Years,
                   xend = Years,
                   y = 0,
                   yend = Num_Marriages)) +
  labs(title = "Lollipop Chart",
       subtitle = "Number of Marriages",
       caption = "source: abs.gov.au/ausstats/abs@.nsf/mf/3310.0") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
par(mfrow = c(1, 1))
par(mfrow = c(1, 1))
# Author's note: this plot did not turn out well.
# library(ggplot2)
# library(Hmisc)  # gives a great plot; this one not so good ??
# Dot plot of marriages per year, overlaid with a red mean point and a red
# error bar from the "mean_cl_normal" summary.
# NOTE(review): newer ggplot2 releases rename `fun.y` to `fun` -- confirm the
# installed version before switching.
p1 <- ggplot(data = Combined3, mapping = aes(x = Years, y = Num_Marriages))
p1 +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.5,
    alpha = 0.25
  ) +
  stat_summary(fun.y = "mean", geom = "point", colour = "red") +
  stat_summary(
    fun.data = "mean_cl_normal",
    geom = "errorbar",
    colour = "red",
    width = 0.2
  )
# Console output recorded when this chunk was run:
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 7 rows containing missing values (geom_errorbar).
par(mfrow = c(1, 1))
par(mfrow = c(1, 1))
# str(Combined3)
# library(ggplot2)
library(ggcorrplot)
theme_set(theme_bw())
# Pairwise correlations between all columns of Combined3, rounded to one
# decimal place.
corr <- round(x = cor(Combined3), digits = 1)
# Correlogram: lower triangle only, circles with the coefficient printed on
# top, variables reordered (hc.order = TRUE).
ggcorrplot(
  corr,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3,
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlogram of Australian Marriages and Divorces\nfor years 1996, 2006, 2012 - 2016",
  ggtheme = theme_bw
)
par(mfrow = c(1, 1))
## Marginal histogram / boxplot.
## Author's note: this one won't work correctly in Rmd; it may have more to do
## with the data set.
par(mfrow = c(1, 1))
# load package and data
# library(ggplot2)
library(ggExtra)
# Scatterplot of male- vs female-initiated divorces with a linear fit.
theme_set(theme_bw())  # pre-set the bw theme
g <- ggplot(
  Combined3,
  aes(x = Male_initiated_Divorce, y = Female_initiated_Divorce)
) +
  geom_count() +
  # Spell out FALSE: the alias F is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE)
g
# Marginal plots for the top and right side; select one:
# ggMarginal(g, type = "histogram", fill = "transparent")
# ggMarginal(g, type = "boxplot", fill = "transparent")
# ggMarginal(g, type = "density", fill = "transparent")
par(mfrow = c(1, 1))
# library(ggplot2)
theme_set(theme_bw())
# Violin plot with female-initiated divorces on x and male-initiated divorces
# on y.
g <- ggplot(
  data = Combined3,
  mapping = aes(x = Female_initiated_Divorce, y = Male_initiated_Divorce)
)
g +
  geom_violin() +
  labs(
    x = "Female_initiated_Divorce",
    y = "Male_initiated_Divorce",
    title = "Violin plot",
    subtitle = "Female_initiated_Divorce vs Male_initiated_Divorce",
    caption = "Source: abs.gov.au/ausstats/abs@.nsf/mf/3310.0"
  )
Findings to date
To be honest, I wish my data set were longer. I almost want to go grab another data set and see if I can make a bigger, better, cool-looking visual of something.