This is a quick EDA on the dataset. I am using my own “blue” theme background for the plots.
Data can be found here: https://www.kaggle.com/australian-bureau-of-statistics/australian-marriage-law-postal-survey/data
View Our Data
# Print a compact, one-line-per-column overview of `vote` (name, type, first values).
vote %>% glimpse()
## Observations: 27,000
## Variables: 11
## $ State <fctr> New South Wales Divisions, New South Wales Di...
## $ ElectDivision <fctr> Banks, Banks, Banks, Banks, Banks, Banks, Ban...
## $ Gender <fctr> female, female, female, female, female, femal...
## $ Age <fctr> 18-19, 20-24, 25-29, 30-34, 35-39, 40-44, 45-...
## $ Eligible <fctr> 1,514, 4,095, 3,959, 4,070, 4,362, 4,511, 4,8...
## $ UnderAge <int> 18, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70...
## $ SeniorAge <int> 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74...
## $ Vote <fctr> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, ...
## $ Total_Votes <int> 37736, 37736, 37736, 37736, 37736, 37736, 3773...
## $ Responses <fctr> ClearResponse, ClearResponse, ClearResponse, ...
## $ Total_Responses <int> 84079, 84079, 84079, 84079, 84079, 84079, 8407...
Now let’s start the exploratory data analysis of our dataset.
# Dodged bar chart of Total_Votes for each State, filled by the Vote answer.
ggplot(data = vote, mapping = aes(x = State, y = Total_Votes, fill = Vote)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Two manual fill colours, one per level of Vote.
  scale_fill_manual(values = c("#66C2A5", "#3288BD")) +
  labs(
    title = "Australian Same-Sex Marriage Law Survey",
    subtitle = "How each territory has voted, 'Yes' or 'No' to legalised same-sex marriages in the country",
    caption = "Source:marriagesurvey.abs.gov.au. Plot@twitter.com/brenborbon"
  ) +
  bluetheme +
  # Blank out both axis titles.
  labs(x = "", y = "")
We can see that the Australian Capital Territory Regions have the most Yes votes, followed by the Victoria Divisions, while the Northern Territory Divisions have the lowest Yes vote total. On the other hand, the New South Wales Divisions and the Victoria Divisions have the highest No vote totals.
# Dodged bar chart of Total_Responses for each State, filled by the Responses category.
ggplot(data = vote, mapping = aes(x = State, y = Total_Responses, fill = Responses)) +
  geom_bar(stat = "identity", position = "dodge") +
  bluetheme +
  # Three manual fill colours, one per level of Responses.
  scale_fill_manual(values = c("#66C2A5", "#3288BD", "#ABDDA4")) +
  labs(
    title = "Australian Same-Sex Marriage Survey Response",
    subtitle = "How each territory has responded with the survey, to allow same-sex marriages in the country",
    caption = "Source:marriagesurvey.abs.gov.au. Plot@twitter.com/brenborbon",
    # Blank out both axis titles.
    x = "",
    y = ""
  )
We can observe that there are very few UnclearResponse values.
# Top 10 electoral divisions by "Yes" vote total.
# Reduce the "Yes" rows to one row per (ElectDivision, Total_Votes) pair,
# sort by Total_Votes in descending order and keep the first ten rows.
division <- vote %>%
  filter(Vote == "Yes") %>%
  group_by(ElectDivision, Total_Votes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total_Votes)) %>%
  ungroup() %>%
  head(10)

# One value per division: the mean of Total_Votes within each ElectDivision.
reg <- aggregate(
  division$Total_Votes,
  by = list(division$ElectDivision),
  FUN = mean
)
colnames(reg) <- c("division", "votes")
# Sort ascending by votes, then fix the factor levels to that order so the
# plot keeps the sorted order instead of the default level order.
reg <- reg[order(reg$votes), ]
reg$division <- factor(reg$division, levels = reg$division)

# Fill is mapped to `division`, matching the "No" plots. The previous mapping
# `fill = colors` did not name a column of `reg` (its columns are `division`
# and `votes`).
# The legend is hidden with the documented value "none" rather than "".
ggplot(reg, aes(x = division, y = votes, fill = division)) +
  geom_bar(stat = "identity", width = .5) +
  labs(title = "Top 10 Electoral Division Which Have Voted 'Yes'") +
  bluetheme +
  coord_flip() +
  theme(legend.position = "none")
# Ten electoral divisions with the lowest "Yes" vote total.
# Reduce the "Yes" rows to one row per (ElectDivision, Total_Votes) pair,
# sort by Total_Votes in descending order and keep the last ten rows.
division <- vote %>%
  filter(Vote == "Yes") %>%
  group_by(ElectDivision, Total_Votes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total_Votes)) %>%
  ungroup() %>%
  tail(10)

# One value per division: the mean of Total_Votes within each ElectDivision.
reg <- aggregate(
  division$Total_Votes,
  by = list(division$ElectDivision),
  FUN = mean
)
colnames(reg) <- c("division", "votes")
# Sort ascending by votes, then fix the factor levels to that order so the
# plot keeps the sorted order instead of the default level order.
reg <- reg[order(reg$votes), ]
reg$division <- factor(reg$division, levels = reg$division)

# Fill is mapped to `division`, matching the "No" plots. The previous mapping
# `fill = colors` did not name a column of `reg` (its columns are `division`
# and `votes`).
# The legend is hidden with the documented value "none" rather than "".
ggplot(reg, aes(x = division, y = votes, fill = division)) +
  geom_bar(stat = "identity", width = .5) +
  labs(title = "Lowest Electoral Division Which Have Voted 'Yes'") +
  bluetheme +
  coord_flip() +
  theme(legend.position = "none")
# Top 10 electoral divisions by "No" vote total.
# Reduce the "No" rows to one row per (ElectDivision, Total_Votes) pair,
# sort by Total_Votes in descending order and keep the first ten rows.
division <- vote %>%
  filter(Vote == "No") %>%
  group_by(ElectDivision, Total_Votes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total_Votes)) %>%
  ungroup() %>%
  head(10)

# One value per division: the mean of Total_Votes within each ElectDivision.
reg <- aggregate(
  division$Total_Votes,
  by = list(division$ElectDivision),
  FUN = mean
)
colnames(reg) <- c("division", "votes")
# Sort ascending by votes, then fix the factor levels to that order so the
# plot keeps the sorted order instead of the default level order.
reg <- reg[order(reg$votes), ]
reg$division <- factor(reg$division, levels = reg$division)

# Each bar gets its own fill colour; the legend is hidden with the documented
# value "none" rather than "".
ggplot(reg, aes(x = division, y = votes, fill = division)) +
  geom_bar(stat = "identity", width = .5) +
  labs(title = "Top 10 Electoral Division Which Have Voted 'No'") +
  bluetheme +
  coord_flip() +
  theme(legend.position = "none")
# Ten electoral divisions with the lowest "No" vote total.
# Reduce the "No" rows to one row per (ElectDivision, Total_Votes) pair,
# sort by Total_Votes in descending order and keep the last ten rows.
division <- vote %>%
  filter(Vote == "No") %>%
  group_by(ElectDivision, Total_Votes) %>%
  summarise(Total = n()) %>%
  arrange(desc(Total_Votes)) %>%
  ungroup() %>%
  tail(10)

# One value per division: the mean of Total_Votes within each ElectDivision.
reg <- aggregate(
  division$Total_Votes,
  by = list(division$ElectDivision),
  FUN = mean
)
colnames(reg) <- c("division", "votes")
# Sort ascending by votes, then fix the factor levels to that order so the
# plot keeps the sorted order instead of the default level order.
reg <- reg[order(reg$votes), ]
reg$division <- factor(reg$division, levels = reg$division)

# Each bar gets its own fill colour; the legend is hidden with the documented
# value "none" rather than "".
ggplot(reg, aes(x = division, y = votes, fill = division)) +
  geom_bar(stat = "identity", width = .5) +
  labs(title = "Lowest Electoral Division Which Have Voted 'No'") +
  bluetheme +
  coord_flip() +
  theme(legend.position = "none")