# Load the packages
library(readr)
library(ggplot2)
library(dplyr)
library(XML)
library(RCurl)
# Scrape the dataset from the website ----
aic <- "https://aic.gov.au/publications/tandi/tandi426"
htmlContent <- getURL(aic)
# Every table found on the page, parsed with the default settings
result <- readHTMLTable(htmlContent)
# Only the fourth table is used below. `header = FALSE` is spelled out on
# purpose: the table has a two-line header that cannot serve as column names,
# so both header lines are kept as data rows 1-2 and the columns are named
# V1..V7. (Argument names are case-sensitive; a capitalised `Header` is not
# matched to `header` and has no effect on the parsed table.)
prisoner <- readHTMLTable(
  htmlContent,
  which = 4,
  header = FALSE,
  stringsAsFactors = FALSE
)
# The table is picked by position, so fail early with a clear message if the
# page layout no longer provides the rows and columns indexed further down.
stopifnot(
  is.data.frame(prisoner),
  nrow(prisoner) >= 7L,
  ncol(prisoner) >= 6L
)
# Read the dataset
print(prisoner)
##          V1                        V2                   V3
## 1 Age (yrs) Indigenous male prisoners Increase 2000–10 (%)
## 2      2000                      2010                 2000
## 3     45–49                        95                  363
## 4     50–54                        51                  179
## 5     55–59                        14                   65
## 6     60–64                         7                   27
## 7       65+                        10                   23
##                            V4                   V5   V6    V7
## 1 Indigenous female prisoners Increase 2000–10 (%) <NA>  <NA>
## 2                        2010                 <NA> <NA>  <NA>
## 3                       282.1                    6   36 500.0
## 4                       251.0                    3   12 300.0
## 5                       364.3                    0    0   0.0
## 6                       285.7                    1    3 200.0
## 7                       130.0                    0    0   0.0
# Subset the dataset ----

#' Extract one gender/year slice of the scraped table in long format.
#'
#' @param table Data frame as scraped; column 1 holds the age bands.
#' @param count_col Index of the column holding the counts for this slice.
#' @param year Label stored in the `Year` column.
#' @param gender Label stored in the `Prisoner Gender` column.
#' @param rows Rows to keep. Rows 1-2 of the scraped table are its two-line
#'   header, so the age bands start at row 3.
#' @return A data frame with the columns Age, Number, Year and
#'   `Prisoner Gender`.
build_prisoner_subset <- function(table, count_col, year, gender,
                                  rows = 3:7) {
  slice <- table[rows, c(1, count_col)]
  colnames(slice) <- c("Age", "Number")
  # The scalar labels are recycled down the slice; assigning them directly
  # keeps them character instead of depending on data.frame() defaults.
  slice[["Year"]] <- year
  slice[["Prisoner Gender"]] <- gender
  slice
}

# Count columns of the scraped table: 2 = male 2000, 3 = male 2010,
# 5 = female 2000, 6 = female 2010 (columns 4 and 7 are the % increases).
prisoner_male_2000 <- build_prisoner_subset(prisoner, 2, "2000", "Male")
prisoner_male_2010 <- build_prisoner_subset(prisoner, 3, "2010", "Male")
prisoner_female_2000 <- build_prisoner_subset(prisoner, 5, "2000", "Female")
prisoner_female_2010 <- build_prisoner_subset(prisoner, 6, "2010", "Female")

# Unite the sub-datasets into one long dataset
prisoner_official <- bind_rows(
  prisoner_male_2000,
  prisoner_male_2010,
  prisoner_female_2000,
  prisoner_female_2010
)

# Summary of the final dataset
str(prisoner_official)
## 'data.frame':    20 obs. of  4 variables:
##  $ Age            : chr  "45–49" "50–54" "55–59" "60–64" ...
##  $ Number         : chr  "95" "51" "14" "7" ...
##  $ Year           : chr  "2000" "2000" "2000" "2000" ...
##  $ Prisoner Gender: chr  "Male" "Male" "Male" "Male" ...
# Build the chart in one expression: year on the x axis, prisoner count on
# the y axis (Number is stored as text, hence the conversion), one fill
# colour per age group.
p <- ggplot(
  data = prisoner_official,
  aes(x = Year, y = as.integer(Number), fill = Age)
) +
  # Bar heights come straight from the counts; age groups sit side by side
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal(base_size = 16) +
  # One panel per value of `Prisoner Gender`
  facet_grid(~`Prisoner Gender`) +
  # Title and axis labels
  labs(
    title = "Is The Number of Older Indigenous Australian\n Prisoners On The Rise?",
    x = "Year",
    y = "Count"
  ) +
  # Bold plot title and legend title for better clarity
  theme(
    plot.title = element_text(lineheight = .8, face = "bold"),
    legend.title = element_text(lineheight = .8, face = "bold")
  )
# Visualise the dataset
p
# The question: Is the number of older Indigenous Australian prisoners on the rise?
# The answer: Yes. Between 2000 and 2010 the number rose in almost every age category, for both genders.
# Other comments:
# The 45–49 age category saw the largest rise in absolute numbers for both genders from 2000 to 2010.
# There were more older Indigenous male prisoners than female prisoners in both 2000 and 2010.