library(tidyverse)
library(skimr)
library(RColorBrewer)
library(networkD3)
library(htmlwidgets)
library(waffle)
library(lubridate)
library(Hmisc)
library(viridis)
library(plotly)
animal_outcomes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv')
dim(animal_outcomes)
## [1] 664 12
ao = animal_outcomes %>% mutate_at(vars(year,animal_type,outcome), list(factor))
skim(ao)
| Name | ao |
| Number of rows | 664 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| factor | 3 |
| numeric | 9 |
| ________________________ | |
| Group variables |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| year | 0 | 1 | FALSE | 20 | 200: 35, 200: 35, 200: 35, 200: 35 |
| animal_type | 0 | 1 | FALSE | 6 | Cat: 114, Dog: 114, Hor: 114, Liv: 114 |
| outcome | 0 | 1 | FALSE | 8 | Eut: 120, Oth: 120, Tra: 102, Rec: 100 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ACT | 0 | 1.00 | 201.92 | 354.55 | 0 | 1.00 | 36.0 | 191.00 | 1628 | ▇▁▁▁▁ |
| NSW | 0 | 1.00 | 970.56 | 2182.39 | 0 | 9.00 | 78.5 | 514.75 | 13267 | ▇▁▁▁▁ |
| NT | 11 | 0.98 | 210.77 | 806.20 | 0 | 0.00 | 1.0 | 60.00 | 8150 | ▇▁▁▁▁ |
| QLD | 0 | 1.00 | 1289.88 | 2562.50 | 0 | 15.00 | 198.5 | 854.00 | 15690 | ▇▁▁▁▁ |
| SA | 2 | 1.00 | 318.32 | 662.24 | 0 | 2.00 | 34.5 | 226.75 | 4252 | ▇▁▁▁▁ |
| TAS | 1 | 1.00 | 141.28 | 288.04 | 0 | 3.00 | 21.0 | 96.00 | 1974 | ▇▁▁▁▁ |
| VIC | 0 | 1.00 | 992.26 | 2009.94 | 0 | 13.75 | 108.5 | 680.75 | 10567 | ▇▁▁▁▁ |
| WA | 0 | 1.00 | 88.47 | 319.11 | 0 | 0.00 | 2.0 | 46.25 | 6035 | ▇▁▁▁▁ |
| Total | 3 | 1.00 | 4199.37 | 7693.90 | 0 | 162.00 | 1015.0 | 2839.00 | 42731 | ▇▁▁▁▁ |
Hmisc::describe(ao$year)
## ao$year
## n missing distinct
## 664 0 20
##
## lowest : 1999 2000 2001 2002 2003, highest: 2014 2015 2016 2017 2018
##
## Value 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009
## Frequency 23 23 23 35 35 35 35 35 35 35 35
## Proportion 0.035 0.035 0.035 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
##
## Value 2010 2011 2012 2013 2014 2015 2016 2017 2018
## Frequency 35 35 35 35 35 35 35 35 35
## Proportion 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
intake = animal_outcomes %>% group_by(animal_type,year) %>% tally(Total) %>% ggplot(aes(x= year, y = n, group = animal_type, color=animal_type)) + geom_line() + geom_point() + theme_classic() + theme(legend.position="top") + scale_color_brewer(palette="Dark2") + labs(x="Year", y="Intake count", title="Animal intake types throughout the years", subtitle= "RSPCA Australia animal outcomes 1999-2018")
intake
Data preparation
ao18 = animal_outcomes %>% filter(year == "2018") %>% as.data.frame
links = subset(ao18, select=-c(ACT,NSW,NT,QLD,SA,TAS,VIC,WA,year))
colnames(links)[1] = "source"
colnames(links)[2] = "target"
colnames(links)[3] = "value"
nodes = data.frame(name=c(as.character(links$source),as.character(links$target)) %>% unique())
links$IDsource <- match(links$source, nodes$name)-1
links$IDtarget <- match(links$target, nodes$name)-1
Plot
summary18 <- sankeyNetwork(Links = links, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
sinksRight=FALSE, fontSize = 14)
summary18 <- htmlwidgets::prependContent(summary18, htmltools::tags$h3("Australia RSPCA animal types and outcomes in 2018"))
summary18
Subset
cd_int = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% group_by(animal_type,year) %>% tally(Total) %>% mutate_at(vars(year,animal_type), list(factor)) %>% as.data.frame()
cd_eth = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% filter(outcome=="Euthanized") %>% group_by(animal_type,year, outcome) %>% tally(Total) %>% mutate_at(vars(year,animal_type,outcome), list(factor)) %>% as.data.frame()
cd_reh = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% filter(outcome=="Rehomed") %>% group_by(animal_type,year, outcome) %>% tally(Total) %>% mutate_at(vars(year,animal_type,outcome), list(factor)) %>% as.data.frame()
Get proportion
cd_int$euth_por= ((cd_eth$n/cd_int$n))
head(cd_int)
Plot
cd2 = ggplot(cd_int, aes(x=year, y=euth_por)) +
geom_segment( aes(x=year, xend=year, y=0, yend=euth_por), color="grey49") +
geom_point(aes(x=year, y=euth_por, color= animal_type),size=2) + facet_wrap(~animal_type, nrow=2) + scale_color_brewer(palette="Dark2") +
theme_light() +
theme(
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.ticks.x = element_blank()
) + theme(axis.text.x = element_text(angle=90)) + scale_y_continuous(labels = scales::percent_format(), limits=c(0,1)) + labs(x="Year", y="Euthanized %", title="Proportion of euthanized dogs and cats intakes", subtitle = "RSPCA Australia animal outcomes 1999-2018")
cd2
Get proportion
cd_int$reh_por = ((cd_reh$n/cd_int$n))
head(cd_int)
Plot
rp2 = ggplot(cd_int, aes(x=year, y=reh_por)) +
geom_segment( aes(x=year, xend=year, y=0, yend=reh_por), color="dodgerblue4") +
geom_point(aes(x=year, y=reh_por, color= animal_type),size=2) + facet_wrap(~animal_type, nrow=2) + scale_color_brewer(palette="Dark2") +
theme_light() +
theme(
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.ticks.x = element_blank()
) + theme(axis.text.x = element_text(angle=90)) + scale_y_continuous(labels = scales::percent_format(), limits=c(0,1)) + labs(x="Year", y="Rehomed %", title="Proportion of rehomed dogs and cats intakes", subtitle = "RSPCA Australia animal outcomes 1999-2018")
rp2
dogsOutcome = c('Reclaimed'=13107,'Rehomed'=12872,'Currently In Care'=1941,'Transferred'=1391,'Other'=244,'Euthanized'=4308)
waffle1 = waffle(dogsOutcome/200, rows=5, size =0.5, colors=c("#00509d","#fdc500","#538d22","#f18701","#dad7cd","#2f3e46") ,title="Dogs outcomes in 2018", xlab="1 square = 200 dogs") + theme(legend.position="bottom")
waffle1
catsOutcome = c('Reclaimed'=2665,'Rehomed'=31105,'Currently In Care'=3944,'Transferred'=1033,'Other'=683,'Euthanized'=11740)
waffle2 = waffle(catsOutcome/200, rows=5, size =0.7, colors=c("#00509d","#fdc500","#538d22","#f18701","#dad7cd","#2f3e46") ,title="Cats outcomes in 2018", xlab="1 square = 200 catss") + theme(legend.position="bottom")
waffle2
animal_compliants = read.csv("animal_c.csv", header=TRUE)
colnames(animal_compliants)
## [1] "animalType2" "complaintType" "dateRecieved"
## [4] "suburb" "electoralDivision" "date2"
animal_compliants$date2= as.Date(animal_compliants$date2)
animal_compliants$month = month(animal_compliants$date2)
animal_compliants$year = year(animal_compliants$date2)
ac = animal_compliants %>% mutate_at(vars(animalType2,complaintType,suburb, dateRecieved, electoralDivision),list(factor)) %>% as.data.frame()
str(animal_compliants)
## 'data.frame': 42413 obs. of 8 variables:
## $ animalType2 : Factor w/ 2 levels "cat","dog": 2 2 2 2 2 2 2 2 2 2 ...
## $ complaintType : Factor w/ 6 levels "Aggressive Animal",..: 1 4 4 5 6 2 3 6 3 3 ...
## $ dateRecieved : Factor w/ 81 levels "Apr-14","Apr-15",..: 47 47 47 47 47 47 47 47 47 47 ...
## $ suburb : Factor w/ 85 levels "Aitkenvale","Alice River",..: 2 2 2 2 2 10 10 10 11 11 ...
## $ electoralDivision: Factor w/ 11 levels "Division 1","Division 10",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date2 : Date, format: "2020-06-01" "2020-06-01" ...
## $ month : num 6 6 6 6 6 6 6 6 6 6 ...
## $ year : num 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
gt1= ac %>% filter(animalType2 == "dog") %>% group_by(year, month) %>% tally() %>% ggplot(aes(x=month, y=year)) + geom_tile(aes(fill = n), color = "black") +
scale_fill_gradientn(colors = c("#a8dadc", "#457b9d", "#1d3557")) +
theme_classic() + scale_x_continuous(breaks = seq(1, 12, 1), labels = month.abb) + theme_classic() + scale_y_continuous(breaks=seq(2013,2020, by =1)) +
labs(x = "Month(date recieved)", y = "Year(date recieved)", fill = "Count", title = "Complaints about dog", subtitle= "Townsville Animal Complaints")
gt1
gt2= ac %>% filter(animalType2 == "cat") %>% group_by(year, month) %>% tally() %>% ggplot(aes(x=month, y=year)) + geom_tile(aes(fill = n), color = "black") +
scale_fill_gradientn(colors = c("#F4F269", "#606c38", "#283618")) +
theme_classic() + scale_x_continuous(breaks = seq(1, 12, 1), labels = month.abb) + theme_classic() + scale_y_continuous(breaks=seq(2013,2020, by =1)) +
labs(x = "Month(date recieved)", y = "Year(date recieved)", fill = "Count", title = "Complaints about cats", subtitle= "Townsville Animal Complaints")
gt2
bp1 = ggplot(ac) + geom_bar(aes(x=year, fill= complaintType)) + facet_wrap(~animalType2, nrow=2) + theme_classic() + scale_fill_brewer(palette="Dark2") + labs(x="Year(date recieved)", y = "Count", title= "Complaints over the years", subtitle= "Townsville Animal Complaints 2013 to 2020")
bp1
trend1= ac %>% filter(between(year, 2014,2019)) %>% group_by(year, complaintType) %>% tally() %>% ggplot(aes(x=year,y=n, group= complaintType, color=complaintType)) + geom_line(stat='identity') + scale_color_brewer(palette="Dark2") + theme_classic() + labs(x="Year(date recieved)", y = "Number of complaints", title= "Complaint types over the years", subtitle= "Townsville Animal Complaints 2014 to 2019")
trend1
vp1= ac %>% filter(animalType2 == "dog" & (between(year, 2014,2019))) %>% group_by(year,complaintType) %>% tally()
vp2 = vp1 %>% ggplot(aes(x= complaintType, y= n, fill=complaintType, color=complaintType)) + geom_violin(width=2.1,size=0.2) + scale_fill_viridis(discrete=TRUE) + scale_color_viridis(discrete=TRUE) + ylim(0,1600) + theme_minimal() + theme(legend.position="none") + coord_flip() + labs(x="Complaint type", y = "Count", title= "Dogs: Complaints type (2014 to 2019)", subtitle= "Townsville Animal Complaints")
vp2
vp3= ac %>% filter(animalType2 == "cat" & (between(year, 2014,2019))) %>% group_by(year,complaintType) %>% tally()
vp4 = vp3 %>% ggplot(aes(x= complaintType, y= n, fill=complaintType, color=complaintType)) + geom_violin(width=2.1,size=0.2) + scale_fill_viridis(discrete=TRUE) + scale_color_viridis(discrete=TRUE) + ylim(0,1600) + theme_minimal() + theme(legend.position="none") + coord_flip() + labs(x="Complaint type", y = "Count", title= "Cats: Complaints type (2014 to 2019)", subtitle= "Townsville Animal Complaints")
vp4
brisbane_complaints = readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/brisbane_complaints.csv')
brisbane_complaints$yearquarter = brisbane_complaints$date_range
unique(brisbane_complaints$date_range)
## [1] "1st-quarter-2016-17.csv"
## [2] "april-june-2016.csv"
## [3] "apr-jun-2019.csv"
## [4] "apr-to-jun-2018.csv"
## [5] "april-to-june-2017.csv"
## [6] "jan-mar-2019.csv"
## [7] "jan-to-mar-2018.csv"
## [8] "january-to-march-2017.csv"
## [9] "jul-to-sep-2018.csv"
## [10] "jul-to-sep-2019.csv"
## [11] "july-to-september-2017.csv"
## [12] "oct-to-dec-2018.csv"
## [13] "october-to-december-2016.csv"
## [14] "october-to-december-2017.csv"
## [15] "cars-srsa-open-data-animal-related-complaints-apr-to-jun-2020.csv"
## [16] "cars-srsa-open-data-animal-related-complaints-jan-to-mar-2020.csv"
## [17] "cars-srsa-open-data-animal-related-complaints-oct-to-dec-2019.csv"
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "1st-quarter-2016-17.csv", "2016-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "apr-jun-2019.csv", "2019-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "apr-to-jun-2018.csv", "2018-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "april-june-2016.csv", "2016-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "april-to-june-2017.csv", "2017-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-apr-to-jun-2020.csv", "2020-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-jan-to-mar-2020.csv", "2020-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-oct-to-dec-2019.csv", "2019-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jan-mar-2019.csv", "2019-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jan-to-mar-2018.csv", "2018-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "january-to-march-2017.csv", "2017-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jul-to-sep-2018.csv", "2018-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "july-to-september-2017.csv", "2017-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jul-to-sep-2019.csv", "2017-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "oct-to-dec-2018.csv", "2018-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "october-to-december-2016.csv", "2016-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "october-to-december-2017.csv", "2017-Q4")
sort(unique(brisbane_complaints$yearquarter),decreasing=FALSE)
## [1] "2016-Q1" "2016-Q2" "2016-Q4" "2017-Q1" "2017-Q2" "2017-Q3" "2017-Q4"
## [8] "2018-Q1" "2018-Q2" "2018-Q3" "2018-Q4" "2019-Q1" "2019-Q2" "2019-Q4"
## [15] "2020-Q1" "2020-Q2"
sum(is.na(brisbane_complaints$responsible_office))
## [1] 31330
bc= subset(brisbane_complaints, select=-c(responsible_office, date_range, nature, city))
##remove 2016 obs
bc = bc[!grepl("2016", brisbane_complaints$yearquarter),]
sort(unique(bc$yearquarter),decreasing=FALSE)
## [1] "2017-Q1" "2017-Q2" "2017-Q3" "2017-Q4" "2018-Q1" "2018-Q2" "2018-Q3"
## [8] "2018-Q4" "2019-Q1" "2019-Q2" "2019-Q4" "2020-Q1" "2020-Q2"
bc = bc %>% mutate_at(vars(animal_type,category,suburb, yearquarter),list(factor))
summary(bc)
## animal_type category suburb
## Attack : 7087 Wandering :4250 INALA : 859
## Cat : 1845 Fencing Issues :4208 BRACKEN RIDGE: 500
## Cat Trapping: 2256 Attack On An Animal:2467 WYNNUM : 456
## Dog :11253 Unregistered :1912 FOREST LAKE : 452
## Other Animal: 3865 Attack On A Person :1827 ACACIA RIDGE : 364
## (Other) :6976 (Other) :23373
## NA's :4666 NA's : 302
## yearquarter
## 2017-Q3: 3896
## 2020-Q2: 2799
## 2020-Q1: 2146
## 2018-Q3: 2101
## 2018-Q4: 1968
## 2019-Q2: 1954
## (Other):11442
bv1= bc %>% group_by(category) %>% tally() %>% ggplot(aes(x=category, y=n, fill=n)) + geom_bar(stat="identity") + theme_classic() + scale_fill_viridis() + coord_flip() + labs(x="Complaints types", y = "Count", title= "Complaint Categories", subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv1
bv2= bc %>% group_by(animal_type) %>% tally() %>% ggplot(aes(x=animal_type, y=n, fill=n)) + geom_bar(stat="identity") + theme_classic() + scale_fill_viridis() + coord_flip() + labs(x="Animal types", y = "Count", title= "Animal Types", subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv2
bv3= bc %>% group_by(yearquarter) %>% tally() %>% ggplot(aes(x=yearquarter, y=n, group=1)) + geom_line(color="black") + geom_point(color = "white", shape = 22, aes(fill = n), size = 5) + scale_fill_viridis_c(option = "plasma") + theme(axis.text.x = element_text(angle=90)) + labs(x = "Year-quarter", y = "Number of complaints", title = "Number of complaints throughout the quarters",subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv3
bv4= bc %>% group_by(yearquarter,animal_type) %>% tally() %>%
ggplot( aes(x=yearquarter, y=n, group=animal_type, fill=animal_type)) +
geom_area() +
scale_fill_viridis(discrete = TRUE) +
theme(legend.position="none") +
ggtitle("Complaints by animal types from 2017-Q1 to 2020-Q2 ") +
theme_minimal() + scale_x_discrete("2017-Q1 to 2020-Q2",breaks=2) +
theme(
legend.position="none",
panel.spacing = unit(0.2, "lines"),
strip.text.x = element_text(size = 8),
plot.title = element_text(size=14)
) +
facet_wrap(~animal_type)
bv4