We want to explore a dataset containing the costs of animal rescue for several categories
# Point install.packages() at a real CRAN mirror. The previous value was a
# literal placeholder, so every install below failed with "unable to access
# index for repository".
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("tidyverse")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'tidyverse' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("maps")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'maps' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("rgdal")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'rgdal' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("ggmap")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'ggmap' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("naniar")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'naniar' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("skimr")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'skimr' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
install.packages("usethis")
## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
## cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'
## Warning: package 'usethis' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
## cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## Warning: package 'tidyr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(skimr)
## Warning: package 'skimr' was built under R version 4.2.2
library(stringr)
library(dplyr)
library(naniar)
## Warning: package 'naniar' was built under R version 4.2.2
##
## Attaching package: 'naniar'
##
## The following object is masked from 'package:skimr':
##
## n_complete
library(usethis)
## Warning: package 'usethis' was built under R version 4.2.2
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the raw incident records; the path is relative to the working directory.
mydata <- read.csv(file = "Raw data.csv")
`
colnames(mydata)
## [1] "DateTimeOfCall" "HourlyNotionalCost"
## [3] "IncidentNotionalCost" "AnimalGroupParent"
## [5] "PropertyCategory" "SpecialServiceTypeCategory"
## [7] "SpecialServiceType" "Area"
We begin by taking a glimpse of our data
glimpse(mydata)
## Rows: 8,939
## Columns: 8
## $ DateTimeOfCall <chr> "01/01/2009 03:01", "01/01/2009 08:51", "04…
## $ HourlyNotionalCost <int> 255, 255, 255, 255, 255, 255, 255, 255, 255…
## $ IncidentNotionalCost <chr> "510", "255", "255", "255", "255", "255", "…
## $ AnimalGroupParent <chr> "Dog", "Fox", "Dog", "Horse", "Ra^&it", "Un…
## $ PropertyCategory <chr> "Dwelling", "Outdoor Structure", "Outdoor S…
## $ SpecialServiceTypeCategory <chr> "Other animal assistance", "Other animal as…
## $ SpecialServiceType <chr> "Animal assistance involving livestock - Ot…
## $ Area <chr> "Ranelagh", "Ranelagh", "Sutton", "Smithfie…
We spot some column names that look misspelled and rename them. We then handle the various columns individually.
# Rename both cost columns in one rename() call (new_name = old_name).
mydata <- mydata %>%
  rename(
    HourlyNationalCost = HourlyNotionalCost,
    IncidentNationalCost = IncidentNotionalCost
  )
Deal with NAs
sum(is.na(mydata$HourlyNationalCost))
## [1] 0
mydata$IncidentNationalCost <- as.integer(mydata$IncidentNationalCost)
## Warning: NAs introduced by coercion
sum(is.na(mydata$IncidentNationalCost))
## [1] 63
# Drop every row that has an NA in any column.
mydata1 <- na.omit(mydata)
# Count rows where either cost column is still NA. is.na() must be applied to
# each column separately: `is.na(a | b)` misses NAs because `TRUE | NA` is
# TRUE, so a missing value next to a non-zero cost would go unnoticed.
sum(is.na(mydata1$HourlyNationalCost) | is.na(mydata1$IncidentNationalCost))
## [1] 0
We summarize our data to understand the values
summary(mydata1)
## DateTimeOfCall HourlyNationalCost IncidentNationalCost AnimalGroupParent
## Length:8876 Min. :255 Min. : 0 Length:8876
## Class :character 1st Qu.:260 1st Qu.: 290 Class :character
## Mode :character Median :326 Median : 328 Mode :character
## Mean :310 Mean : 364
## 3rd Qu.:346 3rd Qu.: 352
## Max. :364 Max. :3912
## PropertyCategory SpecialServiceTypeCategory SpecialServiceType
## Length:8876 Length:8876 Length:8876
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Area
## Length:8876
## Class :character
## Mode :character
##
##
##
Let us explore the Special service type category table
unique(mydata1$SpecialServiceTypeCategory)
## [1] "Other animal assistance" "Animal rescue from below ground"
## [3] "Animal rescue from water" "Animal rescue from height"
# Frequency of each service category (ascending), opened in the data viewer.
sort(table(mydata1$SpecialServiceTypeCategory)) %>% view()
# Jittered strip plot of incident cost for each service category.
# theme_bw() is a complete theme and replaces any earlier theme() settings, so
# it has to come before the axis-text adjustment, not after it.
mydata1 %>%
  ggplot(aes(
    x = IncidentNationalCost,
    y = SpecialServiceTypeCategory,
    color = SpecialServiceTypeCategory
  )) +
  geom_jitter(size = 2, alpha = 0.6, width = 0.2, height = 0.2) +
  xlab("Incident cost") +
  ylab("") +
  ggtitle("Special service category showing distribution of costs") +
  scale_color_brewer(palette = "Paired") +
  guides(color = "none") +
  theme_bw() +
  theme(axis.text.x = element_text(vjust = 0.5, hjust = 1))
Let's explore the Special service type
unique(mydata1$SpecialServiceType)
## [1] "Animal assistance involving livestock - Other action"
## [2] "Animal rescue from below ground - Domestic pet"
## [3] "Animal rescue from water - Farm animal"
## [4] "Animal rescue from water - Domestic pet"
## [5] "Wild animal rescue from height"
## [6] "Animal rescue from height - Domestic pet"
## [7] "Animal rescue from water - Bird"
## [8] "Animal rescue from height - Bird"
## [9] "Wild animal rescue from water or mud"
## [10] "Animal assistance - Lift heavy livestock animal"
## [11] "Wild animal rescue from below ground"
## [12] "Animal rescue from below ground - Bird"
## [13] "Animal rescue from height - Farm animal"
## [14] "Animal rescue from below ground - Farm animal"
## [15] "Assist trapped domestic animal"
## [16] "Animal harm involving domestic animal"
## [17] "Animal assistance involving wild animal - Other action"
## [18] "Animal assistance involving domestic animal - Other action"
## [19] "Animal harm involving wild animal"
## [20] "Assist trapped livestock animal"
## [21] "Assist trapped wild animal"
## [22] "Animal assistance - Lift heavy wild animal"
## [23] "Animal assistance - Lift heavy domestic animal"
## [24] "Animal harm involving livestock"
Let's make a table to see the frequency of these values
view(sort(table(mydata1$SpecialServiceType), decreasing = TRUE))
Now we focus on the Property category column
unique(mydata1$PropertyCategory)
## [1] "Dwelling" "Outdoor Structure" "Non Residential"
## [4] "Outdoor" "Road Vehicle" "Other Residential"
## [7] "Dwe^&ing" "Dwe!!ing" "Boat"
Clean the Property category column
# Map the two garbled spellings back to "Dwelling", keep every other value
# unchanged, then lower-case the cleaned column.
mydata1 <- mydata1 %>%
  mutate(
    PropertyCategory_clean = case_when(
      PropertyCategory == "Dwe!!ing" ~ "Dwelling",
      PropertyCategory == "Dwe^&ing" ~ "Dwelling",
      TRUE ~ PropertyCategory
    ),
    PropertyCategory_clean = tolower(PropertyCategory_clean)
  )
unique(mydata1$PropertyCategory_clean)
## [1] "dwelling" "outdoor structure" "non residential"
## [4] "outdoor" "road vehicle" "other residential"
## [7] "boat"
Visualize the property category counts to see how often each property type occurs; we can then train more staff to handle calls for the most frequent property types.
# Bar chart of the number of incidents per property category, ordered by count.
# The bars use a constant fill, so the former fill scale and fill guide had
# nothing to act on and are removed; geom_col() replaces
# geom_bar(stat = "identity").
mydata1 %>%
  count(PropertyCategory_clean, name = "frequency") %>%
  mutate(PropertyCategory_clean = fct_reorder(PropertyCategory_clean, frequency)) %>%
  ggplot(aes(x = PropertyCategory_clean, y = frequency)) +
  geom_col(fill = "blue") +
  theme_minimal() +
  labs(
    x = "Property type",
    y = "Frequency",
    title = "Frequency of the property category",
    subtitle = "We see most counts for the dwelling animals"
  )
Next we want to see the average national cost for each property
category
# Bar chart of the mean incident cost per property category, ordered by cost.
mydata1 %>%
  group_by(PropertyCategory_clean) %>%
  summarize(avg_cost = mean(IncidentNationalCost)) %>%
  mutate(PropertyCategory_clean = fct_reorder(PropertyCategory_clean, avg_cost)) %>%
  ggplot(aes(x = PropertyCategory_clean, y = avg_cost)) +
  geom_col(fill = "orange") +
  # avg_cost is a cost, not a proportion: scales::percent() would multiply it
  # by 100 and append "%", so label the bars with the rounded cost instead.
  geom_text(aes(label = round(avg_cost)), vjust = -0.5) +
  theme_minimal() +
  xlab("Property type") +
  ylab("Average cost")
Rescuing an animal from a boat is more expensive, as expected, due to the
extra expertise the worker may need to maneuver. The most frequent
property type is the dwelling, so we may have to focus more resources
there.
Now we check the Animal group parent column to confirm what animals have the most count and cost
unique(mydata1$AnimalGroupParent)
## [1] "Dog"
## [2] "Fox"
## [3] "Horse"
## [4] "Ra^&it"
## [5] "Unknown - Heavy Livestock Animal"
## [6] "Squirrel"
## [7] "Cat"
## [8] "Bird"
## [9] "D+g"
## [10] "Unknown - Domestic Animal Or Pet"
## [11] "Ca&"
## [12] "Sheep"
## [13] "Ca$"
## [14] "Bir*"
## [15] "Deer"
## [16] "Unknown - Wild Animal"
## [17] "Snake"
## [18] "Lizard"
## [19] "Bi^&"
## [20] "Hedgehog"
## [21] "cat"
## [22] "Hamster"
## [23] "Lamb"
## [24] "Rabbit"
## [25] "Fish"
## [26] "Bull"
## [27] "Cow"
## [28] "Ferret"
## [29] "Budgie"
## [30] "Unknown - Animal rescue from water - Farm animal"
## [31] "Pigeon"
## [32] "Goat"
## [33] "Tortoise"
## [34] "Unknown - Animal rescue from below ground - Farm animal"
Clean the Animal group parent column
# Repair the garbled animal names, keep every other value unchanged, then
# lower-case the cleaned column (which also merges "cat" with "Cat").
mydata1 <- mydata1 %>%
  mutate(
    AnimalGroupParent_clean = case_when(
      AnimalGroupParent == "Ra^&it" ~ "Rabbit",
      AnimalGroupParent == "D+g" ~ "Dog",
      AnimalGroupParent %in% c("Ca&", "Ca$") ~ "Cat",
      AnimalGroupParent %in% c("Bir*", "Bi^&") ~ "Bird",
      TRUE ~ AnimalGroupParent
    ),
    AnimalGroupParent_clean = tolower(AnimalGroupParent_clean)
  )
unique(mydata1$AnimalGroupParent_clean)
## [1] "dog"
## [2] "fox"
## [3] "horse"
## [4] "rabbit"
## [5] "unknown - heavy livestock animal"
## [6] "squirrel"
## [7] "cat"
## [8] "bird"
## [9] "unknown - domestic animal or pet"
## [10] "sheep"
## [11] "deer"
## [12] "unknown - wild animal"
## [13] "snake"
## [14] "lizard"
## [15] "hedgehog"
## [16] "hamster"
## [17] "lamb"
## [18] "fish"
## [19] "bull"
## [20] "cow"
## [21] "ferret"
## [22] "budgie"
## [23] "unknown - animal rescue from water - farm animal"
## [24] "pigeon"
## [25] "goat"
## [26] "tortoise"
## [27] "unknown - animal rescue from below ground - farm animal"
`
Let's see the frequency of these values
# Incidents per cleaned animal group, summarised as rounded max / mean / min.
AnimalGroupParent_freq <- table(mydata1$AnimalGroupParent_clean)
round(c(
  max = max(AnimalGroupParent_freq),
  avg = mean(AnimalGroupParent_freq),
  min = min(AnimalGroupParent_freq)
))
## max avg min
## 4342 329 1
view(sort(table(mydata1$AnimalGroupParent_clean), decreasing = TRUE))
# Short axis labels for the plots. Cutting every name at 10 characters turned
# all the "unknown - ..." groups into the same label "unknown - ", silently
# merging distinct groups. Shorten the shared prefix first, then truncate
# only names that are still long, so each group keeps its own label.
mydata1$short_label <- mydata1$AnimalGroupParent_clean %>%
  str_replace("^unknown - ", "unk: ") %>%
  str_trunc(30)
We make a plot of the frequency of Animal group, and further group these by the Property category.
# Stacked bar chart: number of incidents per animal group, split by the
# property category. geom_bar() counts rows by default. The x-axis label
# typo ("cartegory") is fixed.
ggplot(mydata1, aes(x = short_label, fill = PropertyCategory_clean)) +
  geom_bar() +
  xlab("Animals group parent category") +
  ylab("Frequency") +
  ggtitle("This plot shows that 3 main animals have much more frequency") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
We create a plot to see the incident cost distribution for the various
animal categories
# Incident cost per animal group. On a discrete x axis geom_line() joins the
# observations inside each group, so it draws one vertical segment spanning
# that group's cost range; the translucent points are the individual incidents.
# No fill aesthetic is mapped, so the former guides(fill = "none") was a no-op
# and is dropped. The x-axis label typo ("cartegory") is fixed.
ggplot(mydata1, aes(x = short_label, y = IncidentNationalCost)) +
  geom_line(size = 1, color = "blue") +
  geom_point(color = "orange", size = 5, alpha = 0.3) +
  xlab("Animals group parent category") +
  ylab("Incident cost") +
  ggtitle("Plot of animal groups, showing cats, horses and dogs with highest costs") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
The most prominent animal groups are the bird, the cat and the dog, so we shall filter for these animals in a later analysis. Cats receive the highest number of calls and are among the most expensive to rescue.
Now we check the area column
# Inspect the Area values of the NA-free data set (mydata1), which is the one
# cleaned and analysed below, rather than the raw mydata.
unique(mydata1$Area)
## [1] "Ranelagh" "Sutton" "Smithfield" "Perrystown"
## [5] "Crumlin" "Milltown" "Belfield" "Rathfarnham"
## [9] "Terenure" "Rathmines" "Templeogue" "Dundrum"
## [13] "Walkinstown" "Leopardstown" "Stepaside" "Tallaght"
## [17] "Clontarf" "Harolds Cross" "Donnybrook" "Killester"
## [21] "Ballymun" "Ballyfermot" "Drumcondra" "Clonsilla"
## [25] "Dalkey" "Cabra" "Glasnevin" "Santry"
## [29] "Portobello" "Killiney" "Blackrock" "Dun laoighre"
## [33] "Temple Bar" "Rathm&^es" "Drumco\"!ra" "Ta!!aght"
## [37] "Donnybr$*k" "Clondalkin" "Finglas" "Castleknock"
## [41] "Tandridge" "SUTTON" ""
Clean the Area column
# Repair the garbled area names, label empty areas as "Unknown", keep every
# other value unchanged, then lower-case (which also merges "SUTTON" with
# "Sutton").
mydata1 <- mydata1 %>%
  mutate(
    Area_clean = case_when(
      Area == "Ta!!aght" ~ "Tallaght",
      Area == "Donnybr$*k" ~ "Donnybrook",
      Area == "Rathm&^es" ~ "Rathmines",
      Area == "Drumco\"!ra" ~ "Drumcondra",
      Area == "" ~ "Unknown",
      TRUE ~ Area
    ),
    Area_clean = tolower(Area_clean)
  )
unique(mydata1$Area_clean)
## [1] "ranelagh" "sutton" "smithfield" "perrystown"
## [5] "crumlin" "milltown" "belfield" "rathfarnham"
## [9] "terenure" "rathmines" "templeogue" "dundrum"
## [13] "walkinstown" "leopardstown" "stepaside" "tallaght"
## [17] "clontarf" "harolds cross" "donnybrook" "killester"
## [21] "ballymun" "ballyfermot" "drumcondra" "clonsilla"
## [25] "dalkey" "cabra" "glasnevin" "santry"
## [29] "portobello" "killiney" "blackrock" "dun laoighre"
## [33] "temple bar" "clondalkin" "finglas" "castleknock"
## [37] "tandridge" "unknown"
We explore the Area column further
# Incidents per cleaned area: open the sorted table in the viewer, then print
# the rounded max / mean / min counts.
Area_freq <- table(mydata1$Area_clean)
view(sort(Area_freq, decreasing = TRUE))
round(c(
  max = max(Area_freq),
  avg = mean(Area_freq),
  min = min(Area_freq)
))
## max avg min
## 395 234 1
We create a lollipop plot to broadly view the average incident costs for the areas in our column
# Lollipop chart: each area's mean incident cost (blue dot) joined by an
# orange stem to the overall mean cost (grey horizontal line).
mydata1 %>%
  group_by(Area_clean) %>%
  summarize(avg_cost = mean(IncidentNationalCost)) %>%
  mutate(
    Area_clean = fct_reorder(Area_clean, avg_cost),
    overall_avg = mean(mydata1$IncidentNationalCost)
  ) %>%
  ggplot(aes(x = Area_clean, y = avg_cost)) +
  geom_segment(
    aes(xend = Area_clean, y = overall_avg, yend = avg_cost),
    color = "Orange",
    size = 2
  ) +
  geom_point(size = 5, color = "blue", alpha = 0.6) +
  geom_hline(
    yintercept = mean(mydata1$IncidentNationalCost),
    color = "grey",
    linetype = "solid"
  ) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "",
    y = "Average incident cost",
    title = "Lollipop chart showing the average costs plotted against the Area"
  )
Creating a frequency plot to see the Area column
# Bar chart of the number of incidents per area. The former
# aes(fill = "mean_cost") mapped a constant string that the layer's
# fill = "orange" immediately overrode, so the mapping is removed.
ggplot(mydata1, aes(x = Area_clean)) +
  geom_bar(fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  xlab("Area") +
  ylab("Frequency") +
  ggtitle("Barplot of Area to see frequency of occurrence")
Let's check the top 3 incident call areas
# Names of the three areas with the most incidents, and the rows belonging to
# them. The assignment is done on its own line instead of inside view(), and
# head() avoids the NA padding that `[1:3]` gives when fewer than three areas
# exist.
Area_freq <- table(mydata1$Area_clean)
top_Areas <- head(names(sort(Area_freq, decreasing = TRUE)), 3)
view(top_Areas)
top_Areas_filtered <- subset(mydata1, Area_clean %in% top_Areas)
We identify a particular Area, Sutton and make a visualization to view the Animals and costs, faceted by the Property types they are rescued from
# Incident cost per animal group for the "sutton" area only, with one facet
# per property category. Animal names are truncated to 25 characters for the
# axis; colour follows the animal group and both legends are hidden.
mydata1 %>%
  filter(Area_clean == "sutton") %>%
  ggplot(aes(
    x = str_trunc(AnimalGroupParent_clean, 25),
    y = IncidentNationalCost,
    group = PropertyCategory_clean,
    color = AnimalGroupParent_clean,
    size = 2
  )) +
  geom_point(alpha = 0.6) +
  facet_grid(~ PropertyCategory_clean) +
  scale_color_brewer(palette = "Paired") +
  scale_size(range = c(3, 8)) +
  guides(color = "none", size = "none") +
  labs(
    x = "Animals",
    y = "Cost",
    title = "Plot of cost, for different animal types, In Sutton, grouped by property type"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
We can repeat the above code for any particular area of interest, so as
to understand where we need to allocate more resources
Now we clean and explore the trends shown by our date column
typeof(mydata1$DateTimeOfCall)
## [1] "character"
str(mydata1$DateTimeOfCall)
## chr [1:8876] "01/01/2009 03:01" "01/01/2009 08:51" "04/01/2009 10:07" ...
mydata1$date <- dmy_hm(mydata1$DateTimeOfCall)
Create a weekday column
# Day of the week as an ordered factor starting on Monday. weekdays() returns
# plain character strings, which tables and plots sort alphabetically
# (Friday, Monday, Saturday, ...) instead of in calendar order.
mydata1$wday <- wday(mydata1$date, label = TRUE, abbr = FALSE, week_start = 1)
Filter the data to only include the values “cat”, “dog”, and “bird”
filtered_data <- mydata1 %>% filter(AnimalGroupParent_clean %in% c("cat", "dog", "bird"))
Now Calculate the mean cost for each day of the week
# Mean incident cost for every weekday x animal x service-category
# combination. By default summarise() drops only the last grouping level and
# returns a tibble that is still grouped; .groups = "drop" returns an
# ungrouped result and silences the regrouping message.
mean_cost_by_day_and_animal <- filtered_data %>%
  group_by(wday, AnimalGroupParent_clean, SpecialServiceTypeCategory) %>%
  summarise(mean_cost = mean(IncidentNationalCost), .groups = "drop")
## `summarise()` has grouped output by 'wday', 'AnimalGroupParent_clean'. You can
## override using the `.groups` argument.
Create the plot for dog, cat, and bird, identifying mean costs and grouping by week days
# Mean cost per weekday for each animal (one facet per animal), one bar per
# service category. The bars are dodged rather than stacked: stacking would
# add the category means on top of each other, and a sum of means is not a
# meaningful cost.
ggplot(
  mean_cost_by_day_and_animal,
  aes(x = wday, y = mean_cost, fill = SpecialServiceTypeCategory)
) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~ AnimalGroupParent_clean) +
  ylab("Mean Cost") +
  xlab("Day of the Week") +
  scale_fill_discrete(name = "SpecialServiceTypeCategory")
Now we create a new column called months of the year
mydata1$month <- month(mydata1$date, label = TRUE)
We group our columns of interest
# Row-level table of the columns used by the monthly plots, ordered by month
# and service category. The previous group_by() + summarise() listed bare
# columns without an aggregating function, so it returned one row per
# incident anyway; newer dplyr releases deprecate summarise() returning more
# than one row per group. select() + arrange() gives the same rows.
# NOTE: despite the name, nothing is averaged here; any consumer that needs a
# mean has to aggregate IncidentNationalCost itself.
mean_values <- mydata1 %>%
  select(month, SpecialServiceTypeCategory, IncidentNationalCost, PropertyCategory_clean) %>%
  arrange(month, SpecialServiceTypeCategory)
## `summarise()` has grouped output by 'month', 'SpecialServiceTypeCategory'. You
## can override using the `.groups` argument.
Now we make a plot to see the costs per month grouped into Property category
# Mean incident cost per month, one line per property category.
# stat_summary(fun = mean) averages the rows of each month x property
# combination, so the plot shows what its title and axis label say. A plain
# geom_line() on the discrete month axis only draws a vertical segment
# spanning each month's range of values. The explicit group aesthetic is
# needed to join the months of one property category into a single line.
ggplot(
  mean_values,
  aes(
    x = month,
    y = IncidentNationalCost,
    color = PropertyCategory_clean,
    group = PropertyCategory_clean
  )
) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  ylab("Mean Cost") +
  xlab("Month") +
  guides(color = guide_legend(title = "Property")) +
  ggtitle("Average cost per month, grouped by property type")
Now we further group the plot by separating it into service category
# Mean incident cost per month, one facet per service category.
# stat_summary(fun = mean) averages the rows of each month inside a facet; a
# plain geom_line() on the discrete month axis only draws a vertical segment
# spanning each month's range of values. group = 1 joins the monthly means
# into a single line per facet.
ggplot(mean_values, aes(x = month, y = IncidentNationalCost, group = 1)) +
  stat_summary(fun = mean, geom = "line", size = 1, color = "blue") +
  ylab("Mean Cost") +
  xlab("Month") +
  facet_wrap(~ SpecialServiceTypeCategory)
We highlight the “Other animal assistance” category and explore the top
animals therein
# Keep only "Other animal assistance" incidents that involve a dog, cat or bird.
filtered_data3 <- mydata1 %>%
  filter(
    SpecialServiceTypeCategory == "Other animal assistance",
    AnimalGroupParent_clean %in% c("dog", "cat", "bird")
  )
# Row-level costs for the selected animals, ordered by animal. The previous
# group_by() + summarise() listed bare columns without an aggregating
# function, so it returned one row per incident anyway; newer dplyr releases
# deprecate summarise() returning more than one row per group.
# NOTE: despite the name, nothing is averaged here.
mean_cost_other_animals <- filtered_data3 %>%
  select(AnimalGroupParent_clean, IncidentNationalCost, PropertyCategory_clean) %>%
  arrange(AnimalGroupParent_clean)
## `summarise()` has grouped output by 'AnimalGroupParent_clean'. You can override
## using the `.groups` argument.
We create a plot for this particular column to see the costs per animal in dog, cat and bird.
# Incident cost per animal, coloured by property category. On the discrete
# animal axis each thick line is a vertical segment covering the cost range
# of one animal x property combination.
ggplot(
  mean_cost_other_animals,
  aes(
    x = AnimalGroupParent_clean,
    y = IncidentNationalCost,
    color = PropertyCategory_clean
  )
) +
  geom_line(size = 7, alpha = 0.7) +
  labs(
    x = "Animal",
    y = "cost",
    title = "Animals within the(other Animal assistance) rendered, grouped by property type"
  ) +
  guides(color = guide_legend(title = "Property group")) +
  theme_bw()
colnames(mydata1)
## [1] "DateTimeOfCall" "HourlyNationalCost"
## [3] "IncidentNationalCost" "AnimalGroupParent"
## [5] "PropertyCategory" "SpecialServiceTypeCategory"
## [7] "SpecialServiceType" "Area"
## [9] "PropertyCategory_clean" "AnimalGroupParent_clean"
## [11] "short_label" "Area_clean"
## [13] "date" "wday"
## [15] "month"