analysis project 2

We want to explore a dataset containing the costs of animal rescue for several categories

Installing the required packages and libraries

# Point R at a real CRAN mirror. The previous value was an unfilled
# "<your_chosen_mirror>" placeholder, so every install.packages() call below
# failed with "unable to access index for repository".
options(repos = c(CRAN = "https://cloud.r-project.org"))

install.packages("tidyverse")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'tidyverse' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("maps")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'maps' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("rgdal")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'rgdal' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("ggmap")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'ggmap' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("naniar")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'naniar' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("skimr")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'skimr' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

install.packages("usethis")

## Installing package into 'C:/Users/HP PC/AppData/Local/R/win-library/4.2'
## (as 'lib' is unspecified)

## Warning: unable to access index for repository https://<your_chosen_mirror>/src/contrib:
##   cannot open URL 'https://<your_chosen_mirror>/src/contrib/PACKAGES'

## Warning: package 'usethis' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning: unable to access index for repository https://<your_chosen_mirror>/bin/windows/contrib/4.2:
##   cannot open URL 'https://<your_chosen_mirror>/bin/windows/contrib/4.2/PACKAGES'

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.2

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2

## Warning: package 'tidyr' was built under R version 4.2.2

## Warning: package 'dplyr' was built under R version 4.2.2

## Warning: package 'forcats' was built under R version 4.2.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(skimr)

## Warning: package 'skimr' was built under R version 4.2.2

library(stringr)
library(dplyr)
library(naniar)

## Warning: package 'naniar' was built under R version 4.2.2

## 
## Attaching package: 'naniar'
## 
## The following object is masked from 'package:skimr':
## 
##     n_complete

library(usethis)

## Warning: package 'usethis' was built under R version 4.2.2

library(lubridate)

## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Call in our dataset and understand the types of variables and columns

mydata <- read.csv("Raw data.csv")

colnames(mydata)

## [1] "DateTimeOfCall"             "HourlyNotionalCost"        
## [3] "IncidentNotionalCost"       "AnimalGroupParent"         
## [5] "PropertyCategory"           "SpecialServiceTypeCategory"
## [7] "SpecialServiceType"         "Area"

We begin by taking a glimpse of our data

glimpse(mydata)

## Rows: 8,939
## Columns: 8
## $ DateTimeOfCall             <chr> "01/01/2009 03:01", "01/01/2009 08:51", "04…
## $ HourlyNotionalCost         <int> 255, 255, 255, 255, 255, 255, 255, 255, 255…
## $ IncidentNotionalCost       <chr> "510", "255", "255", "255", "255", "255", "…
## $ AnimalGroupParent          <chr> "Dog", "Fox", "Dog", "Horse", "Ra^&it", "Un…
## $ PropertyCategory           <chr> "Dwelling", "Outdoor Structure", "Outdoor S…
## $ SpecialServiceTypeCategory <chr> "Other animal assistance", "Other animal as…
## $ SpecialServiceType         <chr> "Animal assistance involving livestock - Ot…
## $ Area                       <chr> "Ranelagh", "Ranelagh", "Sutton", "Smithfie…

Data cleaning, exploration and visualization

We spot some misspelled column names and proceed to clean them. We then handle the various columns individually.

# Rename both cost columns in one rename() call (new_name = old_name).
# NOTE(review): "Notional" may be the intended spelling in the raw data
# (a notional cost) rather than a typo for "National" -- confirm with the
# data owner before relying on the new names.
mydata <- rename(
  mydata,
  HourlyNationalCost = HourlyNotionalCost,
  IncidentNationalCost = IncidentNotionalCost
)

Deal with NAs

sum(is.na(mydata$HourlyNationalCost))

## [1] 0

mydata$IncidentNationalCost <- as.integer(mydata$IncidentNationalCost)

## Warning: NAs introduced by coercion

sum(is.na(mydata$IncidentNationalCost))

## [1] 63

mydata1 <- na.omit(mydata)

# Count rows where either cost column is missing. is.na() has to be applied to
# each column separately: `a | b` coerces the costs to logical first, and
# `NA | TRUE` is TRUE, so wrapping the OR itself in is.na() can hide NAs.
sum(is.na(mydata1$HourlyNationalCost) | is.na(mydata1$IncidentNationalCost))

## [1] 0

We summarize our data to understand the values

summary(mydata1)

##  DateTimeOfCall     HourlyNationalCost IncidentNationalCost AnimalGroupParent 
##  Length:8876        Min.   :255        Min.   :   0         Length:8876       
##  Class :character   1st Qu.:260        1st Qu.: 290         Class :character  
##  Mode  :character   Median :326        Median : 328         Mode  :character  
##                     Mean   :310        Mean   : 364                           
##                     3rd Qu.:346        3rd Qu.: 352                           
##                     Max.   :364        Max.   :3912                           
##  PropertyCategory   SpecialServiceTypeCategory SpecialServiceType
##  Length:8876        Length:8876                Length:8876       
##  Class :character   Class :character           Class :character  
##  Mode  :character   Mode  :character           Mode  :character  
##                                                                  
##                                                                  
##                                                                  
##      Area          
##  Length:8876       
##  Class :character  
##  Mode  :character  
##                    
##                    
##

Let us explore the Special service type category table

unique(mydata1$SpecialServiceTypeCategory)

## [1] "Other animal assistance"         "Animal rescue from below ground"
## [3] "Animal rescue from water"        "Animal rescue from height"

# Open the frequency table of service categories (sorted ascending) in the viewer.
view(sort(table(mydata1$SpecialServiceTypeCategory)))

# Jittered strip plot of incident cost for each special service category.
# theme_bw() is a complete theme, so it must be added BEFORE theme(); when it
# was the last layer it replaced the axis-text adjustments made in theme().
mydata1 %>%
  ggplot(aes(x = IncidentNationalCost, y = SpecialServiceTypeCategory,
             color = SpecialServiceTypeCategory)) +
  geom_jitter(size = 2, alpha = 0.6, width = 0.2, height = 0.2) +
  xlab("Incident cost") +
  ylab("") +
  ggtitle("Special service category showing distribution of costs") +
  scale_color_brewer(palette = "Paired") +
  guides(color = "none") +
  theme_bw() +
  theme(axis.text.x = element_text(vjust = 0.5, hjust = 1))

Let's explore the Special service type

unique(mydata1$SpecialServiceType)

##  [1] "Animal assistance involving livestock - Other action"      
##  [2] "Animal rescue from below ground - Domestic pet"            
##  [3] "Animal rescue from water - Farm animal"                    
##  [4] "Animal rescue from water - Domestic pet"                   
##  [5] "Wild animal rescue from height"                            
##  [6] "Animal rescue from height - Domestic pet"                  
##  [7] "Animal rescue from water - Bird"                           
##  [8] "Animal rescue from height - Bird"                          
##  [9] "Wild animal rescue from water or mud"                      
## [10] "Animal assistance - Lift heavy livestock animal"           
## [11] "Wild animal rescue from below ground"                      
## [12] "Animal rescue from below ground - Bird"                    
## [13] "Animal rescue from height - Farm animal"                   
## [14] "Animal rescue from below ground - Farm animal"             
## [15] "Assist trapped domestic animal"                            
## [16] "Animal harm involving domestic animal"                     
## [17] "Animal assistance involving wild animal - Other action"    
## [18] "Animal assistance involving domestic animal - Other action"
## [19] "Animal harm involving wild animal"                         
## [20] "Assist  trapped livestock animal"                          
## [21] "Assist trapped wild animal"                                
## [22] "Animal assistance - Lift heavy wild animal"                
## [23] "Animal assistance - Lift heavy domestic animal"            
## [24] "Animal harm involving livestock"

Let's make a table to see the frequency of these values

view(sort(table(mydata1$SpecialServiceType), decreasing = TRUE))

Now we focus on the Property category column

unique(mydata1$PropertyCategory)

## [1] "Dwelling"          "Outdoor Structure" "Non Residential"  
## [4] "Outdoor"           "Road Vehicle"      "Other Residential"
## [7] "Dwe^&ing"          "Dwe!!ing"          "Boat"

clean the Property category column

# Map the two garbled spellings of "Dwelling" back to the correct value, leave
# every other category untouched, and store the result in lower case.
mydata1$PropertyCategory_clean <- tolower(
  ifelse(
    mydata1$PropertyCategory %in% c("Dwe!!ing", "Dwe^&ing"),
    "Dwelling",
    mydata1$PropertyCategory
  )
)

unique(mydata1$PropertyCategory_clean)

## [1] "dwelling"          "outdoor structure" "non residential"  
## [4] "outdoor"           "road vehicle"      "other residential"
## [7] "boat"

Visualize the property category column and check the frequency count of each property type; we can then train more staff to handle calls for the most frequent property categories.

# Bar chart of the number of incidents per property category, with the
# categories ordered from least to most frequent.
mydata1 %>%
  count(PropertyCategory_clean, name = "frequency") %>%
  mutate(PropertyCategory_clean = fct_reorder(PropertyCategory_clean, frequency)) %>%
  ggplot(aes(x = PropertyCategory_clean, y = frequency)) +
  geom_col(fill = "blue") +
  scale_fill_brewer(palette = "Paired") +
  guides(fill = "none") +
  labs(
    x = "Property type",
    y = "Frequency",
    title = "Frequency of the property category",
    subtitle = "We see most counts for the dwelling animals"
  ) +
  theme_minimal()

Next we want to see the average national cost for each property category

# Bar chart of the mean incident cost per property category, ordered by cost.
# The bar labels show the rounded mean cost. They were previously formatted
# with scales::percent(), which multiplies by 100 and appends "%", turning a
# cost of e.g. 364 into "36 400%".
mydata1 %>%
  group_by(PropertyCategory_clean) %>%
  summarize(avg_cost = mean(IncidentNationalCost)) %>%
  mutate(PropertyCategory_clean = fct_reorder(PropertyCategory_clean, avg_cost)) %>%
  ggplot(aes(x = PropertyCategory_clean, y = avg_cost)) +
  geom_bar(stat = "identity", fill = "orange") +
  geom_text(aes(label = round(avg_cost), y = avg_cost), stat = "identity", vjust = -0.5) +
  theme_minimal() +
  xlab("Property type") + ylab("Average cost")

Rescuing an animal from a boat incident is the most expensive, as expected, due to the extra expertise the worker may need to maneuver. The property type with the highest frequency is the dwelling, so we may have to focus more resources there.

Now we check the Animal group parent column to confirm what animals have the most count and cost

unique(mydata1$AnimalGroupParent)

##  [1] "Dog"                                                    
##  [2] "Fox"                                                    
##  [3] "Horse"                                                  
##  [4] "Ra^&it"                                                 
##  [5] "Unknown - Heavy Livestock Animal"                       
##  [6] "Squirrel"                                               
##  [7] "Cat"                                                    
##  [8] "Bird"                                                   
##  [9] "D+g"                                                    
## [10] "Unknown - Domestic Animal Or Pet"                       
## [11] "Ca&"                                                    
## [12] "Sheep"                                                  
## [13] "Ca$"                                                    
## [14] "Bir*"                                                   
## [15] "Deer"                                                   
## [16] "Unknown - Wild Animal"                                  
## [17] "Snake"                                                  
## [18] "Lizard"                                                 
## [19] "Bi^&"                                                   
## [20] "Hedgehog"                                               
## [21] "cat"                                                    
## [22] "Hamster"                                                
## [23] "Lamb"                                                   
## [24] "Rabbit"                                                 
## [25] "Fish"                                                   
## [26] "Bull"                                                   
## [27] "Cow"                                                    
## [28] "Ferret"                                                 
## [29] "Budgie"                                                 
## [30] "Unknown - Animal rescue from water - Farm animal"       
## [31] "Pigeon"                                                 
## [32] "Goat"                                                   
## [33] "Tortoise"                                               
## [34] "Unknown - Animal rescue from below ground - Farm animal"

clean the Animal group parent column

# Repair the garbled animal names, keep all other values as they are, and
# lower-case the result so that e.g. "Cat" and "cat" become one category.
mydata1 <- mydata1 %>%
  mutate(
    AnimalGroupParent_clean = tolower(
      case_when(
        AnimalGroupParent == "Ra^&it" ~ "Rabbit",
        AnimalGroupParent == "D+g" ~ "Dog",
        AnimalGroupParent %in% c("Ca&", "Ca$") ~ "Cat",
        AnimalGroupParent %in% c("Bir*", "Bi^&") ~ "Bird",
        TRUE ~ AnimalGroupParent
      )
    )
  )

unique(mydata1$AnimalGroupParent_clean)

##  [1] "dog"                                                    
##  [2] "fox"                                                    
##  [3] "horse"                                                  
##  [4] "rabbit"                                                 
##  [5] "unknown - heavy livestock animal"                       
##  [6] "squirrel"                                               
##  [7] "cat"                                                    
##  [8] "bird"                                                   
##  [9] "unknown - domestic animal or pet"                       
## [10] "sheep"                                                  
## [11] "deer"                                                   
## [12] "unknown - wild animal"                                  
## [13] "snake"                                                  
## [14] "lizard"                                                 
## [15] "hedgehog"                                               
## [16] "hamster"                                                
## [17] "lamb"                                                   
## [18] "fish"                                                   
## [19] "bull"                                                   
## [20] "cow"                                                    
## [21] "ferret"                                                 
## [22] "budgie"                                                 
## [23] "unknown - animal rescue from water - farm animal"       
## [24] "pigeon"                                                 
## [25] "goat"                                                   
## [26] "tortoise"                                               
## [27] "unknown - animal rescue from below ground - farm animal"

Let's see the frequency of these values

AnimalGroupParent_freq <- table(mydata1$AnimalGroupParent_clean)

c(max = round(max(AnimalGroupParent_freq)), avg = round(mean(AnimalGroupParent_freq)), min = round(min(AnimalGroupParent_freq)))

##  max  avg  min 
## 4342  329    1

view(sort(table(mydata1$AnimalGroupParent_clean), decreasing = TRUE))

# Keep only the first 10 characters of each animal name for use as axis labels.
# NOTE(review): every "unknown - ..." group starts with the same 10 characters,
# so those groups all end up sharing one label ("unknown - ").
mydata1$short_label <- substr(mydata1$AnimalGroupParent_clean, start = 1, stop = 10)

We make a plot of the frequency of Animal group, and further group these by the Property category.

# Stacked bar chart: number of incidents per (shortened) animal label,
# with each bar split by property category.
ggplot(mydata1, aes(x = short_label, fill = PropertyCategory_clean)) +
  geom_bar() +
  labs(
    x = "Animals group parent cartegory",
    y = "Frequency",
    title = "This plot shows that 3 main animals have much more frequency"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

We create a plot to see the incident cost distribution for the various animal category

# Incident costs per (shortened) animal label: a blue line layer with
# semi-transparent orange points for the individual incidents on top.
ggplot(mydata1, aes(x = short_label, y = IncidentNationalCost)) +
  geom_line(size = 1, color = "blue") +
  geom_point(color = "orange", size = 5, alpha = 0.3) +
  labs(
    x = "Animals group parent cartegory",
    y = "Incident cost",
    title = "Plot of animal groups, showing cats, horses and dogs with highest costs"
  ) +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

The obvious animal groups are the bird, the cat and the dog. We shall filter for these animals for a later analysis. Cats get the highest frequency of calls and are among the most expensive to rescue.

Now we check the area column

# List the raw area names in the cleaned-up working data (mydata1), which is
# the data frame every later step uses, rather than the unfiltered mydata.
unique(mydata1$Area)

##  [1] "Ranelagh"      "Sutton"        "Smithfield"    "Perrystown"   
##  [5] "Crumlin"       "Milltown"      "Belfield"      "Rathfarnham"  
##  [9] "Terenure"      "Rathmines"     "Templeogue"    "Dundrum"      
## [13] "Walkinstown"   "Leopardstown"  "Stepaside"     "Tallaght"     
## [17] "Clontarf"      "Harolds Cross" "Donnybrook"    "Killester"    
## [21] "Ballymun"      "Ballyfermot"   "Drumcondra"    "Clonsilla"    
## [25] "Dalkey"        "Cabra"         "Glasnevin"     "Santry"       
## [29] "Portobello"    "Killiney"      "Blackrock"     "Dun laoighre" 
## [33] "Temple Bar"    "Rathm&^es"     "Drumco\"!ra"   "Ta!!aght"     
## [37] "Donnybr$*k"    "Clondalkin"    "Finglas"       "Castleknock"  
## [41] "Tandridge"     "SUTTON"        ""

clean the Area column

# Repair the garbled area names, label blank areas as "Unknown", keep all
# other values, and lower-case the result (this also merges "SUTTON"/"Sutton").
mydata1 <- mydata1 %>%
  mutate(
    Area_clean = tolower(
      case_when(
        Area == "Ta!!aght" ~ "Tallaght",
        Area == "Donnybr$*k" ~ "Donnybrook",
        Area == "Rathm&^es" ~ "Rathmines",
        Area == "Drumco\"!ra" ~ "Drumcondra",
        Area == "" ~ "Unknown",
        TRUE ~ Area
      )
    )
  )

unique(mydata1$Area_clean)

##  [1] "ranelagh"      "sutton"        "smithfield"    "perrystown"   
##  [5] "crumlin"       "milltown"      "belfield"      "rathfarnham"  
##  [9] "terenure"      "rathmines"     "templeogue"    "dundrum"      
## [13] "walkinstown"   "leopardstown"  "stepaside"     "tallaght"     
## [17] "clontarf"      "harolds cross" "donnybrook"    "killester"    
## [21] "ballymun"      "ballyfermot"   "drumcondra"    "clonsilla"    
## [25] "dalkey"        "cabra"         "glasnevin"     "santry"       
## [29] "portobello"    "killiney"      "blackrock"     "dun laoighre" 
## [33] "temple bar"    "clondalkin"    "finglas"       "castleknock"  
## [37] "tandridge"     "unknown"

We explore the Area column further

view(sort(table(mydata1$Area_clean), decreasing = TRUE))

Area_freq <- table(mydata1$Area_clean)

c(max = round(max(Area_freq)), avg = round(mean(Area_freq)), min = round(min(Area_freq)))

## max avg min 
## 395 234   1

We create a lollipop plot to broadly view the average incident costs for the areas in our column

# Lollipop chart of the mean incident cost per area. Each stick runs from the
# overall mean cost (grey reference line) to that area's own mean cost.
overall_avg_cost <- mean(mydata1$IncidentNationalCost)

area_avg_cost <- mydata1 %>%
  group_by(Area_clean) %>%
  summarize(avg_cost = mean(IncidentNationalCost)) %>%
  mutate(Area_clean = fct_reorder(Area_clean, avg_cost))

ggplot(area_avg_cost, aes(x = Area_clean, y = avg_cost)) +
  geom_segment(
    aes(x = Area_clean, xend = Area_clean, y = overall_avg_cost, yend = avg_cost),
    color = "Orange", size = 2
  ) +
  geom_point(size = 5, color = "blue", alpha = 0.6) +
  geom_hline(yintercept = overall_avg_cost, color = "grey", linetype = "solid") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "",
    y = "Average incident cost",
    title = "Lollipop chart showing the average costs plotted against the Area"
  )

Creating a frequency plot to see the Area column

# Bar chart of the number of incidents per area.
# The former `fill = "mean_cost"` aesthetic mapped a constant string (not a
# column) and was overridden by the fixed fill colour below, so it is removed.
ggplot(mydata1, aes(x = Area_clean)) +
  geom_bar(stat = "count", fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  xlab("Area") +
  ylab("Frequency") +
  ggtitle("Barplot of Area to see frequency of occurrence")

Let's check the top 3 incident call areas

# Frequency of incidents per cleaned area name.
Area_freq <- table(mydata1$Area_clean)

# Names of the (up to) three areas with the most incidents. The assignment is
# done as its own statement instead of inside the view() call, so top_Areas no
# longer depends on view() evaluating its argument; head() also avoids the NA
# padding that `[1:3]` produces when fewer than three areas exist.
top_Areas <- head(names(sort(Area_freq, decreasing = TRUE)), 3)
view(top_Areas)

# Rows of mydata1 that belong to one of those top areas.
top_Areas_filtered <- subset(mydata1, Area_clean %in% top_Areas)

We pick a particular area, Sutton, and make a visualization of the animals and costs, faceted by the property types they are rescued from

# Incident cost per animal group for the "sutton" area only,
# with one facet column per property category.
mydata1 %>%
  filter(Area_clean == "sutton") %>%
  ggplot(aes(
    x = str_trunc(AnimalGroupParent_clean, 25),
    y = IncidentNationalCost,
    group = PropertyCategory_clean,
    color = AnimalGroupParent_clean,
    size = 2
  )) +
  geom_point(alpha = 0.6) +
  facet_grid(~ PropertyCategory_clean) +
  scale_color_brewer(palette = "Paired") +
  scale_size(range = c(3, 8)) +
  guides(color = "none", size = "none") +
  labs(
    x = "Animals",
    y = "Cost",
    title = "Plot of cost, for different animal types, In Sutton, grouped by property type"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

We can repeat the above code for any particular area of interest, so as to understand where we need to allocate more resources

Now we clean and explore the trends shown by our date column

typeof(mydata1$DateTimeOfCall)

## [1] "character"

str(mydata1$DateTimeOfCall)

##  chr [1:8876] "01/01/2009 03:01" "01/01/2009 08:51" "04/01/2009 10:07" ...

mydata1$date <- dmy_hm(mydata1$DateTimeOfCall)

Create a weekday column

# Day-of-week name for each call, stored as an ordered factor so that tables
# and plot axes follow calendar order. weekdays() returned plain character
# strings, which sort alphabetically (Friday, Monday, Saturday, ...).
mydata1$wday <- wday(mydata1$date, label = TRUE, abbr = FALSE)

Filter the data to only include the values “cat”, “dog”, and “bird”

filtered_data <- mydata1 %>% filter(AnimalGroupParent_clean %in% c("cat", "dog", "bird"))

Now Calculate the mean cost for each day of the week

# Mean incident cost for every weekday / animal / service-category combination.
# .groups = "drop" returns an ungrouped result, which silences the
# "summarise() has grouped output by ..." message and avoids leftover grouping
# affecting later steps.
mean_cost_by_day_and_animal <- filtered_data %>%
  group_by(wday, AnimalGroupParent_clean, SpecialServiceTypeCategory) %>%
  summarise(mean_cost = mean(IncidentNationalCost), .groups = "drop")

## `summarise()` has grouped output by 'wday', 'AnimalGroupParent_clean'. You can
## override using the `.groups` argument.

Create the plot for dog, cat, and bird, identifying mean costs and grouping by week days

# Mean cost per weekday for each animal (one facet per animal), with one bar
# per service category. The bars are dodged rather than stacked: stacking
# would add the per-category means together, so the total bar height would be
# a sum of averages instead of a mean cost.
ggplot(mean_cost_by_day_and_animal, aes(x = wday, y = mean_cost, fill = SpecialServiceTypeCategory)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~ AnimalGroupParent_clean) +
  ylab("Mean Cost") +
  xlab("Day of the Week") +
  scale_fill_discrete(name = "SpecialServiceTypeCategory")

Now we create a new column called months of the year

mydata1$month <- month(mydata1$date, label = TRUE)

We group our columns of interest

# Per-incident rows with only the columns the monthly plots need, ordered by
# month and service category.
# The previous group_by() + summarise() call did not aggregate anything: it
# listed bare columns and therefore returned every row, a multi-row use of
# summarise() that dplyr 1.1.0 deprecates. select() + arrange() expresses the
# same result directly. Despite its name, this object holds raw costs, not means.
mean_values <- mydata1 %>%
  select(month, SpecialServiceTypeCategory, IncidentNationalCost, PropertyCategory_clean) %>%
  arrange(month, SpecialServiceTypeCategory)

## `summarise()` has grouped output by 'month', 'SpecialServiceTypeCategory'. You
## can override using the `.groups` argument.

Now we make a plot to see the costs per month grouped into Property category

# Mean incident cost per month, one line per property category.
# stat_summary(fun = mean) averages the incident costs within each
# month/property combination, so the y axis really is the mean cost the labels
# promise; geom_line() drew the raw per-incident values instead. The explicit
# group aesthetic joins the months into one line per property category
# (with a discrete x axis each month would otherwise form its own group).
ggplot(mean_values, aes(x = month, y = IncidentNationalCost,
                        color = PropertyCategory_clean,
                        group = PropertyCategory_clean)) +
  stat_summary(fun = mean, geom = "line", size = 4) +
  ylab("Mean Cost") +
  xlab("Month") +
  guides(color = guide_legend(title = "Property")) +
  ggtitle("Average cost per month, grouped by property type")

Now we further group the plot by separating it into service category

# Mean incident cost per month, one facet per special service category.
# stat_summary(fun = mean) averages the costs within each month so the y axis
# matches its "Mean Cost" label; group = 1 joins the months into a single line
# per facet (with a discrete x axis each month would otherwise be its own group).
ggplot(mean_values, aes(x = month, y = IncidentNationalCost)) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", size = 3, color = "blue") +
  ylab("Mean Cost") +
  xlab("Month") +
  facet_wrap(~ SpecialServiceTypeCategory)

We highlight the “Other animal assistance” category and explore the top animals therein

# Keep only "Other animal assistance" incidents that involve a dog, cat or bird.
filtered_data3 <- mydata1 %>%
  filter(
    SpecialServiceTypeCategory == "Other animal assistance",
    AnimalGroupParent_clean %in% c("dog", "cat", "bird")
  )

# Per-incident rows (animal, cost, property category) for the filtered data,
# ordered by animal.
# The previous group_by() + summarise() call listed bare columns and so
# returned every row without aggregating, a multi-row use of summarise() that
# dplyr 1.1.0 deprecates. select() + arrange() expresses the same result
# directly. Despite its name, this object holds raw costs, not means.
mean_cost_other_animals <- filtered_data3 %>%
  select(AnimalGroupParent_clean, IncidentNationalCost, PropertyCategory_clean) %>%
  arrange(AnimalGroupParent_clean)

## `summarise()` has grouped output by 'AnimalGroupParent_clean'. You can override
## using the `.groups` argument.

We create a plot for this particular category to see the costs per animal for dog, cat and bird.

# Incident costs for dog, cat and bird within "Other animal assistance",
# coloured by property category.
ggplot(
  mean_cost_other_animals,
  aes(x = AnimalGroupParent_clean, y = IncidentNationalCost, color = PropertyCategory_clean)
) +
  geom_line(size = 7, alpha = 0.7) +
  labs(
    x = "Animal",
    y = "cost",
    title = "Animals within the(other Animal assistance) rendered, grouped by property type"
  ) +
  guides(color = guide_legend(title = "Property group")) +
  theme_bw()

colnames(mydata1)

##  [1] "DateTimeOfCall"             "HourlyNationalCost"        
##  [3] "IncidentNationalCost"       "AnimalGroupParent"         
##  [5] "PropertyCategory"           "SpecialServiceTypeCategory"
##  [7] "SpecialServiceType"         "Area"                      
##  [9] "PropertyCategory_clean"     "AnimalGroupParent_clean"   
## [11] "short_label"                "Area_clean"                
## [13] "date"                       "wday"                      
## [15] "month"

analysis project 2

Chinemerem

2023-1-22

Installing the required packages and libraries

Call in our dataset and understand the types of variables and columns

Data cleaning, exploration and visualization