The Task
Using the dataset below, present three ggplot2 plots that attempt to answer questions about the data that you find interesting. The focus of this assignment is exploration and experimentation. Concentrate on answering each question in a variety of ways and on exploring the functionality of ggplot2.
The Data
data("squirrels")
head(squirrels)
## # A tibble: 6 × 35
## long lat unique_sq…¹ hectare shift date hecta…² age prima…³ highl…⁴
## <dbl> <dbl> <chr> <chr> <chr> <date> <dbl> <chr> <chr> <chr>
## 1 -74.0 40.8 13A-PM-101… 13A PM 2018-10-14 4 <NA> Gray <NA>
## 2 -74.0 40.8 15F-PM-101… 15F PM 2018-10-10 6 Adult Gray <NA>
## 3 -74.0 40.8 19C-PM-101… 19C PM 2018-10-18 2 Adult Gray Cinnam…
## 4 -74.0 40.8 21B-AM-101… 21B AM 2018-10-19 4 <NA> <NA> <NA>
## 5 -74.0 40.8 23A-AM-101… 23A AM 2018-10-18 2 Juve… Black <NA>
## 6 -74.0 40.8 38H-PM-101… 38H PM 2018-10-12 1 Adult Gray <NA>
## # … with 25 more variables: combination_of_primary_and_highlight_color <chr>,
## # color_notes <chr>, location <chr>, above_ground_sighter_measurement <chr>,
## # specific_location <chr>, running <lgl>, chasing <lgl>, climbing <lgl>,
## # eating <lgl>, foraging <lgl>, other_activities <chr>, kuks <lgl>,
## # quaas <lgl>, moans <lgl>, tail_flags <lgl>, tail_twitches <lgl>,
## # approaches <lgl>, indifferent <lgl>, runs_from <lgl>,
## # other_interactions <chr>, zip_codes <dbl>, community_districts <dbl>, …
Is there any missing data?
vis_miss(squirrels)
Cleaning the data
# Columns removed before analysis; none of them is used by the plots below.
dpc <- c(
  "color_notes", "zip_codes", "community_districts",
  "borough_boundaries", "city_council_districts", "police_precincts"
)

# Cleaned sightings: drop the unused columns, then keep only rows that have
# complete colour, age and location information.
sq <- squirrels %>%
  # all_of() marks `dpc` as an external character vector of column names.
  # A bare `-dpc` is deprecated in tidyselect and would silently refer to a
  # data column instead if one named `dpc` ever existed.
  select(-all_of(dpc)) %>%
  # Conditions passed to a single filter() are combined with AND, which is
  # equivalent to chaining one filter() per condition.
  filter(
    !is.na(primary_fur_color),
    !is.na(highlight_fur_color),
    !is.na(combination_of_primary_and_highlight_color),
    !is.na(age),
    !is.na(location),
    age != "?" # presumably a placeholder for an undetermined age -- confirm
  )
head(sq)
## # A tibble: 6 × 29
## long lat unique_sq…¹ hectare shift date hecta…² age prima…³ highl…⁴
## <dbl> <dbl> <chr> <chr> <chr> <date> <dbl> <chr> <chr> <chr>
## 1 -74.0 40.8 19C-PM-101… 19C PM 2018-10-18 2 Adult Gray Cinnam…
## 2 -74.0 40.8 1I-PM-1012… 01I PM 2018-10-12 4 Juve… Cinnam… White
## 3 -74.0 40.8 3F-PM-1013… 03F PM 2018-10-13 3 Adult Cinnam… Gray
## 4 -74.0 40.8 9I-AM-1014… 09I AM 2018-10-14 8 Adult Gray Cinnam…
## 5 -74.0 40.8 2F-PM-1014… 02F PM 2018-10-14 10 Adult Gray White
## 6 -74.0 40.8 34A-PM-101… 34A PM 2018-10-13 4 Adult Gray Cinnam…
## # … with 19 more variables: combination_of_primary_and_highlight_color <chr>,
## # location <chr>, above_ground_sighter_measurement <chr>,
## # specific_location <chr>, running <lgl>, chasing <lgl>, climbing <lgl>,
## # eating <lgl>, foraging <lgl>, other_activities <chr>, kuks <lgl>,
## # quaas <lgl>, moans <lgl>, tail_flags <lgl>, tail_twitches <lgl>,
## # approaches <lgl>, indifferent <lgl>, runs_from <lgl>,
## # other_interactions <chr>, and abbreviated variable names …
Plotting
# Plot 1: map every cleaned sighting by longitude/latitude, faceted by shift
# (rows) and location (columns). Age is encoded twice, as colour and as point
# shape, so both aesthetics share a single legend.
ggplot(sq, aes(long, lat, color = factor(age), shape = factor(age))) +
  geom_point(alpha = 0.6, size = 1.5) +
  labs(
    title = "Where do squirrels like to hang out?",
    caption = "The graph tells us that there is a large concentration of squirrels on the ground plane at all hours\nof the day. I was hoping to find more of these small mammals above the ground during the day."
  ) +
  # Plain two-sided formula (rows ~ columns) instead of the nested
  # `~shift~location`.
  facet_grid(shift ~ location) +
  theme_bw() +
  theme(
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    ),
    strip.background = element_rect(
      color = "black",
      fill = "white",
      # `linewidth` replaces `size` for border widths (ggplot2 >= 3.4.0);
      # `size` still works there but emits a deprecation warning.
      linewidth = 0.5,
      linetype = "dotted"
    ),
    strip.text.x = element_text(face = "bold.italic"),
    panel.background = element_blank(),
    panel.spacing = unit(3, "lines"),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(face = "bold.italic", size = 10),
    plot.caption.position = "panel",
    plot.caption = element_text(hjust = 0, size = 10)
  )
What the squirrels do during the day?
# Long-format activity table: one row per (sighting, activity) pair in which
# the activity was observed, keeping the shift for grouping.
sq_act <- sq %>%
  mutate(
    # The five activity flags are logical columns; convert them to 0/1.
    across(c(running, chasing, climbing, eating, foraging), as.integer),
    # `other_activities` is a free-text column, so as.integer() on it would
    # yield only NA (with a coercion warning) and the category would vanish
    # after the filter below. Flag any non-missing note as 1 instead.
    others = as.integer(!is.na(other_activities))
  ) %>%
  select(shift, running, chasing, climbing, eating, foraging, others) %>%
  pivot_longer(-shift, names_to = "Activities", values_to = "Value") %>%
  filter(Value == 1L)

# Plot 2: number of sightings per activity, with the shifts side by side.
ggplot(sq_act, aes(x = Activities, fill = shift)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("#56B4E9", "#000000")) +
  labs(
    y = "Number of Squirrels",
    x = " ",
    title = "What the squirrels do during the day?",
    caption = "The graph clearly shows us that the activity that stands out the most is foraging at night.\nThe other activities are distributed more evenly throughout the day."
  ) +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    text = element_text(size = 11),
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    ),
    legend.title = element_blank(),
    plot.caption.position = "panel",
    plot.caption = element_text(hjust = 0, size = 10)
  )

# Plot 3: positions of juvenile squirrels, coloured by shift.
# Note that latitude is on the x axis and longitude on the y axis here.
sq %>%
  filter(age == "Juvenile") %>%
  ggplot(aes(lat, long, color = shift)) +
  labs(
    title = "Moves of the Juvenile Squirreles During the Day",
    caption = "How we can see in the graph, they are active all over the park, but during the AM shift\nthey tend to visit less the coordenates between -73.96 and 40.78"
  ) +
  # x and y are inherited from the ggplot() call; only linetype is added.
  geom_line(aes(linetype = shift)) +
  geom_point() +
  geom_smooth() +
  scale_linetype_manual(values = c("twodash", "dotted")) +
  scale_color_manual(values = c("#56B4E9", "#000000")) +
  # No layer maps a size aesthetic, so the former scale_size_manual() call
  # had no effect and has been dropped.
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.caption.position = "panel",
    plot.caption = element_text(hjust = 0, size = 10),
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    )
  )

# Save the most recent plot (the juvenile plot above) to a PDF file.
ggsave("KarolO.pdf", width = 8, height = 4)