Central Park Squirrel Census, 2018

Karol Orozco

The Task

Using the dataset below, present three ggplot2 plots that attempt to answer questions about the data that you think are interesting. The focus of this assignment is exploration and experimentation. Concentrate on answering each question in a variety of ways and exploring the functionality of ggplot2.

The Data

# Load the `squirrels` dataset into the session and preview its first rows.
data(list = "squirrels")
head(squirrels, n = 6L)
## # A tibble: 6 × 35
##    long   lat unique_sq…¹ hectare shift date       hecta…² age   prima…³ highl…⁴
##   <dbl> <dbl> <chr>       <chr>   <chr> <date>       <dbl> <chr> <chr>   <chr>  
## 1 -74.0  40.8 13A-PM-101… 13A     PM    2018-10-14       4 <NA>  Gray    <NA>   
## 2 -74.0  40.8 15F-PM-101… 15F     PM    2018-10-10       6 Adult Gray    <NA>   
## 3 -74.0  40.8 19C-PM-101… 19C     PM    2018-10-18       2 Adult Gray    Cinnam…
## 4 -74.0  40.8 21B-AM-101… 21B     AM    2018-10-19       4 <NA>  <NA>    <NA>   
## 5 -74.0  40.8 23A-AM-101… 23A     AM    2018-10-18       2 Juve… Black   <NA>   
## 6 -74.0  40.8 38H-PM-101… 38H     PM    2018-10-12       1 Adult Gray    <NA>   
## # … with 25 more variables: combination_of_primary_and_highlight_color <chr>,
## #   color_notes <chr>, location <chr>, above_ground_sighter_measurement <chr>,
## #   specific_location <chr>, running <lgl>, chasing <lgl>, climbing <lgl>,
## #   eating <lgl>, foraging <lgl>, other_activities <chr>, kuks <lgl>,
## #   quaas <lgl>, moans <lgl>, tail_flags <lgl>, tail_twitches <lgl>,
## #   approaches <lgl>, indifferent <lgl>, runs_from <lgl>,
## #   other_interactions <chr>, zip_codes <dbl>, community_districts <dbl>, …

Is there any missing data?

# Visual overview of which cells in `squirrels` are missing.
vis_miss(x = squirrels)

Cleaning the data

# Columns removed from the data before analysis.
dpc <- c(
  "color_notes", "zip_codes", "community_districts",
  "borough_boundaries", "city_council_districts", "police_precincts"
)

# Cleaned sightings: drop the columns listed in `dpc`, then keep only rows
# with a known fur colour (primary, highlight and their combination), a known
# age and a known location.
sq <- squirrels %>%
  # all_of() marks `dpc` as an external character vector of column names;
  # passing the bare vector to select() is ambiguous and deprecated.
  select(-all_of(dpc)) %>%
  filter(
    !is.na(primary_fur_color),
    !is.na(highlight_fur_color),
    !is.na(combination_of_primary_and_highlight_color),
    !is.na(age),
    !is.na(location),
    age != "?" # "?" is a placeholder value for an undetermined age
  )

head(sq)
## # A tibble: 6 × 29
##    long   lat unique_sq…¹ hectare shift date       hecta…² age   prima…³ highl…⁴
##   <dbl> <dbl> <chr>       <chr>   <chr> <date>       <dbl> <chr> <chr>   <chr>  
## 1 -74.0  40.8 19C-PM-101… 19C     PM    2018-10-18       2 Adult Gray    Cinnam…
## 2 -74.0  40.8 1I-PM-1012… 01I     PM    2018-10-12       4 Juve… Cinnam… White  
## 3 -74.0  40.8 3F-PM-1013… 03F     PM    2018-10-13       3 Adult Cinnam… Gray   
## 4 -74.0  40.8 9I-AM-1014… 09I     AM    2018-10-14       8 Adult Gray    Cinnam…
## 5 -74.0  40.8 2F-PM-1014… 02F     PM    2018-10-14      10 Adult Gray    White  
## 6 -74.0  40.8 34A-PM-101… 34A     PM    2018-10-13       4 Adult Gray    Cinnam…
## # … with 19 more variables: combination_of_primary_and_highlight_color <chr>,
## #   location <chr>, above_ground_sighter_measurement <chr>,
## #   specific_location <chr>, running <lgl>, chasing <lgl>, climbing <lgl>,
## #   eating <lgl>, foraging <lgl>, other_activities <chr>, kuks <lgl>,
## #   quaas <lgl>, moans <lgl>, tail_flags <lgl>, tail_twitches <lgl>,
## #   approaches <lgl>, indifferent <lgl>, runs_from <lgl>,
## #   other_interactions <chr>, and abbreviated variable names …

Plotting

# Scatter plot of sighting coordinates, one panel per shift (rows) and
# location (columns); age is encoded by both point colour and point shape.
ggplot(
  data = sq,
  mapping = aes(
    x = long,
    y = lat,
    colour = factor(age),
    shape = factor(age)
  )
) +
  geom_point(size = 1.5, alpha = 0.6) +
  facet_grid(rows = vars(shift), cols = vars(location)) +
  labs(
    title = "Where do squirrels like to hang out?",
    caption = "The graph tells us that there is a large concentration of squirrels on the ground plane at all hours\nof the day. I was hoping to find more of these small mammals above the ground during the day."
  ) +
  theme_bw() +
  theme(
    # Title and caption
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    ),
    plot.caption = element_text(hjust = 0, size = 10),
    plot.caption.position = "panel",
    # Facet strips and panels
    strip.background = element_rect(
      color = "black", fill = "white", size = 0.5, linetype = "dotted"
    ),
    strip.text.x = element_text(face = "bold.italic"),
    panel.background = element_blank(),
    panel.spacing = unit(3, "lines"),
    # Legend: untitled, placed below the panels
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(face = "bold.italic", size = 10)
  )

What do the squirrels do during the day?

# Long-format table of observed activities: one row per (sighting, activity)
# pair in which the activity was recorded, keyed by the shift of the sighting.
sq_act <- sq %>%
  transmute(
    shift,
    # The logical activity flags become 0/1 integers.
    across(c(running, chasing, climbing, eating, foraging), as.integer),
    # `other_activities` is free text, so as.integer() on it only yields NA
    # (plus coercion warnings) and the category vanished from the result.
    # Count a sighting as "others" whenever any text was recorded.
    others = as.integer(!is.na(other_activities))
  ) %>%
  pivot_longer(-shift, names_to = "Activities", values_to = "Value") %>%
  # Keep only the activities that were actually observed.
  filter(Value == 1L)
# Dodged bar chart counting the rows of `sq_act` per activity, with one bar
# per shift.
ggplot(data = sq_act, mapping = aes(x = Activities, fill = shift)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("#56B4E9", "#000000")) +
  labs(
    title = "What the squirrels do during the day?",
    x = " ",
    y = "Number of Squirrels",
    caption = "The graph clearly shows us that the activity that stands out the most is foraging at night.\nThe other activities are distributed more evenly throughout the day."
  ) +
  theme_classic() +
  theme(
    text = element_text(size = 11),
    axis.line = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    ),
    plot.caption = element_text(hjust = 0, size = 10),
    plot.caption.position = "panel"
  )

# Juvenile sightings only: latitude against longitude, coloured by shift,
# drawn as points with a connecting line and a smoothed trend per shift.
# NOTE(review): latitude is on the x-axis here, whereas the first plot of this
# report puts longitude on x -- confirm this orientation is intended.
juvenile_plot <- sq %>%
  filter(age == "Juvenile") %>%
  ggplot(aes(lat, long, color = shift)) +
  # x and y are inherited from the plot-level mapping; only linetype is new.
  geom_line(aes(linetype = shift)) +
  geom_point() +
  geom_smooth() +
  scale_linetype_manual(values = c("twodash", "dotted")) +
  scale_color_manual(values = c("#56B4E9", "#000000")) +
  # No layer maps a size aesthetic, so the former scale_size_manual() call
  # had no effect and has been removed.
  labs(
    title = "Moves of the Juvenile Squirrels During the Day",
    caption = "As we can see in the graph, they are active all over the park, but during the AM shift\nthey tend to visit less the coordinates between -73.96 and 40.78"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.caption.position = "panel",
    plot.caption = element_text(hjust = 0, size = 10),
    plot.title = element_text(
      size = 12, face = "bold", hjust = 0.5, vjust = 0.5
    )
  )

# Display the plot in the rendered document.
juvenile_plot

# Save this specific plot instead of relying on the implicit last plot.
ggsave("KarolO.pdf", plot = juvenile_plot, width = 8, height = 4)