###I had significant issues with knitting the document; it had something to do with the dataset, which had issues when I loaded it, but I somehow forced it. #Default Libraries
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(naniar)
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(skimr)
##
## Attaching package: 'skimr'
## The following object is masked from 'package:naniar':
##
## n_complete
library(knitr)
#Dataset - Squirrels because what could this be for.
#install.packages("remotes")
#remotes::install_github("mine-cetinkaya-rundel/nycsquirrels18")
library(nycsquirrels18)
#Learn about the data
skim(squirrels)
| Name | squirrels |
| Number of rows | 3023 |
| Number of columns | 35 |
| _______________________ | |
| Column type frequency: | |
| character | 13 |
| Date | 1 |
| logical | 13 |
| numeric | 8 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| unique_squirrel_id | 0 | 1.00 | 13 | 14 | 0 | 3018 | 0 |
| hectare | 0 | 1.00 | 3 | 3 | 0 | 339 | 0 |
| shift | 0 | 1.00 | 2 | 2 | 0 | 2 | 0 |
| age | 121 | 0.96 | 1 | 8 | 0 | 3 | 0 |
| primary_fur_color | 55 | 0.98 | 4 | 8 | 0 | 3 | 0 |
| highlight_fur_color | 1086 | 0.64 | 4 | 22 | 0 | 10 | 0 |
| combination_of_primary_and_highlight_color | 0 | 1.00 | 1 | 27 | 0 | 22 | 0 |
| color_notes | 2841 | 0.06 | 3 | 153 | 0 | 135 | 0 |
| location | 64 | 0.98 | 12 | 12 | 0 | 2 | 0 |
| above_ground_sighter_measurement | 114 | 0.96 | 1 | 5 | 0 | 41 | 0 |
| specific_location | 2547 | 0.16 | 4 | 102 | 0 | 304 | 0 |
| other_activities | 2586 | 0.14 | 4 | 132 | 0 | 307 | 0 |
| other_interactions | 2783 | 0.08 | 2 | 106 | 0 | 197 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2018-10-06 | 2018-10-20 | 2018-10-12 | 11 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| running | 0 | 1 | 0.24 | FAL: 2293, TRU: 730 |
| chasing | 0 | 1 | 0.09 | FAL: 2744, TRU: 279 |
| climbing | 0 | 1 | 0.22 | FAL: 2365, TRU: 658 |
| eating | 0 | 1 | 0.25 | FAL: 2263, TRU: 760 |
| foraging | 0 | 1 | 0.47 | FAL: 1588, TRU: 1435 |
| kuks | 0 | 1 | 0.03 | FAL: 2921, TRU: 102 |
| quaas | 0 | 1 | 0.02 | FAL: 2973, TRU: 50 |
| moans | 0 | 1 | 0.00 | FAL: 3020, TRU: 3 |
| tail_flags | 0 | 1 | 0.05 | FAL: 2868, TRU: 155 |
| tail_twitches | 0 | 1 | 0.14 | FAL: 2589, TRU: 434 |
| approaches | 0 | 1 | 0.06 | FAL: 2845, TRU: 178 |
| indifferent | 0 | 1 | 0.48 | FAL: 1569, TRU: 1454 |
| runs_from | 0 | 1 | 0.22 | FAL: 2345, TRU: 678 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| long | 0 | 1 | -73.97 | 0.01 | -73.98 | -73.97 | -73.97 | -73.96 | -73.95 | ▅▇▅▆▂ |
| lat | 0 | 1 | 40.78 | 0.01 | 40.76 | 40.77 | 40.78 | 40.79 | 40.80 | ▇▇▃▅▆ |
| hectare_squirrel_number | 0 | 1 | 4.12 | 3.10 | 1.00 | 2.00 | 3.00 | 6.00 | 23.00 | ▇▂▁▁▁ |
| zip_codes | 3014 | 0 | 11828.22 | 995.98 | 10090.00 | 12081.00 | 12420.00 | 12423.00 | 12423.00 | ▂▁▁▁▇ |
| community_districts | 0 | 1 | 19.00 | 0.23 | 11.00 | 19.00 | 19.00 | 19.00 | 23.00 | ▁▁▁▇▁ |
| borough_boundaries | 0 | 1 | 4.00 | 0.00 | 4.00 | 4.00 | 4.00 | 4.00 | 4.00 | ▁▁▇▁▁ |
| city_council_districts | 0 | 1 | 19.07 | 1.35 | 19.00 | 19.00 | 19.00 | 19.00 | 51.00 | ▇▁▁▁▁ |
| police_precincts | 0 | 1 | 13.00 | 0.22 | 10.00 | 13.00 | 13.00 | 13.00 | 18.00 | ▁▇▁▁▁ |
#View the data
view(squirrels)
#Other ways to learn about the data
str(squirrels)
## tibble [3,023 x 35] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ long : num [1:3023] -74 -74 -74 -74 -74 ...
## $ lat : num [1:3023] 40.8 40.8 40.8 40.8 40.8 ...
## $ unique_squirrel_id : chr [1:3023] "13A-PM-1014-04" "15F-PM-1010-06" "19C-PM-1018-02" "21B-AM-1019-04" ...
## $ hectare : chr [1:3023] "13A" "15F" "19C" "21B" ...
## $ shift : chr [1:3023] "PM" "PM" "PM" "AM" ...
## $ date : Date[1:3023], format: "2018-10-14" "2018-10-10" ...
## $ hectare_squirrel_number : num [1:3023] 4 6 2 4 2 1 6 2 2 4 ...
## $ age : chr [1:3023] NA "Adult" "Adult" NA ...
## $ primary_fur_color : chr [1:3023] "Gray" "Gray" "Gray" NA ...
## $ highlight_fur_color : chr [1:3023] NA NA "Cinnamon" NA ...
## $ combination_of_primary_and_highlight_color: chr [1:3023] "Gray+" "Gray+" "Gray+Cinnamon" "+" ...
## $ color_notes : chr [1:3023] NA NA NA NA ...
## $ location : chr [1:3023] "Ground Plane" "Ground Plane" "Ground Plane" NA ...
## $ above_ground_sighter_measurement : chr [1:3023] "FALSE" "FALSE" "FALSE" NA ...
## $ specific_location : chr [1:3023] NA NA NA NA ...
## $ running : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ chasing : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ climbing : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ eating : logi [1:3023] TRUE FALSE FALSE FALSE TRUE FALSE ...
## $ foraging : logi [1:3023] FALSE TRUE TRUE FALSE FALSE TRUE ...
## $ other_activities : chr [1:3023] NA NA NA NA ...
## $ kuks : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ quaas : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ moans : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ tail_flags : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ tail_twitches : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ approaches : logi [1:3023] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ indifferent : logi [1:3023] FALSE TRUE FALSE FALSE TRUE FALSE ...
## $ runs_from : logi [1:3023] FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ other_interactions : chr [1:3023] NA NA NA NA ...
## $ zip_codes : num [1:3023] NA NA NA NA NA NA NA NA NA NA ...
## $ community_districts : num [1:3023] 19 19 19 19 19 19 19 19 19 19 ...
## $ borough_boundaries : num [1:3023] 4 4 4 4 4 4 4 4 4 4 ...
## $ city_council_districts : num [1:3023] 19 19 19 19 19 19 19 19 19 19 ...
## $ police_precincts : num [1:3023] 13 13 13 13 13 13 13 13 13 13 ...
summary(squirrels)
## long lat unique_squirrel_id hectare
## Min. :-73.98 Min. :40.76 Length:3023 Length:3023
## 1st Qu.:-73.97 1st Qu.:40.77 Class :character Class :character
## Median :-73.97 Median :40.78 Mode :character Mode :character
## Mean :-73.97 Mean :40.78
## 3rd Qu.:-73.96 3rd Qu.:40.79
## Max. :-73.95 Max. :40.80
##
## shift date hectare_squirrel_number
## Length:3023 Min. :2018-10-06 Min. : 1.000
## Class :character 1st Qu.:2018-10-08 1st Qu.: 2.000
## Mode :character Median :2018-10-12 Median : 3.000
## Mean :2018-10-11 Mean : 4.124
## 3rd Qu.:2018-10-14 3rd Qu.: 6.000
## Max. :2018-10-20 Max. :23.000
##
## age primary_fur_color highlight_fur_color
## Length:3023 Length:3023 Length:3023
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## combination_of_primary_and_highlight_color color_notes
## Length:3023 Length:3023
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## location above_ground_sighter_measurement specific_location
## Length:3023 Length:3023 Length:3023
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## running chasing climbing eating
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:2293 FALSE:2744 FALSE:2365 FALSE:2263
## TRUE :730 TRUE :279 TRUE :658 TRUE :760
##
##
##
##
## foraging other_activities kuks quaas
## Mode :logical Length:3023 Mode :logical Mode :logical
## FALSE:1588 Class :character FALSE:2921 FALSE:2973
## TRUE :1435 Mode :character TRUE :102 TRUE :50
##
##
##
##
## moans tail_flags tail_twitches approaches
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:3020 FALSE:2868 FALSE:2589 FALSE:2845
## TRUE :3 TRUE :155 TRUE :434 TRUE :178
##
##
##
##
## indifferent runs_from other_interactions zip_codes
## Mode :logical Mode :logical Length:3023 Min. :10090
## FALSE:1569 FALSE:2345 Class :character 1st Qu.:12081
## TRUE :1454 TRUE :678 Mode :character Median :12420
## Mean :11828
## 3rd Qu.:12423
## Max. :12423
## NA's :3014
## community_districts borough_boundaries city_council_districts police_precincts
## Min. :11 Min. :4 Min. :19.00 Min. :10
## 1st Qu.:19 1st Qu.:4 1st Qu.:19.00 1st Qu.:13
## Median :19 Median :4 Median :19.00 Median :13
## Mean :19 Mean :4 Mean :19.07 Mean :13
## 3rd Qu.:19 3rd Qu.:4 3rd Qu.:19.00 3rd Qu.:13
## Max. :23 Max. :4 Max. :51.00 Max. :18
##
#Initial observations: Lots of missing values. There is a unique squirrel ID, so we can rule out duplicates later. For color, the best bet seems to be to use the primary color; the combination seems to be a concatenation which doesn’t populate if the information is missing. When trying to determine what hectare referred to, I found this documentation: https://mine-cetinkaya-rundel.github.io/nycsquirrels18/reference/squirrels.html Latitude and longitude are probably too specific for what I’m doing. Hectare will probably be better for location.
The last five variables (zip_codes, community_districts, borough_boundaries, city_council_districts, police_precincts) do not yet strike me as useful variables for discerning variations among the squirrels.
Variables I think I am interested in learning more about: Location of the squirrels. Whether certain colors appear more often in certain areas (ruled this out after I realized that most were gray by quite a bit). Break down the frequency of noises, and maybe then by location, to see if squirrels in certain areas communicate in other ways. Graph the hectare and then a unique count of squirrels in those areas <- do this first, maybe filter by a specific day to prevent overlap. Would the count of ages in areas be interesting? What’s the typical squirrel reaction to humans, and is it denser in some areas (maybe from people feeding them)?
Things I need to do: Count hectare Separate hectare to get a grid
Variables I will probably use: 1. Hectare 2. Unique Squirrel ID 3. Date (Didn’t end up using) 4. Primary Fur Color 5. Approaches, Indifferent, runs_from (Human reaction variables)
#Learn some information
count(squirrels, hectare)
## # A tibble: 339 x 2
## hectare n
## <chr> <int>
## 1 01A 11
## 2 01B 27
## 3 01C 12
## 4 01D 16
## 5 01E 8
## 6 01F 8
## 7 01G 7
## 8 01H 4
## 9 01I 4
## 10 02A 15
## # ... with 329 more rows
Maybe I can break out the letters and numbers
# Split the hectare ID after its second character: the leading two digits go
# to NS and the trailing letter goes to EW. The result is printed, not stored.
separate(squirrels, hectare, into = c("NS", "EW"), sep = 2)
## # A tibble: 3,023 x 36
## long lat unique_squirrel~ NS EW shift date hectare_squirre~
## <dbl> <dbl> <chr> <chr> <chr> <chr> <date> <dbl>
## 1 -74.0 40.8 13A-PM-1014-04 13 A PM 2018-10-14 4
## 2 -74.0 40.8 15F-PM-1010-06 15 F PM 2018-10-10 6
## 3 -74.0 40.8 19C-PM-1018-02 19 C PM 2018-10-18 2
## 4 -74.0 40.8 21B-AM-1019-04 21 B AM 2018-10-19 4
## 5 -74.0 40.8 23A-AM-1018-02 23 A AM 2018-10-18 2
## 6 -74.0 40.8 38H-PM-1012-01 38 H PM 2018-10-12 1
## 7 -74.0 40.8 3D-AM-1006-06 03 D AM 2018-10-06 6
## 8 -74.0 40.8 42C-AM-1007-02 42 C AM 2018-10-07 2
## 9 -74.0 40.8 9A-PM-1010-02 09 A PM 2018-10-10 2
## 10 -74.0 40.8 9B-AM-1010-04 09 B AM 2018-10-10 4
## # ... with 3,013 more rows, and 28 more variables: age <chr>,
## # primary_fur_color <chr>, highlight_fur_color <chr>,
## # combination_of_primary_and_highlight_color <chr>, color_notes <chr>,
## # location <chr>, above_ground_sighter_measurement <chr>,
## # specific_location <chr>, running <lgl>, chasing <lgl>, climbing <lgl>,
## # eating <lgl>, foraging <lgl>, other_activities <chr>, kuks <lgl>,
## # quaas <lgl>, moans <lgl>, tail_flags <lgl>, tail_twitches <lgl>,
## # approaches <lgl>, indifferent <lgl>, runs_from <lgl>,
## # other_interactions <chr>, zip_codes <dbl>, community_districts <dbl>,
## # borough_boundaries <dbl>, city_council_districts <dbl>,
## # police_precincts <dbl>
Awesome, that worked, going to save it as a variable and graph counts
###Let’s see their locations
# Store the split hectare columns so the plots below can use them: NS holds
# the two-digit number and EW the letter of each hectare ID (e.g. "13A").
squirrel_location <- squirrels %>%
  separate(hectare, c("NS", "EW"), sep = 2)

# One point per sighting, positioned by hectare letter (x) and number (y).
ggplot(squirrel_location, aes(x = EW, y = NS)) +
  geom_point()
Okay, that showed me where they live, I kept EW on the x axis to mirror east to west, and NS is on the Y axis so it will look more like the park. I want to see how many are there though
# Try sizing the points by squirrel ID. The ID is a character column, so
# ggplot2 warns that size is being used for a discrete variable.
ggplot(
  data = squirrel_location,
  mapping = aes(x = EW, y = NS, size = unique_squirrel_id)
) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.
I didn’t think that through, and R got mad at me; of course a discrete variable wouldn’t work. Let’s find a way for the code to work that through.
# Within each hectare (EW, NS), count the rows for every squirrel ID.
# The result is printed only and stays grouped by EW and NS.
count(
  group_by(squirrel_location, EW, NS),
  unique_squirrel_id,
  name = "squirrel_count"
)
## # A tibble: 3,018 x 4
## # Groups: EW, NS [339]
## EW NS unique_squirrel_id squirrel_count
## <chr> <chr> <chr> <int>
## 1 A 01 1A-AM-1007-01 1
## 2 A 01 1A-AM-1007-02 1
## 3 A 01 1A-AM-1007-03 1
## 4 A 01 1A-AM-1007-04 1
## 5 A 01 1A-PM-1014-01 1
## 6 A 01 1A-PM-1014-02 1
## 7 A 01 1A-PM-1014-03 1
## 8 A 01 1A-PM-1014-04 1
## 9 A 01 1A-PM-1014-05 1
## 10 A 01 1A-PM-1014-06 1
## # ... with 3,008 more rows
Now try this as its own object so I can map it
# Keep the per-squirrel counts (still grouped by EW and NS) as an object.
squirrel_location_and_count <- count(
  group_by(squirrel_location, EW, NS),
  unique_squirrel_id,
  name = "squirrel_count"
)

# Plot one point per row, sized by that row's count.
ggplot(
  data = squirrel_location_and_count,
  mapping = aes(x = EW, y = NS, size = squirrel_count)
) +
  geom_point()
Still not what I want. I want a count in each area. This shows how many times a specific squirrel was in an area. While interesting, I want to know which area is the best for squirrel viewing.
# Add the per-hectare total to every row. The tibble is still grouped by EW
# and NS, so sum() is evaluated within each hectare. The new column keeps its
# default name, `sum(squirrel_count)`.
squirrel_location_and_count2 <- squirrel_location_and_count %>%
  mutate(sum(squirrel_count))

# Refer to that column with backticks. A quoted string inside aes() is a
# constant rather than a column reference, which is what makes ggplot2 warn
# about using size for a discrete variable.
ggplot(
  squirrel_location_and_count2,
  aes(EW, NS, size = `sum(squirrel_count)`)
) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.
###But it’s not a discrete variable, what have I done wrong?
view(squirrel_location_and_count2)
###Let’s change the column name
# Give the computed total a syntactic name so it is easy to reference in aes().
squirrel_location_and_count3 <- rename(
  squirrel_location_and_count2,
  total_squirrels = `sum(squirrel_count)`
)
###It says it’s an int, let’s try this again. Also, a follow-up question to ask: do I need to keep assigning to an object, or will just running it be enough? In the meantime, I am creating new objects each time so I can go back as needed.
# Size each point by the total number of squirrels in its hectare.
ggplot(
  data = squirrel_location_and_count3,
  mapping = aes(x = EW, y = NS, size = total_squirrels)
) +
  geom_point()
#It worked but it is super hard to read.
# Same plot with semi-transparent points.
ggplot(
  data = squirrel_location_and_count3,
  mapping = aes(x = EW, y = NS, size = total_squirrels)
) +
  geom_point(alpha = 0.3)
#Whew, better. Observations, for the best squirrel viewing experience, go to the middle of the park. Also, assuming the sections where there are no squirrels correlate to bodies of water in the park, simply from knowing the park has them.
#Let’s try a theme or two to clean it up a little
# Themed version of the squirrel-density plot.
ggplot(squirrel_location_and_count3, aes(EW, NS, size = total_squirrels)) +
  geom_point(alpha = 0.6, color = "blue") + # played with alpha as I played with ratio
  ggtitle("Best Squirrel Viewing Spots in Central Park") +
  xlab("East to West") + ylab("North to South") +
  # The legend belongs to the size aesthetic (nothing is mapped to fill), so
  # its title is set through `size`; labs(fill = ...) has nothing to rename.
  labs(size = "Total Squirrels") +
  theme_minimal() +
  coord_fixed(ratio = .55) # played with ratios here to make it look more like central park, and eliminate white space
#Playing around with color and gradient
# Map both point size and transparency to the hectare total.
ggplot(
  data = squirrel_location_and_count3,
  mapping = aes(x = EW, y = NS, size = total_squirrels)
) +
  geom_point(aes(alpha = total_squirrels), color = "blue") + # this is what I wanted to see, size and alpha density makes those pop more
  labs(
    title = "Best Squirrel Viewing Spots in Central Park",
    x = "East to West",
    y = "North to South"
  ) +
  theme_minimal() +
  coord_fixed(ratio = .55) +
  scale_alpha()
#save
# Draw the final viewing-spots plot once more, then write it to a PDF.
ggplot(
  data = squirrel_location_and_count3,
  mapping = aes(x = EW, y = NS, size = total_squirrels)
) +
  geom_point(aes(alpha = total_squirrels), color = "blue") +
  labs(
    title = "Best Squirrel Viewing Spots in Central Park",
    x = "East to West",
    y = "North to South"
  ) +
  theme_minimal() +
  coord_fixed(ratio = .55) +
  scale_alpha()
# No plot is passed, so ggsave() saves the most recent plot.
ggsave(
  filename = "Central Park Squirrel Viewing Areas.pdf",
  width = 8,
  height = 4
)
#One graph done, 195 lines later.
###Graph 2 Let’s try a nice bar graph, binning the frequency of fur patterns
# Bar chart of sightings per primary fur colour (missing values included).
ggplot(data = squirrels, mapping = aes(x = primary_fur_color)) +
  geom_bar()
#Okay, it’s kind of boring and I don’t like the NAs. Let’s remove the nas
# Tabulate the colours first to see how many are missing.
count(squirrels, primary_fur_color)
## # A tibble: 4 x 2
## primary_fur_color n
## <chr> <int>
## 1 Black 103
## 2 Cinnamon 392
## 3 Gray 2473
## 4 <NA> 55
# Keep only the rows that have a primary fur colour recorded.
no_na_fur_color <- drop_na(squirrels, primary_fur_color)

# Same bar chart as before, now without the NA bar.
ggplot(data = no_na_fur_color, mapping = aes(x = primary_fur_color)) +
  geom_bar()
#make it prettier
#Okay pause I was trying in vain to assign colors to the bars to correlate with the fur because I thought that would be clever. I tried a few things:
Attempt 1: Try it this way, thanks stack overflow
# Palette for the three primary fur colours. primary_fur_color is a character
# column, so levels() on it is NULL; take the names from the sorted unique
# values instead (Black, Cinnamon, Gray) so each hex code gets its colour name.
squirrel_colors <- c("#000000", "#D2691E", "#808080")
names(squirrel_colors) <- sort(unique(no_na_fur_color$primary_fur_color))
my_scale <- scale_fill_manual(name = "primary_fur_colors", values = squirrel_colors)

# A fill scale only acts on a mapped fill aesthetic, so map fill to the fur
# colour as well as x.
ggplot(no_na_fur_color, aes(primary_fur_color, fill = primary_fur_color)) +
  geom_bar() +
  ggtitle("Primary Color of Central Park Squirrels") +
  xlab("Fur Color") + ylab("Count") +
  theme_minimal() +
  my_scale
#It runs but nothing changes
Attempt 2:
# A scale is a plot layer, not a geom_bar() argument (ggplot2 ignores it as an
# unknown parameter). Bars are coloured through `fill`, so map fill to the fur
# colour and add the palette with scale_fill_manual().
ggplot(no_na_fur_color, aes(primary_fur_color, fill = primary_fur_color)) +
  geom_bar() +
  scale_fill_manual(
    values = c("Black" = "#000000", "Cinnamon" = "#D2691E", "Gray" = "#808080")
  ) +
  ggtitle("Primary Color of Central Park Squirrels") +
  xlab("Fur Color") + ylab("Count") +
  theme_minimal()
## Warning: Ignoring unknown parameters: scale_colour_manual
Also runs and nothing changes. HOW DO I DO THIS.
#Attempt 3
# The palette has to be added as a scale layer on a mapped fill aesthetic;
# passing it as a geom_bar() argument is ignored. The unnamed values are
# applied in the order of the categories (Black, Cinnamon, Gray).
ggplot(no_na_fur_color, aes(primary_fur_color, fill = primary_fur_color)) +
  geom_bar() +
  scale_fill_manual(values = c("#000000", "#D2691E", "#808080")) +
  ggtitle("Primary Color of Central Park Squirrels") +
  xlab("Fur Color") + ylab("Count") +
  theme_minimal()
## Warning: Ignoring unknown parameters: scale_colour_manual
#Attempt four and final attempt, this is getting silly
# The interior of a bar is controlled by `fill` (colour is only the outline),
# and a manual scale needs a mapped aesthetic to act on. Map fill to the fur
# colour and use the fill scale instead of the colour scale.
ggplot(no_na_fur_color, aes(primary_fur_color, fill = primary_fur_color)) +
  geom_bar() +
  scale_fill_manual("primary_fur_color", values = c("#000000", "#D2691E", "#808080")) +
  ggtitle("Primary Color of Central Park Squirrels") +
  xlab("Fur Color") + ylab("Count") +
  theme_minimal()
#Okay I tried. Finishing this graph without being cute about fur colors
# Final fur-colour chart: plain black bars.
ggplot(no_na_fur_color, aes(primary_fur_color)) +
  geom_bar(color = "black", fill = "black") +
  ggtitle("Primary Color of Central Park Squirrels") +
  xlab("Fur Color") + ylab("Count") +
  theme_minimal()
# ggsave() writes a file; it is not a plot layer, so it is called as its own
# statement rather than added with `+`. With no plot argument it saves the
# most recent plot.
ggsave("Color of Central Park Squirrels.pdf", width = 8, height = 4)
###Graph 3
# Keep only the three human-reaction flags and tabulate them.
squirrel_behavior <- select(squirrels, approaches, indifferent, runs_from)
summary(squirrel_behavior)
## approaches indifferent runs_from
## Mode :logical Mode :logical Mode :logical
## FALSE:2845 FALSE:1569 FALSE:2345
## TRUE :178 TRUE :1454 TRUE :678
# Split hectare into NS/EW as before, then keep only the sightings where the
# squirrel approached (approaches is a logical column).
squirrel_location_approach <- filter(
  separate(squirrels, hectare, into = c("NS", "EW"), sep = 2),
  approaches
)
view(squirrel_location_approach)
I viewed it to make sure I just had 178 approaches
Okay, so I could see where they live using the code above or see if I am more likely to be approached in the morning or evening, let’s do that. Maybe can add where
# Count of approaching squirrels per shift (AM / PM).
ggplot(data = squirrel_location_approach, mapping = aes(x = shift)) +
  geom_bar()
Side note: I tried a geom_point at first, but the boolean seemed to cause an issue. I tried as.numeric, but that didn’t work either, so I went with a bar graph.
Squirrels are a little more likely to approach at night. Not a meaningful stat. Let’s see if we can find where they are more likely to approach.
# Where approaches happened. Size is mapped to the logical `approaches`
# column, which ggplot2 treats as discrete (hence the warning).
ggplot(
  data = squirrel_location_approach,
  mapping = aes(x = EW, y = NS, size = approaches)
) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.
Okay this just shows me where I can feed them at all, I want to know if one area is more likely to do so.
#This time learning what I did in the first graph!
# Number of approaches per hectare; the result stays grouped by EW and NS.
squirrel_location_approach_count <- count(
  group_by(squirrel_location_approach, EW, NS),
  approaches,
  name = "approach_count"
)
view(squirrel_location_approach_count) # just made sure it worked as intended

# One point per hectare, sized by its approach count.
ggplot(
  data = squirrel_location_approach_count,
  mapping = aes(x = EW, y = NS, size = approach_count)
) +
  geom_point()
It appears that I am a little bit more likely to be approached on the Southern End. Let’s mirror the same format as my first graph.
#cleaner version:
# Approach counts in the same format as the viewing-spots plot: size and
# transparency both follow the count.
ggplot(
  data = squirrel_location_approach_count,
  mapping = aes(x = EW, y = NS, size = approach_count)
) +
  geom_point(aes(alpha = approach_count), color = "black") +
  labs(
    title = "Friendliest Squirrels in Central Park",
    x = "East to West",
    y = "North to South"
  ) +
  theme_minimal() +
  coord_fixed(ratio = .55) +
  scale_alpha()
#Let’s print it. I made it black because I decided I liked it better.
# Draw the final approach plot once more, then write it to a PDF.
ggplot(
  data = squirrel_location_approach_count,
  mapping = aes(x = EW, y = NS, size = approach_count)
) +
  geom_point(aes(alpha = approach_count), color = "black") +
  labs(
    title = "Friendliest Squirrels in Central Park",
    x = "East to West",
    y = "North to South"
  ) +
  theme_minimal() +
  coord_fixed(ratio = .55) +
  scale_alpha()
# No plot is passed, so ggsave() saves the most recent plot.
ggsave(
  filename = "Friendliest Squirrels in Central Park.pdf",
  width = 8,
  height = 4
)