R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# README for Project FeederWatch on TidyTuesday
# link includes data dictionary/explanation of each column
# https://github.com/rfordatascience/tidytuesday/blob/master/data/2023/2023-01-10/readme.md

# Get the Data: observation records from PFW_2021_public.csv
feederwatch <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-10/PFW_2021_public.csv"
)
## Rows: 100000 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): loc_id, subnational1_code, entry_technique, sub_id, obs_id, PROJ_P...
## dbl (14): latitude, longitude, Month, Day, Year, how_many, valid, reviewed, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Count-site records from PFW_count_site_data_public_2021.csv
site_data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-10/PFW_count_site_data_public_2021.csv"
)
## Rows: 254355 Columns: 62
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): loc_id, proj_period_id
## dbl (60): yard_type_pavement, yard_type_garden, yard_type_landsca, yard_type...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Ch. 8
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (readr)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
#1 Read spellman-combined.txt. No delimiter is given, so readr guesses it;
# the result is not assigned, so the tibble is printed.
read_delim(
  file = "~/Documents/BSCI 5890 /Wk_2/spellman-combined.txt"
)
## New names:
## Rows: 6178 Columns: 83
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (77): cln3-1, cln3-2, clb2-2, clb2-1, alpha0, alpha7, alpha14,
## alpha21, ... lgl (5): clb, alpha, cdc15, cdc28, elu
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6,178 × 83
##    ...1    `cln3-1` `cln3-2` clb   `clb2-2` `clb2-1` alpha alpha0 alpha7 alpha14
##    <chr>      <dbl>    <dbl> <lgl>    <dbl>    <dbl> <lgl>  <dbl>  <dbl>   <dbl>
##  1 YAL001C     0.15    NA    NA       -0.22     0.07 NA     -0.15  -0.15   -0.21
##  2 YAL002W    -0.07    -0.76 NA       -0.12    -0.25 NA     -0.11   0.1     0.01
##  3 YAL003W    -1.22    -0.27 NA       -0.1      0.23 NA     -0.14  -0.71    0.1 
##  4 YAL004W    -0.09     1.2  NA        0.16    -0.14 NA     -0.02  -0.48   -0.11
##  5 YAL005C    -0.6      1.01 NA        0.24     0.65 NA     -0.05  -0.53   -0.47
##  6 YAL007C     0.65     1.39 NA       -0.29    -0.54 NA     -0.6   -0.45   -0.13
##  7 YAL008W    -0.36    -0.22 NA       -0.2      0.1  NA     -0.28  -0.22   -0.06
##  8 YAL009W     0.25    -0.79 NA       -0.22    -0.54 NA     -0.03  -0.27    0.17
##  9 YAL010C    -0.3     -0.6  NA       -0.18     0.01 NA     -0.05   0.13    0.13
## 10 YAL011W    -0.15    -0.71 NA       -0.15    -0.25 NA     -0.31  -0.43   -0.3 
## # ℹ 6,168 more rows
## # ℹ 73 more variables: alpha21 <dbl>, alpha28 <dbl>, alpha35 <dbl>,
## #   alpha42 <dbl>, alpha49 <dbl>, alpha56 <dbl>, alpha63 <dbl>, alpha70 <dbl>,
## #   alpha77 <dbl>, alpha84 <dbl>, alpha91 <dbl>, alpha98 <dbl>, alpha105 <dbl>,
## #   alpha112 <dbl>, alpha119 <dbl>, cdc15 <lgl>, cdc15_10 <dbl>,
## #   cdc15_30 <dbl>, cdc15_50 <dbl>, cdc15_70 <dbl>, cdc15_80 <dbl>,
## #   cdc15_90 <dbl>, cdc15_100 <dbl>, cdc15_110 <dbl>, cdc15_120 <dbl>, …
# Is cowplot installed? system.file() returns "" when the package cannot be
# found, so nzchar() yields a single TRUE/FALSE without scanning every
# installed package the way installed.packages() does.
nzchar(system.file(package = "cowplot"))
## [1] TRUE
# Dimensions (rows, columns) of the two raw tables.
dim(feederwatch) # 100000 observations x 22 variables
## [1] 100000     22
dim(site_data) # 254355 observations x 62 variables
## [1] 254355     62
view(feederwatch)
view(site_data)

# Rename PROJ_PERIOD_ID to proj_period_id so the key has the same name as in
# site_data.
feederwatch2 <- feederwatch %>%
  rename(proj_period_id = PROJ_PERIOD_ID)
view(feederwatch2)

# Inner join: keep observations whose (proj_period_id, loc_id) pair also
# appears in site_data.
feed.and.site.merged <- inner_join(
  x = feederwatch2,
  y = site_data,
  by = join_by(proj_period_id, loc_id)
)
view(feed.and.site.merged)
dim(feed.and.site.merged) # 77520 rows x 82 columns
## [1] 77520    82
head(feed.and.site.merged)
## # A tibble: 6 × 82
##   loc_id    latitude longitude subnational1_code entry_technique   sub_id obs_id
##   <chr>        <dbl>     <dbl> <chr>             <chr>             <chr>  <chr> 
## 1 L981010       52.1    -122.  CA-BC             POSTCODE LAT/LON… S8320… OBS10…
## 2 L3161698      43.8    -123.  US-OR             /GOOGLE_MAP/ZOOM… S7803… OBS10…
## 3 L13210778     39.7     -75.9 US-MD             /GOOGLE_MAP/ZOOM… S8131… OBS10…
## 4 L13258348     42.2     -83.7 US-MI             /GOOGLE_MAP/ZOOM… S7925… OBS10…
## 5 L149639       32.7     -79.9 US-SC             PointMaker1.0_2   S7918… OBS10…
## 6 L10140349     46.1     -73.1 CA-QC             /GOOGLE_MAP/ZOOM… S8344… OBS10…
## # ℹ 75 more variables: Month <dbl>, Day <dbl>, Year <dbl>,
## #   proj_period_id <chr>, species_code <chr>, how_many <dbl>, valid <dbl>,
## #   reviewed <dbl>, day1_am <dbl>, day1_pm <dbl>, day2_am <dbl>, day2_pm <dbl>,
## #   effort_hrs_atleast <dbl>, snow_dep_atleast <dbl>, Data_Entry_Method <chr>,
## #   yard_type_pavement <dbl>, yard_type_garden <dbl>, yard_type_landsca <dbl>,
## #   yard_type_woods <dbl>, yard_type_desert <dbl>, hab_dcid_woods <dbl>,
## #   hab_evgr_woods <dbl>, hab_mixed_woods <dbl>, hab_orchard <dbl>, …
#2 Species totals in my favorite (TX) and least favorite (NY) states

# Sum of how_many per species_code for US-TX, largest total first.
fav_state_birds <- feed.and.site.merged %>%
  filter(subnational1_code == "US-TX") %>%
  group_by(species_code) %>%
  summarize(sum_total = sum(how_many)) %>%
  arrange(desc(sum_total))
view(fav_state_birds) # TX top 5: pinsis, whwdov, houspa, amegfi, cedwax

# Same summary for US-NY.
least_fav_state_birds <- feed.and.site.merged %>%
  filter(subnational1_code == "US-NY") %>%
  group_by(species_code) %>%
  summarize(sum_total = sum(how_many)) %>%
  arrange(desc(sum_total))
view(least_fav_state_birds) # NY top 5: moudov, daejun, houspa, amegfi, bkcchi

# Species present in both state summaries; the two totals come through as
# sum_total.x (TX) and sum_total.y (NY).
overlapping_birds <- inner_join(
  fav_state_birds,
  least_fav_state_birds,
  by = "species_code"
)
view(overlapping_birds) # 60/81 bird species are shared between my fav and least fav state

#3 Monthly species totals for US-TX (sum of how_many per species_code).
# Findings: pinsis is present in all three months and ranks highest in Jan and
# Feb, but dips in Mar, when amegfi becomes the top bird. cedwax drops out of
# the top 5 in Mar.

# Month == 1
january <- feed.and.site.merged %>%
  filter(subnational1_code == "US-TX", Month == 1) %>%
  group_by(species_code) %>%
  summarize(sum_total = sum(how_many)) %>%
  arrange(desc(sum_total))
view(january) # top 5: pinsis, whwdov, amegfi, houspa, cedwax

# Month == 2
feb <- feed.and.site.merged %>%
  filter(subnational1_code == "US-TX", Month == 2) %>%
  group_by(species_code) %>%
  summarize(sum_total = sum(how_many)) %>%
  arrange(desc(sum_total))
view(feb) # top 5: pinsis, cedwax, amegfi, whwdov, norcar

# Month == 3
mar <- feed.and.site.merged %>%
  filter(subnational1_code == "US-TX", Month == 3) %>%
  group_by(species_code) %>%
  summarize(sum_total = sum(how_many)) %>%
  arrange(desc(sum_total))
view(mar) # top 5: amegfi, bnhcow, pinsis, chispa, norcar

#4 Does snow depth (snow_dep_atleast) presence affect bird species (pinsis, whwdov, moudov, blujay,cedwax)

# Observations recorded with snow_dep_atleast equal to 5, kept for inspection.
snow.and.bird <- feed.and.site.merged %>%
  filter(snow_dep_atleast == 5)
view(snow.and.bird)

# Number of observation rows for each focal species at each snow_dep_atleast
# value. The grouping variable is snow_dep_atleast, the variable named in the
# question, so each row of the result is one species x snow-depth combination.
bird_counts <- feed.and.site.merged %>%
  filter(species_code %in% c("pinsis", "whwdov", "moudov", "blujay", "cedwax")) %>%
  group_by(species_code, snow_dep_atleast) %>%
  summarize(count = n(), .groups = "drop")
view(bird_counts)