This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: per-column summary (min, quartiles, median, mean, max) of `cars`.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Project FeederWatch data, as published for TidyTuesday (2023-01-10).
# The README linked below includes the data dictionary explaining each column:
# https://github.com/rfordatascience/tidytuesday/blob/master/data/2023/2023-01-10/readme.md

# Download the public 2021 FeederWatch file (PFW_2021_public.csv).
feederwatch <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-10/PFW_2021_public.csv"
)
## Rows: 100000 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): loc_id, subnational1_code, entry_technique, sub_id, obs_id, PROJ_P...
## dbl (14): latitude, longitude, Month, Day, Year, how_many, valid, reviewed, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Download the companion count-site file (PFW_count_site_data_public_2021.csv).
site_data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-10/PFW_count_site_data_public_2021.csv"
)
## Rows: 254355 Columns: 62
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): loc_id, proj_period_id
## dbl (60): yard_type_pavement, yard_type_garden, yard_type_landsca, yard_type...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Ch. 8
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (readr)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
# 1
# Read the tab-delimited Spellman file and print it; the result is not stored.
# The delimiter is stated explicitly instead of being left for readr to guess.
# NOTE(review): the path is machine-specific and the directory name
# "BSCI 5890 " ends with a space -- confirm it matches the folder on disk.
read_delim(
  file = "~/Documents/BSCI 5890 /Wk_2/spellman-combined.txt",
  delim = "\t"
)
## New names:
## Rows: 6178 Columns: 83
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (77): cln3-1, cln3-2, clb2-2, clb2-1, alpha0, alpha7, alpha14,
## alpha21, ... lgl (5): clb, alpha, cdc15, cdc28, elu
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6,178 × 83
## ...1 `cln3-1` `cln3-2` clb `clb2-2` `clb2-1` alpha alpha0 alpha7 alpha14
## <chr> <dbl> <dbl> <lgl> <dbl> <dbl> <lgl> <dbl> <dbl> <dbl>
## 1 YAL001C 0.15 NA NA -0.22 0.07 NA -0.15 -0.15 -0.21
## 2 YAL002W -0.07 -0.76 NA -0.12 -0.25 NA -0.11 0.1 0.01
## 3 YAL003W -1.22 -0.27 NA -0.1 0.23 NA -0.14 -0.71 0.1
## 4 YAL004W -0.09 1.2 NA 0.16 -0.14 NA -0.02 -0.48 -0.11
## 5 YAL005C -0.6 1.01 NA 0.24 0.65 NA -0.05 -0.53 -0.47
## 6 YAL007C 0.65 1.39 NA -0.29 -0.54 NA -0.6 -0.45 -0.13
## 7 YAL008W -0.36 -0.22 NA -0.2 0.1 NA -0.28 -0.22 -0.06
## 8 YAL009W 0.25 -0.79 NA -0.22 -0.54 NA -0.03 -0.27 0.17
## 9 YAL010C -0.3 -0.6 NA -0.18 0.01 NA -0.05 0.13 0.13
## 10 YAL011W -0.15 -0.71 NA -0.15 -0.25 NA -0.31 -0.43 -0.3
## # ℹ 6,168 more rows
## # ℹ 73 more variables: alpha21 <dbl>, alpha28 <dbl>, alpha35 <dbl>,
## # alpha42 <dbl>, alpha49 <dbl>, alpha56 <dbl>, alpha63 <dbl>, alpha70 <dbl>,
## # alpha77 <dbl>, alpha84 <dbl>, alpha91 <dbl>, alpha98 <dbl>, alpha105 <dbl>,
## # alpha112 <dbl>, alpha119 <dbl>, cdc15 <lgl>, cdc15_10 <dbl>,
## # cdc15_30 <dbl>, cdc15_50 <dbl>, cdc15_70 <dbl>, cdc15_80 <dbl>,
## # cdc15_90 <dbl>, cdc15_100 <dbl>, cdc15_110 <dbl>, cdc15_120 <dbl>, …
# Check whether cowplot is installed. system.file() returns "" when the
# package cannot be found, so this yields TRUE/FALSE without building the
# full installed.packages() matrix just to look up a single name.
nzchar(system.file(package = "cowplot"))
## [1] TRUE
# Observation table: 100000 rows by 22 columns.
dim(feederwatch)
## [1] 100000 22
# Site table: 254355 rows by 62 columns.
dim(site_data)
## [1] 254355 62
view(feederwatch)
view(site_data)

# Rename PROJ_PERIOD_ID so both tables use the same lower-case key name.
feederwatch2 <- rename(feederwatch, proj_period_id = PROJ_PERIOD_ID)
view(feederwatch2)

# Keep only observations that have a site record with the same project period
# and location id.
feed.and.site.merged <- inner_join(
  feederwatch2,
  site_data,
  by = join_by(proj_period_id, loc_id)
)
view(feed.and.site.merged)
# Merged table: 77520 rows by 82 columns.
dim(feed.and.site.merged)
## [1] 77520 82
head(feed.and.site.merged)
## # A tibble: 6 × 82
## loc_id latitude longitude subnational1_code entry_technique sub_id obs_id
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 L981010 52.1 -122. CA-BC POSTCODE LAT/LON… S8320… OBS10…
## 2 L3161698 43.8 -123. US-OR /GOOGLE_MAP/ZOOM… S7803… OBS10…
## 3 L13210778 39.7 -75.9 US-MD /GOOGLE_MAP/ZOOM… S8131… OBS10…
## 4 L13258348 42.2 -83.7 US-MI /GOOGLE_MAP/ZOOM… S7925… OBS10…
## 5 L149639 32.7 -79.9 US-SC PointMaker1.0_2 S7918… OBS10…
## 6 L10140349 46.1 -73.1 CA-QC /GOOGLE_MAP/ZOOM… S8344… OBS10…
## # ℹ 75 more variables: Month <dbl>, Day <dbl>, Year <dbl>,
## # proj_period_id <chr>, species_code <chr>, how_many <dbl>, valid <dbl>,
## # reviewed <dbl>, day1_am <dbl>, day1_pm <dbl>, day2_am <dbl>, day2_pm <dbl>,
## # effort_hrs_atleast <dbl>, snow_dep_atleast <dbl>, Data_Entry_Method <chr>,
## # yard_type_pavement <dbl>, yard_type_garden <dbl>, yard_type_landsca <dbl>,
## # yard_type_woods <dbl>, yard_type_desert <dbl>, hab_dcid_woods <dbl>,
## # hab_evgr_woods <dbl>, hab_mixed_woods <dbl>, hab_orchard <dbl>, …
# Sum the individuals counted per species for one subnational1_code,
# optionally restricted to a single Month, sorted from most to least counted.
#   data:       table with subnational1_code, Month, species_code, how_many
#   state_code: value of subnational1_code to keep, e.g. "US-TX"
#   month:      value of Month to keep, or NULL (default) to keep every month
# Returns one row per species_code with its total in `sum_total`.
# Missing counts are skipped so a single NA cannot blank out a species total.
total_birds_by_species <- function(data, state_code, month = NULL) {
  # .env$ forces the function arguments to be used even if `data` happens to
  # contain a column with the same name.
  kept <- filter(data, subnational1_code == .env$state_code)
  if (!is.null(month)) {
    kept <- filter(kept, Month == .env$month)
  }
  kept %>%
    group_by(species_code) %>%
    summarise(sum_total = sum(how_many, na.rm = TRUE)) %>%
    arrange(desc(sum_total))
}

# 2
# In my favourite state, TX, the top 5 commonly seen birds are
# pinsis, whwdov, houspa, amegfi, cedwax.
fav_state_birds <- total_birds_by_species(feed.and.site.merged, "US-TX")
view(fav_state_birds)

# In my least favourite state, NY: moudov, daejun, houspa, amegfi, bkcchi.
least_fav_state_birds <- total_birds_by_species(feed.and.site.merged, "US-NY")
view(least_fav_state_birds)

# 60/81 bird species are shared between my favourite and least favourite state.
overlapping_birds <- inner_join(
  fav_state_birds,
  least_fav_state_birds,
  by = "species_code"
)
view(overlapping_birds)

# 3: Pinsis seems to be around for all three months, coming in at the highest,
# but takes a dip in Mar. Amegfi becomes the top bird in Mar. Cedwax goes away
# from the top 5 in Mar.
# January: pinsis, whwdov, amegfi, houspa, cedwax
january <- total_birds_by_species(feed.and.site.merged, "US-TX", month = 1)
view(january)

# February: pinsis, cedwax, amegfi, whwdov, norcar
feb <- total_birds_by_species(feed.and.site.merged, "US-TX", month = 2)
view(feb)

# March: amegfi, bnhcow, pinsis, chispa, norcar
mar <- total_birds_by_species(feed.and.site.merged, "US-TX", month = 3)
view(mar)
# 4: Does snow depth (snow_dep_atleast) presence affect bird species
# (pinsis, whwdov, moudov, blujay, cedwax)?
# Rows of the merged table where snow_dep_atleast equals 5.
snow.and.bird <- filter(feed.and.site.merged, snow_dep_atleast == 5)
view(snow.and.bird)

# Number of rows for each of the five species at each value of `squirrels`.
# NOTE(review): the question above is about snow depth, but this table is
# grouped by `squirrels` and is built from the full merged table rather than
# snow.and.bird -- confirm that is intended.
bird_counts <- feed.and.site.merged %>%
  filter(species_code %in% c("pinsis", "whwdov", "moudov", "blujay", "cedwax")) %>%
  count(species_code, squirrels, name = "count")
view(bird_counts)