#Data is about the distribution of crane observations across the years.
#New things learned: parsing the month and year from a date.
#Also learned to subset data and plot it.
#Also learned to rename columns.
#Download data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytuesdayR)
# Fetch the TidyTuesday data for 2025, week 39 (the `cranes` table is pulled
# out of the result further down).
tuesdata <- tt_load(x = 2025, week = 39)
## ---- Compiling #TidyTuesday Information for 2025-09-30 ----
## --- There is 1 file available ---
## 
## 
## ── Downloading files ───────────────────────────────────────────────────────────
## 
##   1 of 1: "cranes.csv"

#View data

# Pull the crane table out of the downloaded object and open it in the data
# viewer for a first look.
cranes <- tuesdata[["cranes"]]
view(cranes)

#Parse year from date

# Replace the full observation date with its four-digit year (kept as text,
# e.g. "2024") and call the resulting column `year`.
# The column is renamed by name rather than by position, so this step keeps
# working even if the column order of the file ever changes.
cranes <- cranes |>
  mutate(date = format(date, "%Y")) |>
  rename(year = date)
view(cranes)

#Has the crane population at Lake Hornborgasjön grown over the past 30 years? #What is the year-wise distribution of cranes?

# Scatter of every recorded count against its year.
ggplot(cranes, aes(year, observations)) +
  geom_point() +
  xlab("Year") +
  ylab("Number of cranes")
## Warning: Removed 161 rows containing missing values or values outside the scale range
## (`geom_point()`).

#It does appear as if the crane population has increased. #But could it be that the number of observers has increased?

#Parsing the year and month from the date

# Start again from the untouched download and derive two text columns from
# the date: a two-digit year (e.g. "24") and a two-digit month (e.g. "04").
cranes <- tuesdata$cranes |>
  mutate(
    year = format(date, "%y"),
    month = format(date, "%m")
  )

#If you wanted to see thousands of cranes, when is the best time of year to visit? #When are most observations made?

# Scatter of the counts against calendar month, pooled over all years.
ggplot(cranes) +
  geom_point(aes(x = month, y = observations)) +
  labs(x = "Month", y = "Observations")
## Warning: Removed 161 rows containing missing values or values outside the scale range
## (`geom_point()`).

#In April

#What is the distribution of cranes across the year in 2024?

# Same month-by-count scatter, restricted to rows whose two-digit year is "24".
cranes |>
  filter(year == "24") |>
  ggplot(aes(month, observations)) +
  geom_point()
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Is it possible to predict the arrival of the cranes from weather patterns? Take year 2024 as an example

# Counts for 2024 plotted by date, with each bar coloured by the
# `weather_disruption` column.
# `stat = "identity"` was previously passed to ggplot(), which silently
# ignores it, so geom_bar() fell back to counting rows per distinct value.
# geom_col() draws the observed values themselves and needs a y aesthetic.
# Rows with a missing count are dropped up front so nothing is removed
# silently at draw time.
cranes |>
  filter(year == "24", !is.na(observations)) |>
  ggplot(aes(x = date, y = observations, fill = weather_disruption)) +
  geom_col() +
  labs(
    x = "Date",
    y = "Observations",
    fill = "Weather disruption"
  )
## Warning: Removed 161 rows containing non-finite outside the scale range
## (`stat_count()`).

#Cannot think of a better graph that will demonstrate such a pattern. Need to explore. #Note: stat = "identity" must be passed to geom_bar() (or use geom_col()) together with a y aesthetic; otherwise geom_bar() takes the default count for the x axis, just like a histogram.