#Load data and packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytuesdayR)
# Fetch the TidyTuesday release for week 39 of 2025; the captured console
# output below shows it resolves to 2025-09-30 and ships one file, cranes.csv.
tuesdata <- tidytuesdayR::tt_load(x = 2025, week = 39)
## ---- Compiling #TidyTuesday Information for 2025-09-30 ----
## --- There is 1 file available ---
## 
## 
## ── Downloading files ───────────────────────────────────────────────────────────
## 
##   1 of 1: "cranes.csv"
# Pull the cranes table out of the downloaded bundle.
cranes <- tuesdata[["cranes"]]

# Parse year and month from date.
# In format(), "%y" gives the year in two digits and "%Y" gives four digits.
# The problem with two-digit years is that the graph doesn't come out right:
# the years up to 2000 end up at the wrong end of the axis, so "%Y" is used.

# Add two text columns derived from `date`:
#   year  - four-digit year ("%Y")
#   month - two-digit month number ("%m")
cranes <- cranes |>
  mutate(
    year = format(date, "%Y"),
    month = format(date, "%m")
  )

# Inspect the result in the data viewer.
view(cranes)

# Has the crane population at Lake Hornborgasjön grown over the past 30 years?
# What is the year-wise distribution of cranes?
# A graph is the best way to see this, but first the data must be summarised by year.

# Yearly totals: one row per `year` with the sum of `observations`.
# Missing counts are skipped (na.rm = TRUE), so a year whose counts are all
# missing gets a total of 0 rather than NA.
# The result is assigned with a leading `<-` so the target name is visible at
# the start of the pipeline instead of being hidden after the last step.
cranes_col <- cranes |>
  group_by(year) |>
  summarize(
    TotalSum = sum(observations, na.rm = TRUE)
  ) |>
  # yearly_obs is a copy of TotalSum; both columns are kept so any existing
  # reference to either name keeps working.
  mutate(yearly_obs = TotalSum)

#Now the graph.

# Bar chart of the yearly totals: one bar per year, bar height = yearly_obs.
# geom_col() draws bars whose height is the y value itself (the same as
# geom_bar(stat = "identity")), instead of counting the rows for each x.
ggplot(cranes_col, aes(x = year, y = yearly_obs)) +
  geom_col()

# The number of observations has increased over the years.

#If you wanted to see thousands of cranes, when is the best time of year to visit? #When are most observations made?

# Monthly totals across all years: one row per `month` with the sum of
# `observations`. Missing counts are skipped (na.rm = TRUE).
# The result is assigned with a leading `<-` so the target name is visible at
# the start of the pipeline instead of being hidden after the last step.
cranes_col_month <- cranes |>
  group_by(month) |>
  summarise(
    Total_sum = sum(observations, na.rm = TRUE)
  ) |>
  # monthly_obs is a copy of Total_sum; both columns are kept so any existing
  # reference to either name keeps working.
  mutate(monthly_obs = Total_sum)

# Bar chart of the monthly totals held in cranes_col_month.
# The summary is plotted instead of the raw rows: stacking the raw counts
# gives the same bar heights, but the raw rows with missing counts made ggplot
# warn about removed rows. The summary already skipped them with na.rm = TRUE,
# so nothing missing reaches the plot.
cranes_col_month |>
  ggplot(aes(x = month, y = monthly_obs)) +
  geom_col()

#April is the best time to visit

#What is the distribution of cranes across the year in 2024?

# Bar chart of the 2024 counts by month. Rows for the same month are stacked,
# so each bar's height is that month's total.
# `year` is stored as text, hence the comparison with "2024".
# Rows with a missing count are dropped explicitly; otherwise ggplot removes
# them itself and emits a "Removed rows containing missing values" warning.
cranes |>
  filter(year == "2024", !is.na(observations)) |>
  ggplot(aes(x = month, y = observations)) +
  geom_col()

```