#Load data and packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytuesdayR)
# Fetch the TidyTuesday release for 2025, week 39 (dated 2025-09-30).
# The download log for this release listed a single file, "cranes.csv".
tuesdata <- tidytuesdayR::tt_load(x = 2025, week = 39)

# Pull the cranes table out of the downloaded bundle.
cranes <- tuesdata$cranes
# Derive calendar fields from the observation date.
# "%Y" gives the year as four-digit text; "%m" gives the month as text.
# NOTE(review): the earlier note here blamed the four-digit year for years up
# to 2000 landing at the wrong end of the graph. That ordering problem belongs
# to the two-digit "%y" form (as text, "99" sorts after "24"), so "%Y" is kept.
cranes <- cranes |>
  mutate(
    year = format(date, "%Y"),
    month = format(date, "%m")
  )

# Inspect the augmented table.
view(cranes)
# Has the crane population at Lake Hornborgasjön grown over the past 30 years?
# What is the year-wise distribution of cranes?
# A graph is the clearest way to see this, so first total the counts per year.
# na.rm = TRUE skips rows whose count is missing.
cranes_col <- cranes |>
  group_by(year) |>
  summarise(
    TotalSum = sum(observations, na.rm = TRUE),
    # Same value under the name used by the plot below.
    yearly_obs = TotalSum
  )
# Bar chart of the yearly totals.
# geom_col() uses y as the bar height rather than counting rows per x value.
ggplot(cranes_col, aes(year, yearly_obs)) +
  geom_col()
# The number of observations has increased over the years.
# If you wanted to see thousands of cranes, when is the best time of year to
# visit? In other words: in which month are the most cranes counted?
# Total the counts per month; na.rm = TRUE skips rows whose count is missing.
cranes_col_month <- cranes |>
  group_by(month) |>
  summarise(Total_sum = sum(observations, na.rm = TRUE)) |>
  mutate(monthly_obs = Total_sum)

# Plot the monthly totals computed above. Plotting the raw rows instead passed
# the missing counts to ggplot, which dropped them with a warning
# ("Removed 161 rows containing missing values"); the bar heights are the same
# because stacked raw rows add up to the monthly total.
cranes_col_month |>
  ggplot(aes(x = month, y = monthly_obs)) +
  geom_col()
# April is the best time to visit
# What is the distribution of cranes across the year in 2024?
# year is stored as text (see the format() step), hence the string comparison.
# Counts are totalled per month with na.rm = TRUE so missing counts are handled
# explicitly instead of being dropped by ggplot with a warning
# ("Removed 7 rows containing missing values"). Bar heights are unchanged.
cranes |>
  filter(year == "2024") |>
  group_by(month) |>
  summarise(observations = sum(observations, na.rm = TRUE)) |>
  ggplot(aes(x = month, y = observations)) +
  geom_col()
```