# Data: distribution of crane observations across the years.
# New things learned:
#   - parsing the month and year from a date
#   - subsetting data and plotting it
#   - renaming columns

# Download data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytuesdayR)
# Fetch the TidyTuesday release for week 39 of 2025; the individual data sets
# are then read from this object by name (e.g. tuesdata$cranes).
tuesdata <- tidytuesdayR::tt_load(x = 2025, week = 39)
## ---- Compiling #TidyTuesday Information for 2025-09-30 ----
## --- There is 1 file available ---
##
##
## ── Downloading files ───────────────────────────────────────────────────────────
##
## 1 of 1: "cranes.csv"
# View the raw data
cranes <- tuesdata$cranes
view(cranes)
# Replace the date with its calendar year and store it in a column named
# "year". The column is renamed by name rather than by position, so this step
# keeps working if the column order of the file ever changes. The year is kept
# numeric (not a character string) so that plots get a continuous, ordered
# axis instead of one discrete label per year.
cranes <- cranes |>
  mutate(date = lubridate::year(date)) |>
  rename(year = date)
view(cranes)
# Has the crane population at Lake Hornborgasjön grown over the past 30 years?
# What is the year-wise distribution of cranes?
# Scatter plot with one point per row of `cranes`: year on the x axis and the
# recorded number of cranes on the y axis.
ggplot(data = cranes, mapping = aes(x = year, y = observations)) +
  geom_point() +
  labs(x = "Year", y = "Number of cranes")
## Warning: Removed 161 rows containing missing values or values outside the scale range
## (`geom_point()`).
# It does appear as if the crane population has increased.
# But could it be that the number of observers has increased?
# Parse the year and month from the date.
# Start again from the untouched download and add two character columns:
#   year  - two-digit year  ("%y", e.g. "24")
#   month - two-digit month ("%m", e.g. "04")
cranes <- tuesdata$cranes |>
  mutate(
    year = format(date, "%y"),
    month = format(date, "%m")
  )
# If you wanted to see thousands of cranes, when is the best time of year to
# visit? When are most observations made?
# Scatter plot of every count against the month in which it was recorded.
ggplot(data = cranes, mapping = aes(x = month, y = observations)) +
  geom_point() +
  labs(x = "Month", y = "Observations")
## Warning: Removed 161 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Answer: in April.
# What is the distribution of cranes across the year in 2024?
# `year` holds two-digit strings at this point, so 2024 is matched as "24".
cranes |>
  dplyr::filter(year == "24") |>
  ggplot(mapping = aes(month, observations)) +
  geom_point()
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Is it possible to predict the arrival of the cranes from weather patterns?
# Take year 2024 as an example.
# Draw the 2024 counts as bars over time, filled by the weather_disruption
# column. geom_col() uses the y values as bar heights. The previous version
# passed `stat = "identity"` to ggplot(), where it has no effect, so geom_bar()
# was still counting rows per distinct observation value, and the data were
# never restricted to 2024.
cranes |>
  dplyr::filter(format(date, "%Y") == "2024") |>
  ggplot(aes(x = date, y = observations, fill = weather_disruption)) +
  geom_col()
## Warning: Removed 161 rows containing non-finite outside the scale range
## (`stat_count()`).
# Cannot think of a better graph that will demonstrate such a pattern.
# Need to explore.
# stat = "identity" is required for a bar chart to accept a y aesthetic;
# otherwise it takes the default count of x, just like a histogram.