This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
###########################################################################
# Library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
library(purrr)
library(ggplot2)
#install.packages('ggthemes')
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.5.3
###########################################################################
# Cleaning air temperature data
# folder containing files
folder_path <- "request/"
# list all csv files
files <- list.files(
folder_path,
pattern = "^2022(0[8-9]|1[0-2])\\.csv$|^2023(0[1-9]|1[0-2])\\.csv$|^202401
\\.csv$",
full.names = TRUE
)
# read and combine them
combined_data <- files %>%
lapply(read_csv) %>%
bind_rows()
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 28 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): station, time
## dbl (6): latitude [degrees_north], longitude [degrees_east], elevation [feet...
## lgl (3): temp_9m_max [kelvin], temp_9m_min [kelvin], temp_9m_avg [kelvin]
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# remove timezone and convert date column to dates the r can process
data_clean <- combined_data %>%
mutate(
time = str_remove(time, " (EDT|EST)$"),
time = ymd(time)
)
# Add week column
data_clean_w <- data_clean %>%
mutate(week = floor_date(time, "week"))
# Create average temperature column grouped by week
air_temp_average_w <- data_clean_w %>%
group_by(week) %>%
summarise(avg_temp = mean(`temp_2m_avg [kelvin]`, na.rm = TRUE))
# Anomaly (week)
air_temp_average_w <- air_temp_average_w %>%
mutate(anomaly = avg_temp - mean(avg_temp, na.rm = TRUE))
#Rename anomaly column and delete average temp column (week)
Air_Temp_Anom_w <- air_temp_average_w %>%
rename(AT_Anomaly_w = anomaly)
#########################################################################
# Cleaning settlement data
settlement_data <- read_csv('Height_Readings_Date.csv')
## Rows: 1027 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Time
## dbl (1): EWE-TP-056A - Height
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert date column to dates that R can read
settlement_data_clean <- settlement_data %>%
mutate(
Time=mdy(Time)
)
# Add week column
settlement_data_clean_w <- settlement_data_clean %>%
mutate(week = floor_date(Time,"week"))
# Add average settlement reading column grouped by week
Settlement_Data_Average_w <- settlement_data_clean_w %>%
group_by(week) %>%
summarise(Settlement = mean(`EWE-TP-056A - Height`, na.rm = TRUE))
#########################################################################
# Create one table with all temperature and settlement data (week)
Combined_Table_W <- reduce(list(Air_Temp_Anom_w,
Settlement_Data_Average_w),
full_join, by = "week")
# Identify scale factor in order to plot everything on the same graph (week)
scale_factor_w <- max(Combined_Table_W$AT_Anomaly_w,
na.rm = TRUE) /
max(Combined_Table_W$Settlement,
na.rm = TRUE)
# Dual axis Plot
ggplot(Combined_Table_W, aes(x = week)) +
geom_line(aes(y = AT_Anomaly_w,
color = "Temperature"),
linewidth = 0.75) +
geom_line(aes(y = Settlement * scale_factor_w,
color = "Settlement"),
linetype = "dashed", linewidth = 1) +
scale_y_continuous(name = "Normalized Temperature (K)",
sec.axis = sec_axis(~ . / scale_factor_w, name = "Settlement (in)")) +
labs(x = "Date",
color = "Legend",
title = "Time Series Plot of Normalized Temperature and Settlement Data"
) +
theme_minimal()
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_line()`).
# The product shows that there is an inverse relationship between temperature
# settlement
############################################################################
model <- lm(Settlement ~ avg_temp,
data = Combined_Table_W,
na.action = na.exclude)
Combined_Table_W$settlement_detrended <- resid(model)
ggplot(Combined_Table_W, aes(x = week, y = Settlement)) +
geom_line(aes(y = Settlement, color = "Raw data"),
linetype = "dashed") +
geom_line(aes(y = settlement_detrended, color = "Detrended data")) +
labs(color = "Legend",
x = "Date",
title = "Settlement Time Series with Temperature Effect Removed" ) +
theme_minimal()
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_line()`).
# The product here shows that we can extract the temperature effect from the
# data and reveal real settlement trends
##############################################################################
# Settlement vs temperature plot
ggplot(Combined_Table_W, aes(x = avg_temp, y = Settlement)) +
geom_point(aes(color = "Settlement"),
size = 2) +
geom_smooth(method = "lm",
color = "red",
linewidth = 1) +
labs(x = "Temperature",
y = "Settlement",
title = "Temperature vs Setllement Data") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
# The product here shows a scatter plot, confirming the relationship between
# temperature and settlement. I tried assigning each dot a separate colour based
# on the month as discussed with Professor Frei but could not find a relationship