We will begin by loading in the packages we will use in our analysis
Our data is in the data folder in our R project
Here is the pseudo code
# Read one CSV per temperature logger from the project's data folder.
# `skip = 1` ignores the first line of each file, so the column names are
# taken from the second line.
temp_kalauhaihai_garage <- read_csv(
  here("data/data_temp_NovFeb2024_KalauhaihaiGarage_20970109.csv"),
  skip = 1
)
temp_kanewai_auwai_under_footbridge <- read_csv(
  here("data/data_temp_NovFeb2024_KanewaiAuwaiUnderFootBridge_21445021.csv"),
  skip = 1
)
temp_kanewai_fishpond_edge_norfolk <- read_csv(
  here("data/data_temp_NovFeb2024_KanewaiFishpondEdgeNorfolk_20970123.csv"),
  skip = 1
)
temp_kanewai_rockstairs_by_wall <- read_csv(
  here("data/data_temp_NovFeb2024_KanewaiRockStairsByWall_20970115.csv"),
  skip = 1
)
temp_kanewai_spring_ledge_makai_end <- read_csv(
  here("data/data_temp_NovFeb2024_KanewaiSpringLedgeMakaiEnd_20970094.csv"),
  skip = 1
)
Awesome! You can now view the data by clicking on it in your environment (top right pane in RStudio) or by writing code like this to view the first 6 rows
# Preview the first six rows (6 is also head()'s default)
head(temp_kalauhaihai_garage, n = 6)
## # A tibble: 6 × 8
## `#` `Date Time, GMT-10:00` Temp, °C (LGR S/N: 20970…¹ Coupler Detached (LG…²
## <dbl> <chr> <dbl> <chr>
## 1 1 11/02/23 01:00:00 22.3 Logged
## 2 2 11/02/23 01:10:00 22.2 <NA>
## 3 3 11/02/23 01:20:00 22.2 <NA>
## 4 4 11/02/23 01:30:00 22.2 <NA>
## 5 5 11/02/23 01:40:00 22.3 <NA>
## 6 6 11/02/23 01:50:00 22.2 <NA>
## # ℹ abbreviated names: ¹`Temp, °C (LGR S/N: 20970109, SEN S/N: 20970109)`,
## # ²`Coupler Detached (LGR S/N: 20970109)`
## # ℹ 4 more variables: `Coupler Attached (LGR S/N: 20970109)` <lgl>,
## # `Host Connected (LGR S/N: 20970109)` <lgl>,
## # `Stopped (LGR S/N: 20970109)` <chr>,
## # `End Of File (LGR S/N: 20970109)` <chr>
# List the column names exactly as they were read from the file
names(temp_kalauhaihai_garage)
## [1] "#"
## [2] "Date Time, GMT-10:00"
## [3] "Temp, °C (LGR S/N: 20970109, SEN S/N: 20970109)"
## [4] "Coupler Detached (LGR S/N: 20970109)"
## [5] "Coupler Attached (LGR S/N: 20970109)"
## [6] "Host Connected (LGR S/N: 20970109)"
## [7] "Stopped (LGR S/N: 20970109)"
## [8] "End Of File (LGR S/N: 20970109)"
You’ll notice the column names in our dataframes have capital letters and are separated by spaces. This can cause errors in R, so we change all of our column names to lowercase letters, separating words with underscores (e.g. Date Time, GMT-10:00 becomes date_time_gmt_10_00, which we will shorten to date_time below)
The pseudo code here is very common:
# Convert every column name to lowercase words joined by underscores,
# overwriting each dataframe with its cleaned-up version.
temp_kalauhaihai_garage <- clean_names(temp_kalauhaihai_garage)
temp_kanewai_auwai_under_footbridge <-
  clean_names(temp_kanewai_auwai_under_footbridge)
temp_kanewai_fishpond_edge_norfolk <-
  clean_names(temp_kanewai_fishpond_edge_norfolk)
temp_kanewai_rockstairs_by_wall <-
  clean_names(temp_kanewai_rockstairs_by_wall)
temp_kanewai_spring_ledge_makai_end <-
  clean_names(temp_kanewai_spring_ledge_makai_end)
Now let’s check out the difference
# Column names after clean_names()
names(temp_kalauhaihai_garage)
## [1] "number"
## [2] "date_time_gmt_10_00"
## [3] "temp_c_lgr_s_n_20970109_sen_s_n_20970109"
## [4] "coupler_detached_lgr_s_n_20970109"
## [5] "coupler_attached_lgr_s_n_20970109"
## [6] "host_connected_lgr_s_n_20970109"
## [7] "stopped_lgr_s_n_20970109"
## [8] "end_of_file_lgr_s_n_20970109"
Great! These are now much easier to work with, but still very long
# Shorten the timestamp column name to `date_time` in every dataframe.
# rename() takes `new_name = old_name` and stops with an error if the old
# column is not found, so a mistyped name cannot slip by unnoticed.
temp_kalauhaihai_garage <- temp_kalauhaihai_garage %>%
  rename(date_time = date_time_gmt_10_00)
temp_kanewai_auwai_under_footbridge <- temp_kanewai_auwai_under_footbridge %>%
  rename(date_time = date_time_gmt_10_00)
temp_kanewai_fishpond_edge_norfolk <- temp_kanewai_fishpond_edge_norfolk %>%
  rename(date_time = date_time_gmt_10_00)
temp_kanewai_rockstairs_by_wall <- temp_kanewai_rockstairs_by_wall %>%
  rename(date_time = date_time_gmt_10_00)
temp_kanewai_spring_ledge_makai_end <- temp_kanewai_spring_ledge_makai_end %>%
  rename(date_time = date_time_gmt_10_00)
Be careful with temperature, because every dataframe has a slightly different column name for temperature (each one includes that logger's serial number)
# Give the temperature column the same short name in every dataframe.
# The old name contains each logger's serial number, so it differs per
# dataframe. rename() errors if the old name is not found, which catches a
# wrong serial number right here instead of failing later.
temp_kalauhaihai_garage <- temp_kalauhaihai_garage %>%
  rename(temp_celcius = temp_c_lgr_s_n_20970109_sen_s_n_20970109)
temp_kanewai_auwai_under_footbridge <- temp_kanewai_auwai_under_footbridge %>%
  rename(temp_celcius = temp_c_lgr_s_n_21445021_sen_s_n_21445021)
temp_kanewai_fishpond_edge_norfolk <- temp_kanewai_fishpond_edge_norfolk %>%
  rename(temp_celcius = temp_c_lgr_s_n_20970123_sen_s_n_20970123)
temp_kanewai_rockstairs_by_wall <- temp_kanewai_rockstairs_by_wall %>%
  rename(temp_celcius = temp_c_lgr_s_n_20970115_sen_s_n_20970115)
temp_kanewai_spring_ledge_makai_end <- temp_kanewai_spring_ledge_makai_end %>%
  rename(temp_celcius = temp_c_lgr_s_n_20970094_sen_s_n_20970094)
Let’s check the results
# Column names after renaming the timestamp and temperature columns
names(temp_kalauhaihai_garage)
## [1] "number" "date_time"
## [3] "temp_celcius" "coupler_detached_lgr_s_n_20970109"
## [5] "coupler_attached_lgr_s_n_20970109" "host_connected_lgr_s_n_20970109"
## [7] "stopped_lgr_s_n_20970109" "end_of_file_lgr_s_n_20970109"
Perfect! Now let’s select the columns that are relevant to us using the select function
# Keep only the timestamp and temperature columns in each dataframe
temp_kalauhaihai_garage <-
  select(temp_kalauhaihai_garage, date_time, temp_celcius)
temp_kanewai_auwai_under_footbridge <-
  select(temp_kanewai_auwai_under_footbridge, date_time, temp_celcius)
temp_kanewai_fishpond_edge_norfolk <-
  select(temp_kanewai_fishpond_edge_norfolk, date_time, temp_celcius)
temp_kanewai_rockstairs_by_wall <-
  select(temp_kanewai_rockstairs_by_wall, date_time, temp_celcius)
temp_kanewai_spring_ledge_makai_end <-
  select(temp_kanewai_spring_ledge_makai_end, date_time, temp_celcius)
Let’s check the results
# Preview the first six rows of the two remaining columns
temp_kalauhaihai_garage %>%
  head()
## # A tibble: 6 × 2
## date_time temp_celcius
## <chr> <dbl>
## 1 11/02/23 01:00:00 22.3
## 2 11/02/23 01:10:00 22.2
## 3 11/02/23 01:20:00 22.2
## 4 11/02/23 01:30:00 22.2
## 5 11/02/23 01:40:00 22.3
## 6 11/02/23 01:50:00 22.2
Every object (something we assign a name to) has a class in R
The class of temp_kalauhaihai_garage is a dataframe
# Ask R which class(es) the whole dataframe object has
temp_kalauhaihai_garage %>%
  class()
## [1] "tbl_df" "tbl" "data.frame"
The class of the date_time column in temp_kalauhaihai_garage is a character
# Class of the date_time column before conversion
class(temp_kalauhaihai_garage[["date_time"]])
## [1] "character"
We need R to recognize that this is a date, so we need to manually change the class using the mdy_hms() function from the lubridate package
# Convert the month/day/year hour:minute:second text in date_time into real
# date-times. mdy_hms() labels the result with the "UTC" time zone by default;
# the clock values themselves are kept exactly as written in the file.
temp_kalauhaihai_garage <- temp_kalauhaihai_garage %>%
  mutate(date_time = mdy_hms(date_time))
temp_kanewai_auwai_under_footbridge <- temp_kanewai_auwai_under_footbridge %>%
  mutate(date_time = mdy_hms(date_time))
temp_kanewai_fishpond_edge_norfolk <- temp_kanewai_fishpond_edge_norfolk %>%
  mutate(date_time = mdy_hms(date_time))
temp_kanewai_rockstairs_by_wall <- temp_kanewai_rockstairs_by_wall %>%
  mutate(date_time = mdy_hms(date_time))
temp_kanewai_spring_ledge_makai_end <- temp_kanewai_spring_ledge_makai_end %>%
  mutate(date_time = mdy_hms(date_time))
Let’s check the results (POSIXct is equivalent to date/time)
# Class of the date_time column after conversion
class(temp_kalauhaihai_garage[["date_time"]])
## [1] "POSIXct" "POSIXt"
The last step before visualizing our temperature data is trimming the data to only include the time the sensor was in the water at each given location
Let’s make a plot to see what the data looks like untrimmed using the ggplot() function
the pseudo code for this is
# Line plot of every reading, including those taken outside the water
ggplot(temp_kalauhaihai_garage, aes(date_time, temp_celcius)) +
  geom_line()
Let’s store and plot the start and end times Dr. Lupita has recorded as variables for the kalauhaihai garage sensor
# Deployment start and end times recorded for the Kalauhaihai garage sensor.
# mdy_hms() stored date_time with the "UTC" time zone label (its default), so
# these times are created with tz = "UTC" as well. Without `tz`, as.POSIXct()
# uses the computer's local time zone, which would shift the cut-offs (and the
# dashed lines below) by the local UTC offset relative to the logger's clock.
kalauhaihai_garage_start_time <- as.POSIXct(
  "11-02-2023 9:30:00",
  format = "%m-%d-%Y %H:%M:%S",
  tz = "UTC"
)
kalauhaihai_garage_end_time <- as.POSIXct(
  "01-31-2024 9:33:00",
  format = "%m-%d-%Y %H:%M:%S",
  tz = "UTC"
)

# Plot the untrimmed data with the start (green) and end (red) times marked.
# geom_vline() gets the times as numbers (seconds since 1970-01-01 UTC).
ggplot(data = temp_kalauhaihai_garage, aes(x = date_time, y = temp_celcius)) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(kalauhaihai_garage_start_time),
    linetype = "dashed",
    color = "darkgreen"
  ) +
  geom_vline(
    xintercept = as.numeric(kalauhaihai_garage_end_time),
    linetype = "dashed",
    color = "darkred"
  )
Let’s make a new dataframe called temp_kalauhaihai_garage_trimmed with the trimmed data
# Keep only readings taken from the start time through the end time
# (inclusive). Conditions separated by commas in filter() must all be TRUE.
temp_kalauhaihai_garage_trimmed <- filter(
  temp_kalauhaihai_garage,
  date_time >= kalauhaihai_garage_start_time,
  date_time <= kalauhaihai_garage_end_time
)
Let’s plot the trimmed data (update the dataframe name)
# Same line plot as before, now drawn from the trimmed dataframe
ggplot(temp_kalauhaihai_garage_trimmed, aes(date_time, temp_celcius)) +
  geom_line()
Awesome! Now let’s customize the plot and make it interactive
# Build the styled plot and store it in a variable so it can be reused
temp_kalauhaihai_garage_plot <-
  ggplot(temp_kalauhaihai_garage_trimmed, aes(date_time, temp_celcius)) +
  geom_line(color = "steelblue") +
  theme_minimal() +
  labs(
    title = "Temperature Kalauhaihai Garage",
    subtitle = "November 2023 - February 2024",
    x = "Date",
    y = "Temperature °C"
  )

# Turn the stored ggplot into an interactive plotly widget
ggplotly(temp_kalauhaihai_garage_plot)
Question
Can you