Welcome to our Maunalua Fishpond Heritage Center Data Analysis Workshop 1!

In this R Markdown, we are going to:

Load Necessary Packages

We will begin by loading in the packages we will use in our analysis

Load Data into R

Our data is in the data folder in our R project

Here is the pseudo code

# Read each logger's CSV export from the project's data folder into its own
# data frame. here() builds the path relative to the project root, and
# skip = 1 tells read_csv() to skip the first line of the file before reading
# the header (presumably a title line from the logger export; confirm by
# opening one file).
temp_kalauhaihai_garage <- read_csv(
  file = here("data/data_temp_NovFeb2024_KalauhaihaiGarage_20970109.csv"),
  skip = 1
)

temp_kanewai_auwai_under_footbridge <- read_csv(
  file = here("data/data_temp_NovFeb2024_KanewaiAuwaiUnderFootBridge_21445021.csv"),
  skip = 1
)

temp_kanewai_fishpond_edge_norfolk <- read_csv(
  file = here("data/data_temp_NovFeb2024_KanewaiFishpondEdgeNorfolk_20970123.csv"),
  skip = 1
)

temp_kanewai_rockstairs_by_wall <- read_csv(
  file = here("data/data_temp_NovFeb2024_KanewaiRockStairsByWall_20970115.csv"),
  skip = 1
)

temp_kanewai_spring_ledge_makai_end <- read_csv(
  file = here("data/data_temp_NovFeb2024_KanewaiSpringLedgeMakaiEnd_20970094.csv"),
  skip = 1
)

Awesome! You can now view the data by clicking on it in your environment (top right pane in RStudio) or by writing code like this to view the first 6 rows

# Preview the first six rows of the raw garage data (six is head()'s default).
head(temp_kalauhaihai_garage, n = 6)
## # A tibble: 6 × 8
##     `#` `Date Time, GMT-10:00` Temp, °C (LGR S/N: 20970…¹ Coupler Detached (LG…²
##   <dbl> <chr>                                       <dbl> <chr>                 
## 1     1 11/02/23 01:00:00                            22.3 Logged                
## 2     2 11/02/23 01:10:00                            22.2 <NA>                  
## 3     3 11/02/23 01:20:00                            22.2 <NA>                  
## 4     4 11/02/23 01:30:00                            22.2 <NA>                  
## 5     5 11/02/23 01:40:00                            22.3 <NA>                  
## 6     6 11/02/23 01:50:00                            22.2 <NA>                  
## # ℹ abbreviated names: ¹​`Temp, °C (LGR S/N: 20970109, SEN S/N: 20970109)`,
## #   ²​`Coupler Detached (LGR S/N: 20970109)`
## # ℹ 4 more variables: `Coupler Attached (LGR S/N: 20970109)` <lgl>,
## #   `Host Connected (LGR S/N: 20970109)` <lgl>,
## #   `Stopped (LGR S/N: 20970109)` <chr>,
## #   `End Of File (LGR S/N: 20970109)` <chr>

Cleaning our Data

# List the column names exactly as they were read from the CSV.
names(temp_kalauhaihai_garage)
## [1] "#"                                              
## [2] "Date Time, GMT-10:00"                           
## [3] "Temp, °C (LGR S/N: 20970109, SEN S/N: 20970109)"
## [4] "Coupler Detached (LGR S/N: 20970109)"           
## [5] "Coupler Attached (LGR S/N: 20970109)"           
## [6] "Host Connected (LGR S/N: 20970109)"             
## [7] "Stopped (LGR S/N: 20970109)"                    
## [8] "End Of File (LGR S/N: 20970109)"

You’ll notice the column names in our dataframes have capital letters and are separated by spaces. This can cause errors in R, so we change all of our column names to lowercase letters, separating words with underscores (e.g. "Date Time, GMT-10:00" becomes "date_time_gmt_10_00")

The pseudo code here is very common:

# Standardize the column names of every data frame with clean_names(), which
# rewrites them as lowercase words joined by underscores. Each data frame is
# overwritten with its cleaned version.
temp_kalauhaihai_garage <- clean_names(temp_kalauhaihai_garage)

temp_kanewai_auwai_under_footbridge <-
  clean_names(temp_kanewai_auwai_under_footbridge)

temp_kanewai_fishpond_edge_norfolk <-
  clean_names(temp_kanewai_fishpond_edge_norfolk)

temp_kanewai_rockstairs_by_wall <-
  clean_names(temp_kanewai_rockstairs_by_wall)

temp_kanewai_spring_ledge_makai_end <-
  clean_names(temp_kanewai_spring_ledge_makai_end)

Now let’s check out the difference

# List the column names again to see what clean_names() changed.
names(temp_kalauhaihai_garage)
## [1] "number"                                  
## [2] "date_time_gmt_10_00"                     
## [3] "temp_c_lgr_s_n_20970109_sen_s_n_20970109"
## [4] "coupler_detached_lgr_s_n_20970109"       
## [5] "coupler_attached_lgr_s_n_20970109"       
## [6] "host_connected_lgr_s_n_20970109"         
## [7] "stopped_lgr_s_n_20970109"                
## [8] "end_of_file_lgr_s_n_20970109"

Great! These are now much easier to work with, but still very long

# Shorten the timestamp column name to "date_time" in every data frame.
# The logical index picks out the column whose name matches the old name; if
# no column matches, nothing is changed, so this is safe to run again.
names(temp_kalauhaihai_garage)[
  names(temp_kalauhaihai_garage) == "date_time_gmt_10_00"
] <- "date_time"

names(temp_kanewai_auwai_under_footbridge)[
  names(temp_kanewai_auwai_under_footbridge) == "date_time_gmt_10_00"
] <- "date_time"

names(temp_kanewai_fishpond_edge_norfolk)[
  names(temp_kanewai_fishpond_edge_norfolk) == "date_time_gmt_10_00"
] <- "date_time"

names(temp_kanewai_rockstairs_by_wall)[
  names(temp_kanewai_rockstairs_by_wall) == "date_time_gmt_10_00"
] <- "date_time"

names(temp_kanewai_spring_ledge_makai_end)[
  names(temp_kanewai_spring_ledge_makai_end) == "date_time_gmt_10_00"
] <- "date_time"

Be careful with temperature because every dataframe has a slightly different column name for temperature (the name includes the logger's serial number)

# Shorten the temperature column name to "temp_celcius" in every data frame.
# The old name differs per data frame because it contains the serial number,
# so each one is matched explicitly. If no column matches, nothing is changed.
names(temp_kalauhaihai_garage)[
  names(temp_kalauhaihai_garage) == "temp_c_lgr_s_n_20970109_sen_s_n_20970109"
] <- "temp_celcius"

names(temp_kanewai_auwai_under_footbridge)[
  names(temp_kanewai_auwai_under_footbridge) == "temp_c_lgr_s_n_21445021_sen_s_n_21445021"
] <- "temp_celcius"

names(temp_kanewai_fishpond_edge_norfolk)[
  names(temp_kanewai_fishpond_edge_norfolk) == "temp_c_lgr_s_n_20970123_sen_s_n_20970123"
] <- "temp_celcius"

names(temp_kanewai_rockstairs_by_wall)[
  names(temp_kanewai_rockstairs_by_wall) == "temp_c_lgr_s_n_20970115_sen_s_n_20970115"
] <- "temp_celcius"

names(temp_kanewai_spring_ledge_makai_end)[
  names(temp_kanewai_spring_ledge_makai_end) == "temp_c_lgr_s_n_20970094_sen_s_n_20970094"
] <- "temp_celcius"

Let’s check the results

# Confirm that the two shortened column names are now in place.
names(temp_kalauhaihai_garage)
## [1] "number"                            "date_time"                        
## [3] "temp_celcius"                      "coupler_detached_lgr_s_n_20970109"
## [5] "coupler_attached_lgr_s_n_20970109" "host_connected_lgr_s_n_20970109"  
## [7] "stopped_lgr_s_n_20970109"          "end_of_file_lgr_s_n_20970109"

Perfect! Now let’s select the columns that are relevant to us using the select function

# Keep only the timestamp and temperature columns in every data frame; the
# remaining columns are dropped. Each data frame is overwritten.
temp_kalauhaihai_garage <-
  select(temp_kalauhaihai_garage, date_time, temp_celcius)

temp_kanewai_auwai_under_footbridge <-
  select(temp_kanewai_auwai_under_footbridge, date_time, temp_celcius)

temp_kanewai_fishpond_edge_norfolk <-
  select(temp_kanewai_fishpond_edge_norfolk, date_time, temp_celcius)

temp_kanewai_rockstairs_by_wall <-
  select(temp_kanewai_rockstairs_by_wall, date_time, temp_celcius)

temp_kanewai_spring_ledge_makai_end <-
  select(temp_kanewai_spring_ledge_makai_end, date_time, temp_celcius)

Let’s check the results

# Preview the first six rows now that only two columns remain.
head(temp_kalauhaihai_garage, n = 6)
## # A tibble: 6 × 2
##   date_time         temp_celcius
##   <chr>                    <dbl>
## 1 11/02/23 01:00:00         22.3
## 2 11/02/23 01:10:00         22.2
## 3 11/02/23 01:20:00         22.2
## 4 11/02/23 01:30:00         22.2
## 5 11/02/23 01:40:00         22.3
## 6 11/02/23 01:50:00         22.2

Every object (something we assign a name to) has a class in R

The class of temp_kalauhaihai_garage is a dataframe

# Show the class of the whole data frame object.
temp_kalauhaihai_garage %>%
  class()
## [1] "tbl_df"     "tbl"        "data.frame"

The class of the date_time column in temp_kalauhaihai_garage is a character

# Show the class of the date_time column before conversion.
class(temp_kalauhaihai_garage[["date_time"]])
## [1] "character"

We need R to recognize that this is a date, so we need to manually change the class using the mdy_hms() function from the lubridate package

# Convert the date_time column of every data frame from text
# ("month/day/year hour:minute:second") to a date-time with mdy_hms().
# mdy_hms() labels the result as UTC unless a tz argument is given, so the
# logger's clock readings are kept as-is but tagged UTC.
temp_kalauhaihai_garage[["date_time"]] <-
  mdy_hms(temp_kalauhaihai_garage[["date_time"]])

temp_kanewai_auwai_under_footbridge[["date_time"]] <-
  mdy_hms(temp_kanewai_auwai_under_footbridge[["date_time"]])

temp_kanewai_fishpond_edge_norfolk[["date_time"]] <-
  mdy_hms(temp_kanewai_fishpond_edge_norfolk[["date_time"]])

temp_kanewai_rockstairs_by_wall[["date_time"]] <-
  mdy_hms(temp_kanewai_rockstairs_by_wall[["date_time"]])

temp_kanewai_spring_ledge_makai_end[["date_time"]] <-
  mdy_hms(temp_kanewai_spring_ledge_makai_end[["date_time"]])

Let’s check the results (POSIXct is equivalent to date/time)

# Show the class of the date_time column after conversion.
class(temp_kalauhaihai_garage[["date_time"]])
## [1] "POSIXct" "POSIXt"

The last step before visualizing our temperature data is trimming the data to only include the time the sensor was in the water at each given location

Let’s make a plot to see what the data looks like untrimmed using the ggplot() function

the pseudo code for this is

# Line plot of the full, untrimmed record: time on the x axis and
# temperature on the y axis.
temp_kalauhaihai_garage %>%
  ggplot(aes(x = date_time, y = temp_celcius)) +
  geom_line()

Let’s store and plot the start and end times Dr. Lupita has recorded as variables for the kalauhaihai garage sensor

# Start and end of the period used to trim the Kalauhaihai garage record.
#
# tz = "UTC" is set explicitly. The date_time column is parsed with mdy_hms(),
# which tags its result as UTC by default, whereas as.POSIXct() without a tz
# uses the computer's local time zone. Left unset, these boundaries would be
# shifted by the local UTC offset when compared against date_time (and would
# change from one computer to another). With the same zone on both sides, the
# clock times below line up with the clock times recorded by the logger.
kalauhaihai_garage_start_time <- as.POSIXct(
  "11-02-2023 9:30:00",
  format = "%m-%d-%Y %H:%M:%S",
  tz = "UTC"
)

kalauhaihai_garage_end_time <- as.POSIXct(
  "01-31-2024 9:33:00",
  format = "%m-%d-%Y %H:%M:%S",
  tz = "UTC"
)
# Untrimmed record with dashed vertical markers at the start time (dark green)
# and end time (dark red). as.numeric() turns each timestamp into a number
# for xintercept.
ggplot(temp_kalauhaihai_garage, aes(x = date_time, y = temp_celcius)) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(kalauhaihai_garage_start_time),
    linetype = "dashed",
    color = "darkgreen"
  ) +
  geom_vline(
    xintercept = as.numeric(kalauhaihai_garage_end_time),
    linetype = "dashed",
    color = "darkred"
  )

Let’s make a new dataframe called temp_kalauhaihai_garage_trimmed with the trimmed data

# Keep only the rows whose timestamp falls between the start and end times
# (both ends included). filter() combines comma-separated conditions with AND.
temp_kalauhaihai_garage_trimmed <- filter(
  temp_kalauhaihai_garage,
  date_time >= kalauhaihai_garage_start_time,
  date_time <= kalauhaihai_garage_end_time
)

Let’s plot the trimmed data (update the dataframe name)

# Same line plot as before, now drawn from the trimmed data frame.
temp_kalauhaihai_garage_trimmed %>%
  ggplot(aes(x = date_time, y = temp_celcius)) +
  geom_line()

Awesome! Now let’s customize the plot and make it interactive

# Build a styled version of the trimmed plot and store it: a steel-blue line,
# the minimal theme, and a title, subtitle and axis labels.
temp_kalauhaihai_garage_plot <- temp_kalauhaihai_garage_trimmed %>%
  ggplot(aes(x = date_time, y = temp_celcius)) +
  geom_line(color = "steelblue") +
  theme_minimal() +
  labs(
    title = "Temperature Kalauhaihai Garage",
    subtitle = "November 2023 - February 2024",
    x = "Date",
    y = "Temperature °C"
  )

# Convert the stored ggplot into an interactive plotly figure.
ggplotly(temp_kalauhaihai_garage_plot)

Question

Challenge

Can you

  1. Trim the data for the other 4 locations
  2. Visualize them