library(dplyr)
library(lubridate)
library(naniar)
library(knitr)

Data Cleanup

grndtmp619_920=read.csv("~/Desktop/Arctic/Season_GRNDTMP_6_19_22_9_20_22_2022_09_21_16_19_26_UTC_1.csv")

Cleaning up data:

#TNHA
names(grndtmp619_920) <- sub("....C..RX3000_BRW1", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#SSMH
names(grndtmp619_920) <- sub("....C..RX3000_BRW4", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#BUECI
names(grndtmp619_920) <- sub("....C..RX3000_BRW5", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#BEO
names(grndtmp619_920) <- sub("....C..RX3000_BRW6", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))

grndtmp619_920$Date <-as.POSIXct(grndtmp619_920$Date,format="%m/%d/%y %H:%M",tz="UTC")

Remove Sneaky Air Temperature!!

grndtmp619_920 = grndtmp619_920[,-14]

This is an air temp sensor, not a ground temp sensor.

Return Sensor ID of Each Sensor by Placement Number

NOTE: the sensor IDs do not match up for each serial number - this is an attempt to identify inconsistencies. :) **Not necessary to run, just for me to understand how many sensors there are!

c = 3
sensorID = NULL
columnNum = NULL
while(c <= ncol(grndtmp619_920)){
  sensorID = append(sensorID, names(grndtmp619_920)[c])
  columnNum = append(columnNum, c)
  c = c + 11
}
location = c(rep("TNHA",5),rep("SSMH",4),rep("BUECI",8),rep("BEO",2))
ID = c("BASE B07", "SC", "SB", "SA", "SD", "SB", "SD", "SA", "BASE B05", "SC", "SF-01", "SE", "SB", "SD", "BASE B05", "SA", "SF-02", "BASE B06", "BASE B05")
sensors = data.frame(columnNum, sensorID, location, ID)

kable(sensors)
columnNum sensorID location ID
3 Temperature21198259.21206939.7 TNHA BASE B07
14 Temperature21198259.21393047.7 TNHA SC
25 Temperature21198259.21398576.7 TNHA SB
36 Temperature21198259.21398593.7 TNHA SA
47 Temperature21198259.21398601.7 TNHA SD
58 Temperature21401801.21393044.7 SSMH SB
69 Temperature21401801.21393049.7 SSMH SD
80 Temperature21401801.21398599.7 SSMH SA
91 Temperature21401801.21398588.7 SSMH BASE B05
102 Temperature21401800.21393042.7 BUECI SC
113 Temperature21401800.21398578.7 BUECI SF-01
124 Temperature21401800.21398579.7 BUECI SE
135 Temperature21401800.21398583.7 BUECI SB
146 Temperature21401800.21398584.7 BUECI SD
157 Temperature21401800.21398585.7 BUECI BASE B05
168 Temperature21401800.21398590.7 BUECI SA
179 Temperature21401800.21398598.7 BUECI SF-02
190 Temperature21401803.21393048.7 BEO BASE B06
201 Temperature21401803.21398591.7 BEO BASE B05

Remove Zeroes

c = 3 #this is the first column of the sensors
zeroes = 0
while(c <= ncol(grndtmp619_920)-10){
  #select one sensor at a time at 11 depths
  sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
  
  #for loop: goes through each row, checking for zeroes
  for(r in 1:nrow(sensorGroup)) {
    zeroes = 0
    col = 2
    
    #while loop: counts zeroes if they are present in the current row
    #could have used a for loop, but i was experiencing unrelated errors so now it's a while loop :)
    while(col <= 12){
      if(sensorGroup[r,col] == 0 | is.na(sensorGroup[r,col])) { #will not return T/F if value is N/A
        if(!is.na(sensorGroup[r,col])){
          zeroes = zeroes + 1 #count a zero
        }
      }
      col = col + 1
    }
    if(zeroes >= 3){ #if we detect a zero at 3 different depths, we count this as a false zero
      for(columnn in 2:ncol(sensorGroup)){ #go through each column, if zero then replace with NA
      if(sensorGroup[r,columnn] == 0 | is.na(sensorGroup[r,columnn])) { 
        #again, will not work if there is N/A ^
        grndtmp619_920[r,columnn+c] = NA
      }
    }
    }
  }
  c = c + 11 #each time, it increases by the number of depths (11)
}

Daily Temperature Average

How to get daily avg temp:

** average by day by site (take every sensor for all sites) Daily Average = (average high - average low)/2

take min and max temperature for each sensor average temperature for all sensors at given location (here, TNHA) at every day

TNHA Get Min and Max Temperatures by Day

i = 1
c = sensors[i,1]
  sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
  names(sensorGroup)[2:12] = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
  sensorGroup$Date = as.Date(sensorGroup$Date)
  minimum = setNames(aggregate(sensorGroup[,2], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4], names(sensorGroup)[2],"Min")))
  maximum = setNames(aggregate(sensorGroup[,2], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[2],"Max")))
  TNHA_minmax = minimum %>% inner_join(maximum, by="Date")
  
  for(col in 3:ncol(sensorGroup)){
    minimum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Min")))
  maximum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Max")))
  TNHA_minmax = TNHA_minmax %>% inner_join(minimum, by="Date") %>% inner_join(maximum, by="Date")
  }


i = 2
while(i <= 5){
  #select one sensor at a time at 11 depths
  c = sensors[i,1]
  sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
  names(sensorGroup)[2:12] = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
  sensorGroup$Date = as.Date(sensorGroup$Date)
  
  for(col in 2:ncol(sensorGroup)){
    minimum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Min")))
  maximum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Max")))
  TNHA_minmax = TNHA_minmax %>% inner_join(minimum, by="Date") %>% inner_join(maximum, by="Date")
  }
  
  i = i+1  #each time, it increases by the number of depths (11)
}

#fix infinities
TNHA_minmax <- do.call(data.frame,                      # Replace Inf in data by NA
                   lapply(TNHA_minmax,
                          function(x) replace(x, is.infinite(x), NA)))

Average Temperature Min/Max TNHA Site by Depth

depths = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
TNHA_MinMax_Daily = TNHA_minmax[,1]
i=1
for(i in 1:length(depths)){
  avgMin = NULL
  avgMax = NULL
  for(r in 1:nrow(TNHA_minmax)){
    minimum = round(mean(c(TNHA_minmax[r,i+1],TNHA_minmax[r,i+23],TNHA_minmax[r,i+45],TNHA_minmax[r,i+67],TNHA_minmax[r,i+89]),na.rm=T),2)
    maximum = round(mean(c(TNHA_minmax[r,i+2],TNHA_minmax[r,i+24],TNHA_minmax[r,i+46],TNHA_minmax[r,i+68],TNHA_minmax[r,i+90]),na.rm=T),2)
    avgMin = append(avgMin, minimum)
    avgMax = append(avgMax, maximum)
  }
  
  TNHA_MinMax_Daily = data.frame(TNHA_MinMax_Daily, avgMin, avgMax)
  names(TNHA_MinMax_Daily)[ncol(TNHA_MinMax_Daily)-1] = paste("TNHA",depths[i],"min")
  names(TNHA_MinMax_Daily)[ncol(TNHA_MinMax_Daily)] = paste("TNHA",depths[i],"max")
  
}

glimpse(TNHA_MinMax_Daily)
## Rows: 94
## Columns: 23
## $ TNHA_MinMax_Daily <date> 2022-06-19, 2022-06-20, 2022-06-21, 2022-06-22, 202…
## $ TNHA.3.5.cm.min   <dbl> 1.02, 3.55, 4.92, 4.38, 4.75, 4.82, 4.15, 7.30, 9.12…
## $ TNHA.3.5.cm.max   <dbl> 7.95, 12.25, 13.43, 8.82, 7.38, 7.95, 10.05, 15.32, …
## $ TNHA.10.cm.min    <dbl> 7.95, 12.25, 13.43, 8.82, 7.38, 7.95, 10.05, 15.32, …
## $ TNHA.10.cm.max    <dbl> 0.80, 2.25, 3.65, 3.55, 3.88, 3.88, 3.62, 6.12, 7.92…
## $ TNHA.20.cm.min    <dbl> 0.80, 2.25, 3.65, 3.55, 3.88, 3.88, 3.62, 6.12, 7.92…
## $ TNHA.20.cm.max    <dbl> 4.75, 6.82, 8.70, 5.88, 5.28, 5.92, 7.08, 10.45, 11.…
## $ TNHA.30.cm.min    <dbl> 4.75, 6.82, 8.70, 5.88, 5.28, 5.92, 7.08, 10.45, 11.…
## $ TNHA.30.cm.max    <dbl> 0.25, 1.00, 1.75, 1.90, 2.17, 2.62, 2.58, 4.00, 5.58…
## $ TNHA.40.cm.min    <dbl> 0.25, 1.00, 1.75, 1.90, 2.17, 2.62, 2.58, 4.00, 5.58…
## $ TNHA.40.cm.max    <dbl> 1.77, 2.50, 4.08, 3.40, 3.17, 3.67, 4.12, 5.82, 7.30…
## $ TNHA.50.cm.min    <dbl> 1.77, 2.50, 4.08, 3.40, 3.17, 3.67, 4.12, 5.82, 7.30…
## $ TNHA.50.cm.max    <dbl> -1.38, -1.07, -0.42, -0.58, -0.40, -0.07, 0.00, 0.45…
## $ TNHA.55.cm.min    <dbl> -1.38, -1.07, -0.42, -0.58, -0.40, -0.07, 0.00, 0.45…
## $ TNHA.55.cm.max    <dbl> -0.55, -0.15, 0.55, 0.38, 0.45, 0.55, 0.70, 1.65, 3.…
## $ TNHA.65.cm.min    <dbl> -0.55, -0.15, 0.55, 0.38, 0.45, 0.55, 0.70, 1.65, 3.…
## $ TNHA.65.cm.max    <dbl> -1.50, -1.32, -0.92, -0.62, -0.55, -0.40, -0.25, -0.…
## $ TNHA.75.cm.min    <dbl> -1.50, -1.32, -0.92, -0.62, -0.55, -0.40, -0.25, -0.…
## $ TNHA.75.cm.max    <dbl> -0.92, -0.65, -0.12, -0.30, 0.00, -0.08, 0.08, 0.60,…
## $ TNHA.85.cm.min    <dbl> -0.92, -0.65, -0.12, -0.30, 0.00, -0.08, 0.08, 0.60,…
## $ TNHA.85.cm.max    <dbl> -2.12, -1.80, -1.68, -1.50, -1.25, -1.18, -1.15, -0.…
## $ `TNHA 90 cm min`  <dbl> -2.12, -1.80, -1.68, -1.50, -1.25, -1.18, -1.15, -0.…
## $ `TNHA 90 cm max`  <dbl> -1.55, -1.43, -1.00, -0.80, -0.80, -0.75, -0.42, -0.…

Average Daily Temperature

TNHA_Avg_Daily = TNHA_MinMax_Daily[,1]
c = 2
while(c<ncol(TNHA_MinMax_Daily)){
  avg = NULL
  for(r in 1:nrow(TNHA_MinMax_Daily)){
    minimum = TNHA_MinMax_Daily[r,c]
    minimum = TNHA_MinMax_Daily[r,c+1]
    avg = append(avg, round(mean(c(minimum, maximum), na.rm=T),2))
  }
  TNHA_Avg_Daily = data.frame(TNHA_Avg_Daily, avg)
  names(TNHA_Avg_Daily)[ncol(TNHA_Avg_Daily)] = paste("TNHA",depths[c/2],"daily average")
  c = c + 2
}

glimpse(TNHA_Avg_Daily)
## Rows: 94
## Columns: 12
## $ TNHA_Avg_Daily             <date> 2022-06-19, 2022-06-20, 2022-06-21, 2022-0…
## $ TNHA.3.5.cm.daily.average  <dbl> 4.10, 6.25, 6.84, 4.54, 3.82, 4.10, 5.15, 7…
## $ TNHA.10.cm.daily.average   <dbl> 0.52, 1.25, 1.95, 1.90, 2.06, 2.06, 1.94, 3…
## $ TNHA.20.cm.daily.average   <dbl> 2.50, 3.54, 4.47, 3.06, 2.77, 3.08, 3.66, 5…
## $ TNHA.30.cm.daily.average   <dbl> 0.25, 0.62, 1.00, 1.07, 1.21, 1.44, 1.42, 2…
## $ TNHA.40.cm.daily.average   <dbl> 1.01, 1.38, 2.16, 1.82, 1.71, 1.96, 2.18, 3…
## $ TNHA.50.cm.daily.average   <dbl> -0.56, -0.41, -0.08, -0.16, -0.08, 0.09, 0.…
## $ TNHA.55.cm.daily.average   <dbl> -0.15, 0.05, 0.40, 0.32, 0.35, 0.40, 0.48, …
## $ TNHA.65.cm.daily.average   <dbl> -0.62, -0.54, -0.34, -0.18, -0.15, -0.08, 0…
## $ TNHA.75.cm.daily.average   <dbl> -0.34, -0.20, 0.06, -0.02, 0.12, 0.08, 0.16…
## $ TNHA.85.cm.daily.average   <dbl> -0.94, -0.78, -0.72, -0.62, -0.50, -0.46, -…
## $ `TNHA 90 cm daily average` <dbl> -0.65, -0.59, -0.38, -0.28, -0.28, -0.25, -…

FOR LOOP: CHECK FOR GAPS

The only thing you need to change is the sensor ID! the rest should function without change

Except we need to figure out the increments between times :/

#Create sensor DF: select column with dates/times and selected sensor
sensor = grndtmp619_920 %>% select(Date,Temperature21198259.21206939.8)  #Insert Sensor ID here
#Select rows with NAs
row.has.na <- apply(sensor, 1, function(x){any(is.na(x))})
sensorNA = sensor[row.has.na,] #Create a dataframe

start = c(sensorNA[1,1]) #beginning of gap vector
ending = NULL #gap vector end
for(r in 1:nrow(sensorNA)) {   #for loop: for each row with NA
  date1 = as.POSIXct(sensorNA[r,1],format="%m/%d/%y %H:%M",tz="UTC")
  
  #Find difference between dates (time increments)
  dateNext = which(grndtmp619_920$Date == date1, arr.ind=TRUE)
  dateNext = dateNext + 1
  dateNext = as.POSIXct(grndtmp619_920[dateNext,2],format="%m/%d/%y %H:%M",tz="UTC")
  diff = as.numeric(dateNext - date1)
  date1 = date1 + diff*60
  
  date1 = as.POSIXct(date1,format="%m/%d/%y %H:%M",tz="UTC")
  date2 = as.POSIXct(sensorNA[r+1,1],format="%m/%d/%y %H:%M",tz="UTC")
  if (r == nrow(sensorNA)-1) {
    #if this is the last value with an NA, it's the end of the gap
    #minus 1 because date1 is one increment ahead
    ending = append(ending, date1)
  }
  else if (r == nrow(sensorNA)) {
    #if this is the very last row, do nothing - otherwise there is an error
  }
  else if (date1 == date2) {
    #if the dates are the same, it is still part of the same chunk of NAs; do nothing
  }
  else{
    #if all else fails, it is the end of a NA chunk; add to vectors then keep going!
    start = append(start,date2)
    ending = append(ending, date1)
  }
}

gaps = data.frame(start, ending) #create dataframe from start and end times
print(gaps) #output
##                  start              ending
## 1  2022-06-30 17:35:00 2022-06-30 18:00:00
## 2  2022-07-08 10:00:00 2022-08-15 19:00:00
## 3  2022-09-17 02:10:00 2022-09-17 02:30:00
## 4  2022-09-17 02:40:00 2022-09-17 03:00:00
## 5  2022-09-17 03:10:00 2022-09-17 03:30:00
## 6  2022-09-17 03:40:00 2022-09-17 04:00:00
## 7  2022-09-17 04:10:00 2022-09-17 04:30:00
## 8  2022-09-17 04:40:00 2022-09-17 05:00:00
## 9  2022-09-17 05:10:00 2022-09-17 05:30:00
## 10 2022-09-17 05:40:00 2022-09-17 06:00:00
## 11 2022-09-17 06:10:00 2022-09-17 06:30:00
## 12 2022-09-17 06:40:00 2022-09-17 07:00:00
## 13 2022-09-17 07:10:00 2022-09-17 07:30:00
## 14 2022-09-17 07:40:00 2022-09-17 08:00:00
## 15 2022-09-17 08:10:00 2022-09-17 08:30:00
## 16 2022-09-17 08:40:00 2022-09-17 09:00:00
## 17 2022-09-17 09:10:00 2022-09-17 09:30:00
## 18 2022-09-17 09:40:00 2022-09-17 10:00:00
## 19 2022-09-17 10:10:00 2022-09-17 10:30:00
## 20 2022-09-17 10:40:00 2022-09-17 11:00:00
## 21 2022-09-17 11:10:00 2022-09-17 11:30:00
## 22 2022-09-17 11:40:00 2022-09-17 12:00:00
## 23 2022-09-17 12:10:00 2022-09-17 12:30:00