library(dplyr)
library(lubridate)
library(naniar)
library(knitr)
grndtmp619_920=read.csv("~/Desktop/Arctic/Season_GRNDTMP_6_19_22_9_20_22_2022_09_21_16_19_26_UTC_1.csv")
#TNHA
names(grndtmp619_920) <- sub("....C..RX3000_BRW1", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#SSMH
names(grndtmp619_920) <- sub("....C..RX3000_BRW4", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#BUECI
names(grndtmp619_920) <- sub("....C..RX3000_BRW5", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
#BEO
names(grndtmp619_920) <- sub("....C..RX3000_BRW6", "", names(grndtmp619_920))
names(grndtmp619_920) <- sub("..RXW.GP6.", "", names(grndtmp619_920))
grndtmp619_920$Date <-as.POSIXct(grndtmp619_920$Date,format="%m/%d/%y %H:%M",tz="UTC")
grndtmp619_920 = grndtmp619_920[,-14]
This is an air temp sensor, not a ground temp sensor.
NOTE: the sensor IDs do not match up for each serial number - this is an attempt to identify inconsistencies. :) **Not necessary to run, just for me to understand how many sensors there are!
c = 3
sensorID = NULL
columnNum = NULL
while(c <= ncol(grndtmp619_920)){
sensorID = append(sensorID, names(grndtmp619_920)[c])
columnNum = append(columnNum, c)
c = c + 11
}
location = c(rep("TNHA",5),rep("SSMH",4),rep("BUECI",8),rep("BEO",2))
ID = c("BASE B07", "SC", "SB", "SA", "SD", "SB", "SD", "SA", "BASE B05", "SC", "SF-01", "SE", "SB", "SD", "BASE B05", "SA", "SF-02", "BASE B06", "BASE B05")
sensors = data.frame(columnNum, sensorID, location, ID)
kable(sensors)
| columnNum | sensorID | location | ID |
|---|---|---|---|
| 3 | Temperature21198259.21206939.7 | TNHA | BASE B07 |
| 14 | Temperature21198259.21393047.7 | TNHA | SC |
| 25 | Temperature21198259.21398576.7 | TNHA | SB |
| 36 | Temperature21198259.21398593.7 | TNHA | SA |
| 47 | Temperature21198259.21398601.7 | TNHA | SD |
| 58 | Temperature21401801.21393044.7 | SSMH | SB |
| 69 | Temperature21401801.21393049.7 | SSMH | SD |
| 80 | Temperature21401801.21398599.7 | SSMH | SA |
| 91 | Temperature21401801.21398588.7 | SSMH | BASE B05 |
| 102 | Temperature21401800.21393042.7 | BUECI | SC |
| 113 | Temperature21401800.21398578.7 | BUECI | SF-01 |
| 124 | Temperature21401800.21398579.7 | BUECI | SE |
| 135 | Temperature21401800.21398583.7 | BUECI | SB |
| 146 | Temperature21401800.21398584.7 | BUECI | SD |
| 157 | Temperature21401800.21398585.7 | BUECI | BASE B05 |
| 168 | Temperature21401800.21398590.7 | BUECI | SA |
| 179 | Temperature21401800.21398598.7 | BUECI | SF-02 |
| 190 | Temperature21401803.21393048.7 | BEO | BASE B06 |
| 201 | Temperature21401803.21398591.7 | BEO | BASE B05 |
c = 3 #this is the first column of the sensors
zeroes = 0
while(c <= ncol(grndtmp619_920)-10){
#select one sensor at a time at 11 depths
sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
#for loop: goes through each row, checking for zeroes
for(r in 1:nrow(sensorGroup)) {
zeroes = 0
col = 2
#while loop: counts zeroes if they are present in the current row
#could have used a for loop, but i was experiencing unrelated errors so now it's a while loop :)
while(col <= 12){
if(sensorGroup[r,col] == 0 | is.na(sensorGroup[r,col])) { #will not return T/F if value is N/A
if(!is.na(sensorGroup[r,col])){
zeroes = zeroes + 1 #count a zero
}
}
col = col + 1
}
if(zeroes >= 3){ #if we detect a zero at 3 different depths, we count this as a false zero
for(columnn in 2:ncol(sensorGroup)){ #go through each column, if zero then replace with NA
if(sensorGroup[r,columnn] == 0 | is.na(sensorGroup[r,columnn])) {
#again, will not work if there is N/A ^
grndtmp619_920[r,columnn+c] = NA
}
}
}
}
c = c + 11 #each time, it increases by the number of depths (11)
}
How to get daily avg temp:
** average by day by site (take every sensor for all sites) Daily Average = (average high - average low)/2
take min and max temperature for each sensor average temperature for all sensors at given location (here, TNHA) at every day
i = 1
c = sensors[i,1]
sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
names(sensorGroup)[2:12] = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
sensorGroup$Date = as.Date(sensorGroup$Date)
minimum = setNames(aggregate(sensorGroup[,2], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4], names(sensorGroup)[2],"Min")))
maximum = setNames(aggregate(sensorGroup[,2], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[2],"Max")))
TNHA_minmax = minimum %>% inner_join(maximum, by="Date")
for(col in 3:ncol(sensorGroup)){
minimum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Min")))
maximum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Max")))
TNHA_minmax = TNHA_minmax %>% inner_join(minimum, by="Date") %>% inner_join(maximum, by="Date")
}
i = 2
while(i <= 5){
#select one sensor at a time at 11 depths
c = sensors[i,1]
sensorGroup = grndtmp619_920 %>% select(2,c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10)
names(sensorGroup)[2:12] = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
sensorGroup$Date = as.Date(sensorGroup$Date)
for(col in 2:ncol(sensorGroup)){
minimum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),min, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Min")))
maximum = setNames(aggregate(sensorGroup[,col], by=list(sensorGroup$Date),max, na.rm=T), c("Date", paste(sensors[i, 3], sensors[i,4],names(sensorGroup)[col],"Max")))
TNHA_minmax = TNHA_minmax %>% inner_join(minimum, by="Date") %>% inner_join(maximum, by="Date")
}
i = i+1 #each time, it increases by the number of depths (11)
}
#fix infinities
TNHA_minmax <- do.call(data.frame, # Replace Inf in data by NA
lapply(TNHA_minmax,
function(x) replace(x, is.infinite(x), NA)))
depths = c("3.5 cm","10 cm","20 cm","30 cm","40 cm","50 cm","55 cm","65 cm","75 cm","85 cm","90 cm")
TNHA_MinMax_Daily = TNHA_minmax[,1]
i=1
for(i in 1:length(depths)){
avgMin = NULL
avgMax = NULL
for(r in 1:nrow(TNHA_minmax)){
minimum = round(mean(c(TNHA_minmax[r,i+1],TNHA_minmax[r,i+23],TNHA_minmax[r,i+45],TNHA_minmax[r,i+67],TNHA_minmax[r,i+89]),na.rm=T),2)
maximum = round(mean(c(TNHA_minmax[r,i+2],TNHA_minmax[r,i+24],TNHA_minmax[r,i+46],TNHA_minmax[r,i+68],TNHA_minmax[r,i+90]),na.rm=T),2)
avgMin = append(avgMin, minimum)
avgMax = append(avgMax, maximum)
}
TNHA_MinMax_Daily = data.frame(TNHA_MinMax_Daily, avgMin, avgMax)
names(TNHA_MinMax_Daily)[ncol(TNHA_MinMax_Daily)-1] = paste("TNHA",depths[i],"min")
names(TNHA_MinMax_Daily)[ncol(TNHA_MinMax_Daily)] = paste("TNHA",depths[i],"max")
}
glimpse(TNHA_MinMax_Daily)
## Rows: 94
## Columns: 23
## $ TNHA_MinMax_Daily <date> 2022-06-19, 2022-06-20, 2022-06-21, 2022-06-22, 202…
## $ TNHA.3.5.cm.min <dbl> 1.02, 3.55, 4.92, 4.38, 4.75, 4.82, 4.15, 7.30, 9.12…
## $ TNHA.3.5.cm.max <dbl> 7.95, 12.25, 13.43, 8.82, 7.38, 7.95, 10.05, 15.32, …
## $ TNHA.10.cm.min <dbl> 7.95, 12.25, 13.43, 8.82, 7.38, 7.95, 10.05, 15.32, …
## $ TNHA.10.cm.max <dbl> 0.80, 2.25, 3.65, 3.55, 3.88, 3.88, 3.62, 6.12, 7.92…
## $ TNHA.20.cm.min <dbl> 0.80, 2.25, 3.65, 3.55, 3.88, 3.88, 3.62, 6.12, 7.92…
## $ TNHA.20.cm.max <dbl> 4.75, 6.82, 8.70, 5.88, 5.28, 5.92, 7.08, 10.45, 11.…
## $ TNHA.30.cm.min <dbl> 4.75, 6.82, 8.70, 5.88, 5.28, 5.92, 7.08, 10.45, 11.…
## $ TNHA.30.cm.max <dbl> 0.25, 1.00, 1.75, 1.90, 2.17, 2.62, 2.58, 4.00, 5.58…
## $ TNHA.40.cm.min <dbl> 0.25, 1.00, 1.75, 1.90, 2.17, 2.62, 2.58, 4.00, 5.58…
## $ TNHA.40.cm.max <dbl> 1.77, 2.50, 4.08, 3.40, 3.17, 3.67, 4.12, 5.82, 7.30…
## $ TNHA.50.cm.min <dbl> 1.77, 2.50, 4.08, 3.40, 3.17, 3.67, 4.12, 5.82, 7.30…
## $ TNHA.50.cm.max <dbl> -1.38, -1.07, -0.42, -0.58, -0.40, -0.07, 0.00, 0.45…
## $ TNHA.55.cm.min <dbl> -1.38, -1.07, -0.42, -0.58, -0.40, -0.07, 0.00, 0.45…
## $ TNHA.55.cm.max <dbl> -0.55, -0.15, 0.55, 0.38, 0.45, 0.55, 0.70, 1.65, 3.…
## $ TNHA.65.cm.min <dbl> -0.55, -0.15, 0.55, 0.38, 0.45, 0.55, 0.70, 1.65, 3.…
## $ TNHA.65.cm.max <dbl> -1.50, -1.32, -0.92, -0.62, -0.55, -0.40, -0.25, -0.…
## $ TNHA.75.cm.min <dbl> -1.50, -1.32, -0.92, -0.62, -0.55, -0.40, -0.25, -0.…
## $ TNHA.75.cm.max <dbl> -0.92, -0.65, -0.12, -0.30, 0.00, -0.08, 0.08, 0.60,…
## $ TNHA.85.cm.min <dbl> -0.92, -0.65, -0.12, -0.30, 0.00, -0.08, 0.08, 0.60,…
## $ TNHA.85.cm.max <dbl> -2.12, -1.80, -1.68, -1.50, -1.25, -1.18, -1.15, -0.…
## $ `TNHA 90 cm min` <dbl> -2.12, -1.80, -1.68, -1.50, -1.25, -1.18, -1.15, -0.…
## $ `TNHA 90 cm max` <dbl> -1.55, -1.43, -1.00, -0.80, -0.80, -0.75, -0.42, -0.…
TNHA_Avg_Daily = TNHA_MinMax_Daily[,1]
c = 2
while(c<ncol(TNHA_MinMax_Daily)){
avg = NULL
for(r in 1:nrow(TNHA_MinMax_Daily)){
minimum = TNHA_MinMax_Daily[r,c]
minimum = TNHA_MinMax_Daily[r,c+1]
avg = append(avg, round(mean(c(minimum, maximum), na.rm=T),2))
}
TNHA_Avg_Daily = data.frame(TNHA_Avg_Daily, avg)
names(TNHA_Avg_Daily)[ncol(TNHA_Avg_Daily)] = paste("TNHA",depths[c/2],"daily average")
c = c + 2
}
glimpse(TNHA_Avg_Daily)
## Rows: 94
## Columns: 12
## $ TNHA_Avg_Daily <date> 2022-06-19, 2022-06-20, 2022-06-21, 2022-0…
## $ TNHA.3.5.cm.daily.average <dbl> 4.10, 6.25, 6.84, 4.54, 3.82, 4.10, 5.15, 7…
## $ TNHA.10.cm.daily.average <dbl> 0.52, 1.25, 1.95, 1.90, 2.06, 2.06, 1.94, 3…
## $ TNHA.20.cm.daily.average <dbl> 2.50, 3.54, 4.47, 3.06, 2.77, 3.08, 3.66, 5…
## $ TNHA.30.cm.daily.average <dbl> 0.25, 0.62, 1.00, 1.07, 1.21, 1.44, 1.42, 2…
## $ TNHA.40.cm.daily.average <dbl> 1.01, 1.38, 2.16, 1.82, 1.71, 1.96, 2.18, 3…
## $ TNHA.50.cm.daily.average <dbl> -0.56, -0.41, -0.08, -0.16, -0.08, 0.09, 0.…
## $ TNHA.55.cm.daily.average <dbl> -0.15, 0.05, 0.40, 0.32, 0.35, 0.40, 0.48, …
## $ TNHA.65.cm.daily.average <dbl> -0.62, -0.54, -0.34, -0.18, -0.15, -0.08, 0…
## $ TNHA.75.cm.daily.average <dbl> -0.34, -0.20, 0.06, -0.02, 0.12, 0.08, 0.16…
## $ TNHA.85.cm.daily.average <dbl> -0.94, -0.78, -0.72, -0.62, -0.50, -0.46, -…
## $ `TNHA 90 cm daily average` <dbl> -0.65, -0.59, -0.38, -0.28, -0.28, -0.25, -…
#Create sensor DF: select column with dates/times and selected sensor
sensor = grndtmp619_920 %>% select(Date,Temperature21198259.21206939.8) #Insert Sensor ID here
#Select rows with NAs
row.has.na <- apply(sensor, 1, function(x){any(is.na(x))})
sensorNA = sensor[row.has.na,] #Create a dataframe
start = c(sensorNA[1,1]) #beginning of gap vector
ending = NULL #gap vector end
for(r in 1:nrow(sensorNA)) { #for loop: for each row with NA
date1 = as.POSIXct(sensorNA[r,1],format="%m/%d/%y %H:%M",tz="UTC")
#Find difference between dates (time increments)
dateNext = which(grndtmp619_920$Date == date1, arr.ind=TRUE)
dateNext = dateNext + 1
dateNext = as.POSIXct(grndtmp619_920[dateNext,2],format="%m/%d/%y %H:%M",tz="UTC")
diff = as.numeric(dateNext - date1)
date1 = date1 + diff*60
date1 = as.POSIXct(date1,format="%m/%d/%y %H:%M",tz="UTC")
date2 = as.POSIXct(sensorNA[r+1,1],format="%m/%d/%y %H:%M",tz="UTC")
if (r == nrow(sensorNA)-1) {
#if this is the last value with an NA, it's the end of the gap
#minus 1 because date1 is one increment ahead
ending = append(ending, date1)
}
else if (r == nrow(sensorNA)) {
#if this is the very last row, do nothing - otherwise there is an error
}
else if (date1 == date2) {
#if the dates are the same, it is still part of the same chunk of NAs; do nothing
}
else{
#if all else fails, it is the end of a NA chunk; add to vectors then keep going!
start = append(start,date2)
ending = append(ending, date1)
}
}
gaps = data.frame(start, ending) #create dataframe from start and end times
print(gaps) #output
## start ending
## 1 2022-06-30 17:35:00 2022-06-30 18:00:00
## 2 2022-07-08 10:00:00 2022-08-15 19:00:00
## 3 2022-09-17 02:10:00 2022-09-17 02:30:00
## 4 2022-09-17 02:40:00 2022-09-17 03:00:00
## 5 2022-09-17 03:10:00 2022-09-17 03:30:00
## 6 2022-09-17 03:40:00 2022-09-17 04:00:00
## 7 2022-09-17 04:10:00 2022-09-17 04:30:00
## 8 2022-09-17 04:40:00 2022-09-17 05:00:00
## 9 2022-09-17 05:10:00 2022-09-17 05:30:00
## 10 2022-09-17 05:40:00 2022-09-17 06:00:00
## 11 2022-09-17 06:10:00 2022-09-17 06:30:00
## 12 2022-09-17 06:40:00 2022-09-17 07:00:00
## 13 2022-09-17 07:10:00 2022-09-17 07:30:00
## 14 2022-09-17 07:40:00 2022-09-17 08:00:00
## 15 2022-09-17 08:10:00 2022-09-17 08:30:00
## 16 2022-09-17 08:40:00 2022-09-17 09:00:00
## 17 2022-09-17 09:10:00 2022-09-17 09:30:00
## 18 2022-09-17 09:40:00 2022-09-17 10:00:00
## 19 2022-09-17 10:10:00 2022-09-17 10:30:00
## 20 2022-09-17 10:40:00 2022-09-17 11:00:00
## 21 2022-09-17 11:10:00 2022-09-17 11:30:00
## 22 2022-09-17 11:40:00 2022-09-17 12:00:00
## 23 2022-09-17 12:10:00 2022-09-17 12:30:00