Brief

Short description of this notebook’s purpose.

Created

2026-03-25 10:37:59.272629

Last Rendered

2026-03-25

TODO

TC1

Load Panel and Thermal Data

The panel data is compiled and averaged at ‘Derek Synology/MPJ Ref Panel Data/merge_panel_data.R’ which outputs panels_avg_temps_DATESAVED.csv

panel_dat <- read_csv(here("data/raw/panels_avg_temps_20260323.csv"))

## Rows: 33121 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (4): tc1, tc2, tc3_close, tc3_far
## dttm (1): timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

summary(panel_dat)

##    timestamp                        tc1              tc2         
##  Min.   :2025-03-27 13:30:00   Min.   :-9.961   Min.   :-10.659  
##  1st Qu.:2025-06-22 19:15:00   1st Qu.: 8.292   1st Qu.:  6.874  
##  Median :2025-09-17 01:15:00   Median :15.583   Median : 14.537  
##  Mean   :2025-09-17 16:03:26   Mean   :16.300   Mean   : 16.498  
##  3rd Qu.:2025-12-12 07:15:00   3rd Qu.:24.404   3rd Qu.: 25.621  
##  Max.   :2026-03-17 13:15:00   Max.   :47.246   Max.   : 50.208  
##                                NA's   :1908     NA's   :4        
##    tc3_close          tc3_far       
##  Min.   :-13.977   Min.   :-15.033  
##  1st Qu.:  4.317   1st Qu.:  4.425  
##  Median : 12.295   Median : 12.160  
##  Mean   : 13.490   Mean   : 13.577  
##  3rd Qu.: 20.593   3rd Qu.: 20.904  
##  Max.   : 48.278   Max.   : 48.745  
##  NA's   :1156      NA's   :1158

tc1_thermal <- read_csv("/Users/derekkober/Documents/School/UNM/Thesis/code/Python/correcTIR-main/output/tc1_processed.csv") |> 
  clean_names() |> 
  mutate(
    timestamp = round_date(timestamp, "15 minutes"))

## Rows: 12244 Columns: 260
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (2): filepath, filename_short
## dbl  (257): T_air, RH, sky_temp, LW_IN, VF_2, rho_v, tau, J20_mean_uncorrect...
## dttm   (1): Timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

colnames(tc1_thermal)

##   [1] "filepath"                      "filename_short"               
##   [3] "timestamp"                     "t_air"                        
##   [5] "rh"                            "sky_temp"                     
##   [7] "lw_in"                         "vf_2"                         
##   [9] "rho_v"                         "tau"                          
##  [11] "j20_mean_uncorrected"          "j20_mean_fully_corrected"     
##  [13] "j20_mean_tau1"                 "j20_mean_twin1"               
##  [15] "j20_mean_emiss1"               "j20_p1_uncorrected"           
##  [17] "j20_p1_fully_corrected"        "j20_p1_tau1"                  
##  [19] "j20_p1_twin1"                  "j20_p1_emiss1"                
##  [21] "j20_p5_uncorrected"            "j20_p5_fully_corrected"       
##  [23] "j20_p5_tau1"                   "j20_p5_twin1"                 
##  [25] "j20_p5_emiss1"                 "j20_p10_uncorrected"          
##  [27] "j20_p10_fully_corrected"       "j20_p10_tau1"                 
##  [29] "j20_p10_twin1"                 "j20_p10_emiss1"               
##  [31] "j20_p25_uncorrected"           "j20_p25_fully_corrected"      
##  [33] "j20_p25_tau1"                  "j20_p25_twin1"                
##  [35] "j20_p25_emiss1"                "j20_p50_uncorrected"          
##  [37] "j20_p50_fully_corrected"       "j20_p50_tau1"                 
##  [39] "j20_p50_twin1"                 "j20_p50_emiss1"               
##  [41] "j20_p75_uncorrected"           "j20_p75_fully_corrected"      
##  [43] "j20_p75_tau1"                  "j20_p75_twin1"                
##  [45] "j20_p75_emiss1"                "j20_p90_uncorrected"          
##  [47] "j20_p90_fully_corrected"       "j20_p90_tau1"                 
##  [49] "j20_p90_twin1"                 "j20_p90_emiss1"               
##  [51] "j20_p95_uncorrected"           "j20_p95_fully_corrected"      
##  [53] "j20_p95_tau1"                  "j20_p95_twin1"                
##  [55] "j20_p95_emiss1"                "j20_p99_uncorrected"          
##  [57] "j20_p99_fully_corrected"       "j20_p99_tau1"                 
##  [59] "j20_p99_twin1"                 "j20_p99_emiss1"               
##  [61] "j21_mean_uncorrected"          "j21_mean_fully_corrected"     
##  [63] "j21_mean_tau1"                 "j21_mean_twin1"               
##  [65] "j21_mean_emiss1"               "j21_p1_uncorrected"           
##  [67] "j21_p1_fully_corrected"        "j21_p1_tau1"                  
##  [69] "j21_p1_twin1"                  "j21_p1_emiss1"                
##  [71] "j21_p5_uncorrected"            "j21_p5_fully_corrected"       
##  [73] "j21_p5_tau1"                   "j21_p5_twin1"                 
##  [75] "j21_p5_emiss1"                 "j21_p10_uncorrected"          
##  [77] "j21_p10_fully_corrected"       "j21_p10_tau1"                 
##  [79] "j21_p10_twin1"                 "j21_p10_emiss1"               
##  [81] "j21_p25_uncorrected"           "j21_p25_fully_corrected"      
##  [83] "j21_p25_tau1"                  "j21_p25_twin1"                
##  [85] "j21_p25_emiss1"                "j21_p50_uncorrected"          
##  [87] "j21_p50_fully_corrected"       "j21_p50_tau1"                 
##  [89] "j21_p50_twin1"                 "j21_p50_emiss1"               
##  [91] "j21_p75_uncorrected"           "j21_p75_fully_corrected"      
##  [93] "j21_p75_tau1"                  "j21_p75_twin1"                
##  [95] "j21_p75_emiss1"                "j21_p90_uncorrected"          
##  [97] "j21_p90_fully_corrected"       "j21_p90_tau1"                 
##  [99] "j21_p90_twin1"                 "j21_p90_emiss1"               
## [101] "j21_p95_uncorrected"           "j21_p95_fully_corrected"      
## [103] "j21_p95_tau1"                  "j21_p95_twin1"                
## [105] "j21_p95_emiss1"                "j21_p99_uncorrected"          
## [107] "j21_p99_fully_corrected"       "j21_p99_tau1"                 
## [109] "j21_p99_twin1"                 "j21_p99_emiss1"               
## [111] "j4_mean_uncorrected"           "j4_mean_fully_corrected"      
## [113] "j4_mean_tau1"                  "j4_mean_twin1"                
## [115] "j4_mean_emiss1"                "j4_p1_uncorrected"            
## [117] "j4_p1_fully_corrected"         "j4_p1_tau1"                   
## [119] "j4_p1_twin1"                   "j4_p1_emiss1"                 
## [121] "j4_p5_uncorrected"             "j4_p5_fully_corrected"        
## [123] "j4_p5_tau1"                    "j4_p5_twin1"                  
## [125] "j4_p5_emiss1"                  "j4_p10_uncorrected"           
## [127] "j4_p10_fully_corrected"        "j4_p10_tau1"                  
## [129] "j4_p10_twin1"                  "j4_p10_emiss1"                
## [131] "j4_p25_uncorrected"            "j4_p25_fully_corrected"       
## [133] "j4_p25_tau1"                   "j4_p25_twin1"                 
## [135] "j4_p25_emiss1"                 "j4_p50_uncorrected"           
## [137] "j4_p50_fully_corrected"        "j4_p50_tau1"                  
## [139] "j4_p50_twin1"                  "j4_p50_emiss1"                
## [141] "j4_p75_uncorrected"            "j4_p75_fully_corrected"       
## [143] "j4_p75_tau1"                   "j4_p75_twin1"                 
## [145] "j4_p75_emiss1"                 "j4_p90_uncorrected"           
## [147] "j4_p90_fully_corrected"        "j4_p90_tau1"                  
## [149] "j4_p90_twin1"                  "j4_p90_emiss1"                
## [151] "j4_p95_uncorrected"            "j4_p95_fully_corrected"       
## [153] "j4_p95_tau1"                   "j4_p95_twin1"                 
## [155] "j4_p95_emiss1"                 "j4_p99_uncorrected"           
## [157] "j4_p99_fully_corrected"        "j4_p99_tau1"                  
## [159] "j4_p99_twin1"                  "j4_p99_emiss1"                
## [161] "p20_mean_uncorrected"          "p20_mean_fully_corrected"     
## [163] "p20_mean_tau1"                 "p20_mean_twin1"               
## [165] "p20_mean_emiss1"               "p20_p1_uncorrected"           
## [167] "p20_p1_fully_corrected"        "p20_p1_tau1"                  
## [169] "p20_p1_twin1"                  "p20_p1_emiss1"                
## [171] "p20_p5_uncorrected"            "p20_p5_fully_corrected"       
## [173] "p20_p5_tau1"                   "p20_p5_twin1"                 
## [175] "p20_p5_emiss1"                 "p20_p10_uncorrected"          
## [177] "p20_p10_fully_corrected"       "p20_p10_tau1"                 
## [179] "p20_p10_twin1"                 "p20_p10_emiss1"               
## [181] "p20_p25_uncorrected"           "p20_p25_fully_corrected"      
## [183] "p20_p25_tau1"                  "p20_p25_twin1"                
## [185] "p20_p25_emiss1"                "p20_p50_uncorrected"          
## [187] "p20_p50_fully_corrected"       "p20_p50_tau1"                 
## [189] "p20_p50_twin1"                 "p20_p50_emiss1"               
## [191] "p20_p75_uncorrected"           "p20_p75_fully_corrected"      
## [193] "p20_p75_tau1"                  "p20_p75_twin1"                
## [195] "p20_p75_emiss1"                "p20_p90_uncorrected"          
## [197] "p20_p90_fully_corrected"       "p20_p90_tau1"                 
## [199] "p20_p90_twin1"                 "p20_p90_emiss1"               
## [201] "p20_p95_uncorrected"           "p20_p95_fully_corrected"      
## [203] "p20_p95_tau1"                  "p20_p95_twin1"                
## [205] "p20_p95_emiss1"                "p20_p99_uncorrected"          
## [207] "p20_p99_fully_corrected"       "p20_p99_tau1"                 
## [209] "p20_p99_twin1"                 "p20_p99_emiss1"               
## [211] "tc1panel_mean_uncorrected"     "tc1panel_mean_fully_corrected"
## [213] "tc1panel_mean_tau1"            "tc1panel_mean_twin1"          
## [215] "tc1panel_mean_emiss1"          "tc1panel_p1_uncorrected"      
## [217] "tc1panel_p1_fully_corrected"   "tc1panel_p1_tau1"             
## [219] "tc1panel_p1_twin1"             "tc1panel_p1_emiss1"           
## [221] "tc1panel_p5_uncorrected"       "tc1panel_p5_fully_corrected"  
## [223] "tc1panel_p5_tau1"              "tc1panel_p5_twin1"            
## [225] "tc1panel_p5_emiss1"            "tc1panel_p10_uncorrected"     
## [227] "tc1panel_p10_fully_corrected"  "tc1panel_p10_tau1"            
## [229] "tc1panel_p10_twin1"            "tc1panel_p10_emiss1"          
## [231] "tc1panel_p25_uncorrected"      "tc1panel_p25_fully_corrected" 
## [233] "tc1panel_p25_tau1"             "tc1panel_p25_twin1"           
## [235] "tc1panel_p25_emiss1"           "tc1panel_p50_uncorrected"     
## [237] "tc1panel_p50_fully_corrected"  "tc1panel_p50_tau1"            
## [239] "tc1panel_p50_twin1"            "tc1panel_p50_emiss1"          
## [241] "tc1panel_p75_uncorrected"      "tc1panel_p75_fully_corrected" 
## [243] "tc1panel_p75_tau1"             "tc1panel_p75_twin1"           
## [245] "tc1panel_p75_emiss1"           "tc1panel_p90_uncorrected"     
## [247] "tc1panel_p90_fully_corrected"  "tc1panel_p90_tau1"            
## [249] "tc1panel_p90_twin1"            "tc1panel_p90_emiss1"          
## [251] "tc1panel_p95_uncorrected"      "tc1panel_p95_fully_corrected" 
## [253] "tc1panel_p95_tau1"             "tc1panel_p95_twin1"           
## [255] "tc1panel_p95_emiss1"           "tc1panel_p99_uncorrected"     
## [257] "tc1panel_p99_fully_corrected"  "tc1panel_p99_tau1"            
## [259] "tc1panel_p99_twin1"            "tc1panel_p99_emiss1"

Merge Panel and Thermal

tc1_panel <-  panel_dat |> 
  select(timestamp, tc1) |> 
  rename(
    tc1_panel_dat = tc1
  )

p <- tc1_panel |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = tc1_panel_dat))

tc1_comp <- tc1_panel |>
  mutate(timestamp = ymd_hms(timestamp)) |>
  left_join(tc1_thermal, by = "timestamp")

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `timestamp = ymd_hms(timestamp)`.
## Caused by warning:
## !  343 failed to parse.

tc1_comp |>
  filter(tc1panel_mean_fully_corrected<70) |> 
  select(timestamp,tc1_panel_dat, tc1panel_mean_fully_corrected) |> 
  filter(month(timestamp)>9)

## # A tibble: 1,814 × 3
##    timestamp           tc1_panel_dat tc1panel_mean_fully_corrected
##    <dttm>                      <dbl>                         <dbl>
##  1 2025-10-03 11:30:00          33.5                          32.4
##  2 2025-10-03 11:45:00          33.6                          33.3
##  3 2025-10-03 12:00:00          33.0                          33.1
##  4 2025-10-03 12:15:00          33.3                          33.2
##  5 2025-10-03 12:30:00          33.9                          33.3
##  6 2025-10-03 12:45:00          32.3                          33.3
##  7 2025-10-03 13:00:00          30.9                          32.4
##  8 2025-10-03 13:15:00          29.9                          33.1
##  9 2025-10-03 13:30:00          29.6                          33.9
## 10 2025-10-03 13:45:00          31.0                          30.8
## # ℹ 1,804 more rows

p <- tc1_comp |> 
  ggplot(na.rm = FALSE)+
  geom_line(aes(x = timestamp, y = tc1_panel_dat), color = "red")+
  geom_line(aes(x = timestamp, y = tc1panel_mean_fully_corrected)) 

ggplotly(p)

tc1_comp <- tc1_comp |>
  mutate(
    month = lubridate::month(timestamp),
    time_of_day = format(timestamp, "%H:%M:%S"),
    hour = hour(timestamp),
    bias_tc1 = tc1panel_mean_fully_corrected - tc1_panel_dat
  )

tc1_comp |> 
  filter(tc1panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_point(aes(x = tc1panel_mean_fully_corrected, y = tc1_panel_dat, color = as.factor(hour)))+
  facet_wrap(~month, ncol = 1)

p <- tc1_comp |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = bias_tc1))


ggplotly(
  tc1_comp |> 
    filter(tc1panel_mean_fully_corrected<70) |> 
    ggplot()+
    geom_line(aes(x = timestamp, y = bias_tc1))
)

Looking into 1hr offset

tc1_comp |> 
  filter(tc1panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_point(aes(x = tc1panel_mean_fully_corrected, y = tc1_panel_dat, color = as.factor(hour)))+
  facet_wrap(~month, ncol = 1)

tc1_comp |> 
  filter(tc1panel_mean_fully_corrected < 70) |> 
  ggplot() +
  geom_line(aes(x = timestamp, y = bias_tc1)) +
  geom_vline(
    xintercept = as.POSIXct("2025-11-02 02:00:00", tz = "America/Denver"),
    linetype = "dashed",
    color = "red"
  )

p <- tc1_comp |> 
  filter(tc1panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = tc1panel_mean_fully_corrected)) +
  geom_line(aes(x = timestamp, y = tc1_panel_dat), color = "red") 

ggplotly(p)

ggplotly(
  tc1_comp |> 
    filter(tc1panel_mean_fully_corrected<70) |> 
    ggplot()+
    geom_line(aes(x = timestamp, y = bias_tc1))
)

tc1_thermal_1hr <- read_csv("/Users/derekkober/Documents/School/UNM/Thesis/code/Python/correcTIR-main/output/tc1_processed.csv") |> 
  clean_names() |> 
  mutate(
    timestamp = round_date(timestamp, "15 minutes"),
    ts_og = timestamp,
    timestamp = timestamp - hours(1)
    )

## Rows: 12244 Columns: 260
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (2): filepath, filename_short
## dbl  (257): T_air, RH, sky_temp, LW_IN, VF_2, rho_v, tau, J20_mean_uncorrect...
## dttm   (1): Timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

tc1_comp_1hr <- tc1_panel |>
  mutate(timestamp = ymd_hms(timestamp)) |>
  left_join(tc1_thermal_1hr, by = "timestamp")

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `timestamp = ymd_hms(timestamp)`.
## Caused by warning:
## !  343 failed to parse.

tc1_comp_1hr |>
  filter(tc1panel_mean_fully_corrected<70) |> 
  select(timestamp,tc1_panel_dat, tc1panel_mean_fully_corrected) |> 
  filter(month(timestamp)>9)

## # A tibble: 1,814 × 3
##    timestamp           tc1_panel_dat tc1panel_mean_fully_corrected
##    <dttm>                      <dbl>                         <dbl>
##  1 2025-10-03 10:30:00          32.2                          32.4
##  2 2025-10-03 10:45:00          33.0                          33.3
##  3 2025-10-03 11:00:00          33.1                          33.1
##  4 2025-10-03 11:15:00          33.4                          33.2
##  5 2025-10-03 11:30:00          33.5                          33.3
##  6 2025-10-03 11:45:00          33.6                          33.3
##  7 2025-10-03 12:00:00          33.0                          32.4
##  8 2025-10-03 12:15:00          33.3                          33.1
##  9 2025-10-03 12:30:00          33.9                          33.9
## 10 2025-10-03 12:45:00          32.3                          30.8
## # ℹ 1,804 more rows

p <- tc1_comp_1hr |> 
  ggplot(na.rm = FALSE)+
  geom_line(aes(x = timestamp, y = tc1_panel_dat), color = "red")+
  geom_line(aes(x = timestamp, y = tc1panel_mean_fully_corrected)) 

ggplotly(p)

tc1_comp_1hr <- tc1_comp_1hr |>
  mutate(
    month = lubridate::month(timestamp),
    time_of_day = format(timestamp, "%H:%M:%S"),
    hour = hour(timestamp),
    bias_tc1 = tc1panel_mean_fully_corrected - tc1_panel_dat
  )

tc1_comp_1hr |> 
  filter(tc1panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_point(aes(x = tc1panel_mean_fully_corrected, y = tc1_panel_dat, color = as.factor(hour)))+
  facet_wrap(~month, ncol = 1)

p <- tc1_comp_1hr |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = bias_tc1))

ggplotly(
  tc1_comp_1hr |> 
    filter(tc1panel_mean_fully_corrected<70) |> 
    ggplot()+
    geom_line(aes(x = timestamp, y = bias_tc1))
)

tc1_comp <- tc1_comp_1hr
tc1_thermal <- tc1_thermal_1hr

fixing september

start_date <- ymd("2025-09-18")
end_date   <- ymd("2025-09-26")

tc1_sept_off <- tc1_comp |> 
  select(timestamp,tc1_panel_dat, tc1panel_mean_fully_corrected) |> 
  filter(
    timestamp >= start_date,
    timestamp <= end_date)

write_csv(tc1_sept_off, file = here("data/segment.csv"))

p <- tc1_sept_off |> 
  ggplot(na.rm = FALSE)+
  geom_line(aes(x = timestamp, y = tc1_panel_dat), color = "red")+
  geom_line(aes(x = timestamp, y = tc1panel_mean_fully_corrected)) 


ggplotly(p)

# timecorrections <- read_csv(here("data/supp/september_mislabeledDays.csv")) |>
#   type_convert() |>
#   mutate(
#     peak_time_og_date = mdy(peak_time_og_date),
#     matched_panel_peak_date = mdy(matched_panel_peak_date),
#     sod_time_og_date = mdy(sod_time_og_date)
#   ) |>
#   mutate(
#     thermal_day_id = day_id,
#     thermal_peak_orig_timestamp =
#       as_datetime(peak_time_og_date) + peak_time_og_time - hours(1),
#     panel_peak_matched_timestamp =
#       as_datetime(matched_panel_peak_date) + matched_panel_peak_time - hours(1),
#     thermal_day_start_timestamp =
#       as_datetime(sod_time_og_date) + sod_time_og_time
#   ) |>
#   select(
#     thermal_day_id,
#     thermal_peak_orig_timestamp,
#     panel_peak_matched_timestamp,
#     thermal_day_start_timestamp
#   ) |>
#   na.omit()
# 
# saveRDS(timecorrections, file = here("data/supp/tc1_manual_day_peak_matches.rds"))

tc1_manual_day_peak_matches <- readRDS(file = here("data/supp/tc1_manual_day_peak_matches.rds"))

tc1_peak_matches <- readRDS(file = here("data/supp/tc1_manual_day_peak_matches.rds"))
timecorrections <- tc1_peak_matches |>
  arrange(thermal_day_start_timestamp) |>
  mutate(
    thermal_day_end_timestamp = lead(thermal_day_start_timestamp),
    shift = panel_peak_matched_timestamp - thermal_peak_orig_timestamp
  )

tc1_thermal_shifted <- tc1_thermal |>
  mutate(
    timestamp_shifted = timestamp
  )

for (i in seq_len(nrow(timecorrections) - 1)) {
  
  start_i <- timecorrections$thermal_day_start_timestamp[i]
  end_i   <- timecorrections$thermal_day_end_timestamp[i]
  shift_i <- timecorrections$shift[i]
  
  if (i == 1) {
    idx <- tc1_thermal_shifted$timestamp >= start_i &
      tc1_thermal_shifted$timestamp < end_i
  } else {
    idx <- tc1_thermal_shifted$timestamp > start_i &
      tc1_thermal_shifted$timestamp < end_i
  }
  
  tc1_thermal_shifted$timestamp_shifted[idx] <-
    tc1_thermal_shifted$timestamp[idx] + shift_i
}


timecorrections_check <- tc1_peak_matches |>
  arrange(thermal_day_start_timestamp) |>
  mutate(
    block_id = row_number(),
    thermal_day_end_timestamp = lead(thermal_day_start_timestamp),
    shift = panel_peak_matched_timestamp - thermal_peak_orig_timestamp
  ) |>
  filter(!is.na(thermal_day_end_timestamp)) |>
  mutate(
    shifted_start = thermal_day_start_timestamp + shift,
    shifted_end   = thermal_day_end_timestamp + shift
  ) |>
  select(
    block_id,
    thermal_day_start_timestamp,
    thermal_day_end_timestamp,
    thermal_peak_orig_timestamp,
    panel_peak_matched_timestamp,
    shift,
    shifted_start,
    shifted_end
  )

timecorrections_check

## # A tibble: 12 × 8
##    block_id thermal_day_start_ti…¹ thermal_day_end_time…² thermal_peak_orig_ti…³
##       <int> <dttm>                 <dttm>                 <dttm>                
##  1        1 2025-09-18 21:15:00    2025-09-19 10:00:00    2025-09-19 02:00:00   
##  2        2 2025-09-19 10:00:00    2025-09-19 21:30:00    2025-09-19 14:15:00   
##  3        3 2025-09-19 21:30:00    2025-09-20 10:00:00    2025-09-20 01:45:00   
##  4        4 2025-09-20 10:00:00    2025-09-20 21:15:00    2025-09-20 12:45:00   
##  5        5 2025-09-20 21:15:00    2025-09-21 10:00:00    2025-09-21 03:15:00   
##  6        6 2025-09-21 10:00:00    2025-09-21 21:15:00    2025-09-21 16:15:00   
##  7        7 2025-09-21 21:15:00    2025-09-22 10:00:00    2025-09-22 00:00:00   
##  8        8 2025-09-22 10:00:00    2025-09-22 21:15:00    2025-09-22 18:00:00   
##  9        9 2025-09-22 21:15:00    2025-09-23 10:00:00    2025-09-23 00:45:00   
## 10       10 2025-09-23 10:00:00    2025-09-23 21:15:00    2025-09-23 12:15:00   
## 11       11 2025-09-23 21:15:00    2025-09-24 10:00:00    2025-09-23 23:45:00   
## 12       12 2025-09-24 10:00:00    2025-09-24 21:15:00    2025-09-24 13:30:00   
## # ℹ abbreviated names: ¹thermal_day_start_timestamp,
## #   ²thermal_day_end_timestamp, ³thermal_peak_orig_timestamp
## # ℹ 4 more variables: panel_peak_matched_timestamp <dttm>, shift <drtn>,
## #   shifted_start <dttm>, shifted_end <dttm>

timecorrections_check |>
  mutate(
    next_shifted_start = lead(shifted_start),
    gap_mins = as.numeric(next_shifted_start - shifted_end, units = "mins")
  )

## # A tibble: 12 × 10
##    block_id thermal_day_start_ti…¹ thermal_day_end_time…² thermal_peak_orig_ti…³
##       <int> <dttm>                 <dttm>                 <dttm>                
##  1        1 2025-09-18 21:15:00    2025-09-19 10:00:00    2025-09-19 02:00:00   
##  2        2 2025-09-19 10:00:00    2025-09-19 21:30:00    2025-09-19 14:15:00   
##  3        3 2025-09-19 21:30:00    2025-09-20 10:00:00    2025-09-20 01:45:00   
##  4        4 2025-09-20 10:00:00    2025-09-20 21:15:00    2025-09-20 12:45:00   
##  5        5 2025-09-20 21:15:00    2025-09-21 10:00:00    2025-09-21 03:15:00   
##  6        6 2025-09-21 10:00:00    2025-09-21 21:15:00    2025-09-21 16:15:00   
##  7        7 2025-09-21 21:15:00    2025-09-22 10:00:00    2025-09-22 00:00:00   
##  8        8 2025-09-22 10:00:00    2025-09-22 21:15:00    2025-09-22 18:00:00   
##  9        9 2025-09-22 21:15:00    2025-09-23 10:00:00    2025-09-23 00:45:00   
## 10       10 2025-09-23 10:00:00    2025-09-23 21:15:00    2025-09-23 12:15:00   
## 11       11 2025-09-23 21:15:00    2025-09-24 10:00:00    2025-09-23 23:45:00   
## 12       12 2025-09-24 10:00:00    2025-09-24 21:15:00    2025-09-24 13:30:00   
## # ℹ abbreviated names: ¹thermal_day_start_timestamp,
## #   ²thermal_day_end_timestamp, ³thermal_peak_orig_timestamp
## # ℹ 6 more variables: panel_peak_matched_timestamp <dttm>, shift <drtn>,
## #   shifted_start <dttm>, shifted_end <dttm>, next_shifted_start <dttm>,
## #   gap_mins <dbl>

p <- ggplot() +
  geom_line(data = tc1_panel,
            aes(timestamp, tc1_panel_dat),
            color = "red") +
  geom_line(data = tc1_thermal_shifted,
            aes(timestamp_shifted, tc1panel_mean_fully_corrected),
            color = "black")

ggplotly(p)

start_date <- ymd("2025-09-25")
end_date   <- ymd("2025-09-26")

fff_thermal_shifted_test <-tc1_thermal_shifted |> 
  mutate(timestamp = timestamp_shifted) |> 
  select(timestamp, tc1panel_mean_fully_corrected)|>
  filter(
    timestamp >= start_date,
    timestamp <= end_date)

fff_thermal_shifted_test

## # A tibble: 100 × 2
##    timestamp           tc1panel_mean_fully_corrected
##    <dttm>                                      <dbl>
##  1 2025-09-25 05:45:00                          15.3
##  2 2025-09-25 06:00:00                          18.1
##  3 2025-09-25 06:15:00                          20.2
##  4 2025-09-25 06:30:00                          21.8
##  5 2025-09-25 06:45:00                          23.2
##  6 2025-09-25 07:00:00                          24.0
##  7 2025-09-25 07:15:00                          25.2
##  8 2025-09-25 07:30:00                          26.1
##  9 2025-09-25 07:45:00                          27.4
## 10 2025-09-25 08:00:00                          28.2
## # ℹ 90 more rows

show <- tc1_panel |>
  filter(
    timestamp >= start_date,
    timestamp <= end_date)

show

## # A tibble: 97 × 2
##    timestamp           tc1_panel_dat
##    <dttm>                      <dbl>
##  1 2025-09-25 00:00:00          9.04
##  2 2025-09-25 00:15:00          9.02
##  3 2025-09-25 00:30:00          8.54
##  4 2025-09-25 00:45:00          8.08
##  5 2025-09-25 01:00:00          8.23
##  6 2025-09-25 01:15:00          8.57
##  7 2025-09-25 01:30:00          8.73
##  8 2025-09-25 01:45:00          8.80
##  9 2025-09-25 02:00:00          8.81
## 10 2025-09-25 02:15:00          8.74
## # ℹ 87 more rows

fff_tc1_comp <- tc1_panel |>
  left_join(fff_thermal_shifted_test, by = "timestamp") |> 
  select(timestamp, tc1_panel_dat, tc1panel_mean_fully_corrected)|>
  filter(
    timestamp >= start_date,
    timestamp <= end_date)

fff_tc1_comp

## # A tibble: 130 × 3
##    timestamp           tc1_panel_dat tc1panel_mean_fully_corrected
##    <dttm>                      <dbl>                         <dbl>
##  1 2025-09-25 00:00:00          9.04                          36.5
##  2 2025-09-25 00:15:00          9.02                          37.6
##  3 2025-09-25 00:30:00          8.54                          37.8
##  4 2025-09-25 00:45:00          8.08                          38.1
##  5 2025-09-25 01:00:00          8.23                          38.7
##  6 2025-09-25 01:15:00          8.57                          39.0
##  7 2025-09-25 01:30:00          8.73                          39.4
##  8 2025-09-25 01:45:00          8.80                          38.7
##  9 2025-09-25 02:00:00          8.81                          38.4
## 10 2025-09-25 02:15:00          8.74                          38.0
## # ℹ 120 more rows

p <- tc1_comp |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = bias_tc1))

ggplotly(p)

start_date <- ymd("2025-09-18")
end_date   <- ymd("2025-10-04")

fuck_this <- tc1_thermal_shifted |>
  select(timestamp_shifted, tc1panel_mean_fully_corrected) |>
  filter(
    timestamp_shifted >= start_date,
    timestamp_shifted <= end_date)

Can we correct the high values?

TC2

tc2_thermal <- read_csv("/Users/derekkober/Documents/School/UNM/Thesis/code/Python/correcTIR-main/output/tc2_processed.csv") |> 
  clean_names() |> 
  mutate(
    timestamp = round_date(timestamp, "15 minutes"),
    ts_og = timestamp,
    timestamp = timestamp - hours(1)
    )

## Rows: 11190 Columns: 260
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (2): filepath, filename_short
## dbl  (257): T_air, RH, sky_temp, LW_IN, VF_2, rho_v, tau, J22_mean_uncorrect...
## dttm   (1): Timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

colnames(tc2_thermal)

##   [1] "filepath"                      "filename_short"               
##   [3] "timestamp"                     "t_air"                        
##   [5] "rh"                            "sky_temp"                     
##   [7] "lw_in"                         "vf_2"                         
##   [9] "rho_v"                         "tau"                          
##  [11] "j22_mean_uncorrected"          "j22_mean_fully_corrected"     
##  [13] "j22_mean_tau1"                 "j22_mean_twin1"               
##  [15] "j22_mean_emiss1"               "j22_p1_uncorrected"           
##  [17] "j22_p1_fully_corrected"        "j22_p1_tau1"                  
##  [19] "j22_p1_twin1"                  "j22_p1_emiss1"                
##  [21] "j22_p5_uncorrected"            "j22_p5_fully_corrected"       
##  [23] "j22_p5_tau1"                   "j22_p5_twin1"                 
##  [25] "j22_p5_emiss1"                 "j22_p10_uncorrected"          
##  [27] "j22_p10_fully_corrected"       "j22_p10_tau1"                 
##  [29] "j22_p10_twin1"                 "j22_p10_emiss1"               
##  [31] "j22_p25_uncorrected"           "j22_p25_fully_corrected"      
##  [33] "j22_p25_tau1"                  "j22_p25_twin1"                
##  [35] "j22_p25_emiss1"                "j22_p50_uncorrected"          
##  [37] "j22_p50_fully_corrected"       "j22_p50_tau1"                 
##  [39] "j22_p50_twin1"                 "j22_p50_emiss1"               
##  [41] "j22_p75_uncorrected"           "j22_p75_fully_corrected"      
##  [43] "j22_p75_tau1"                  "j22_p75_twin1"                
##  [45] "j22_p75_emiss1"                "j22_p90_uncorrected"          
##  [47] "j22_p90_fully_corrected"       "j22_p90_tau1"                 
##  [49] "j22_p90_twin1"                 "j22_p90_emiss1"               
##  [51] "j22_p95_uncorrected"           "j22_p95_fully_corrected"      
##  [53] "j22_p95_tau1"                  "j22_p95_twin1"                
##  [55] "j22_p95_emiss1"                "j22_p99_uncorrected"          
##  [57] "j22_p99_fully_corrected"       "j22_p99_tau1"                 
##  [59] "j22_p99_twin1"                 "j22_p99_emiss1"               
##  [61] "j23_mean_uncorrected"          "j23_mean_fully_corrected"     
##  [63] "j23_mean_tau1"                 "j23_mean_twin1"               
##  [65] "j23_mean_emiss1"               "j23_p1_uncorrected"           
##  [67] "j23_p1_fully_corrected"        "j23_p1_tau1"                  
##  [69] "j23_p1_twin1"                  "j23_p1_emiss1"                
##  [71] "j23_p5_uncorrected"            "j23_p5_fully_corrected"       
##  [73] "j23_p5_tau1"                   "j23_p5_twin1"                 
##  [75] "j23_p5_emiss1"                 "j23_p10_uncorrected"          
##  [77] "j23_p10_fully_corrected"       "j23_p10_tau1"                 
##  [79] "j23_p10_twin1"                 "j23_p10_emiss1"               
##  [81] "j23_p25_uncorrected"           "j23_p25_fully_corrected"      
##  [83] "j23_p25_tau1"                  "j23_p25_twin1"                
##  [85] "j23_p25_emiss1"                "j23_p50_uncorrected"          
##  [87] "j23_p50_fully_corrected"       "j23_p50_tau1"                 
##  [89] "j23_p50_twin1"                 "j23_p50_emiss1"               
##  [91] "j23_p75_uncorrected"           "j23_p75_fully_corrected"      
##  [93] "j23_p75_tau1"                  "j23_p75_twin1"                
##  [95] "j23_p75_emiss1"                "j23_p90_uncorrected"          
##  [97] "j23_p90_fully_corrected"       "j23_p90_tau1"                 
##  [99] "j23_p90_twin1"                 "j23_p90_emiss1"               
## [101] "j23_p95_uncorrected"           "j23_p95_fully_corrected"      
## [103] "j23_p95_tau1"                  "j23_p95_twin1"                
## [105] "j23_p95_emiss1"                "j23_p99_uncorrected"          
## [107] "j23_p99_fully_corrected"       "j23_p99_tau1"                 
## [109] "j23_p99_twin1"                 "j23_p99_emiss1"               
## [111] "p2_mean_uncorrected"           "p2_mean_fully_corrected"      
## [113] "p2_mean_tau1"                  "p2_mean_twin1"                
## [115] "p2_mean_emiss1"                "p2_p1_uncorrected"            
## [117] "p2_p1_fully_corrected"         "p2_p1_tau1"                   
## [119] "p2_p1_twin1"                   "p2_p1_emiss1"                 
## [121] "p2_p5_uncorrected"             "p2_p5_fully_corrected"        
## [123] "p2_p5_tau1"                    "p2_p5_twin1"                  
## [125] "p2_p5_emiss1"                  "p2_p10_uncorrected"           
## [127] "p2_p10_fully_corrected"        "p2_p10_tau1"                  
## [129] "p2_p10_twin1"                  "p2_p10_emiss1"                
## [131] "p2_p25_uncorrected"            "p2_p25_fully_corrected"       
## [133] "p2_p25_tau1"                   "p2_p25_twin1"                 
## [135] "p2_p25_emiss1"                 "p2_p50_uncorrected"           
## [137] "p2_p50_fully_corrected"        "p2_p50_tau1"                  
## [139] "p2_p50_twin1"                  "p2_p50_emiss1"                
## [141] "p2_p75_uncorrected"            "p2_p75_fully_corrected"       
## [143] "p2_p75_tau1"                   "p2_p75_twin1"                 
## [145] "p2_p75_emiss1"                 "p2_p90_uncorrected"           
## [147] "p2_p90_fully_corrected"        "p2_p90_tau1"                  
## [149] "p2_p90_twin1"                  "p2_p90_emiss1"                
## [151] "p2_p95_uncorrected"            "p2_p95_fully_corrected"       
## [153] "p2_p95_tau1"                   "p2_p95_twin1"                 
## [155] "p2_p95_emiss1"                 "p2_p99_uncorrected"           
## [157] "p2_p99_fully_corrected"        "p2_p99_tau1"                  
## [159] "p2_p99_twin1"                  "p2_p99_emiss1"                
## [161] "p21_mean_uncorrected"          "p21_mean_fully_corrected"     
## [163] "p21_mean_tau1"                 "p21_mean_twin1"               
## [165] "p21_mean_emiss1"               "p21_p1_uncorrected"           
## [167] "p21_p1_fully_corrected"        "p21_p1_tau1"                  
## [169] "p21_p1_twin1"                  "p21_p1_emiss1"                
## [171] "p21_p5_uncorrected"            "p21_p5_fully_corrected"       
## [173] "p21_p5_tau1"                   "p21_p5_twin1"                 
## [175] "p21_p5_emiss1"                 "p21_p10_uncorrected"          
## [177] "p21_p10_fully_corrected"       "p21_p10_tau1"                 
## [179] "p21_p10_twin1"                 "p21_p10_emiss1"               
## [181] "p21_p25_uncorrected"           "p21_p25_fully_corrected"      
## [183] "p21_p25_tau1"                  "p21_p25_twin1"                
## [185] "p21_p25_emiss1"                "p21_p50_uncorrected"          
## [187] "p21_p50_fully_corrected"       "p21_p50_tau1"                 
## [189] "p21_p50_twin1"                 "p21_p50_emiss1"               
## [191] "p21_p75_uncorrected"           "p21_p75_fully_corrected"      
## [193] "p21_p75_tau1"                  "p21_p75_twin1"                
## [195] "p21_p75_emiss1"                "p21_p90_uncorrected"          
## [197] "p21_p90_fully_corrected"       "p21_p90_tau1"                 
## [199] "p21_p90_twin1"                 "p21_p90_emiss1"               
## [201] "p21_p95_uncorrected"           "p21_p95_fully_corrected"      
## [203] "p21_p95_tau1"                  "p21_p95_twin1"                
## [205] "p21_p95_emiss1"                "p21_p99_uncorrected"          
## [207] "p21_p99_fully_corrected"       "p21_p99_tau1"                 
## [209] "p21_p99_twin1"                 "p21_p99_emiss1"               
## [211] "tc2panel_mean_uncorrected"     "tc2panel_mean_fully_corrected"
## [213] "tc2panel_mean_tau1"            "tc2panel_mean_twin1"          
## [215] "tc2panel_mean_emiss1"          "tc2panel_p1_uncorrected"      
## [217] "tc2panel_p1_fully_corrected"   "tc2panel_p1_tau1"             
## [219] "tc2panel_p1_twin1"             "tc2panel_p1_emiss1"           
## [221] "tc2panel_p5_uncorrected"       "tc2panel_p5_fully_corrected"  
## [223] "tc2panel_p5_tau1"              "tc2panel_p5_twin1"            
## [225] "tc2panel_p5_emiss1"            "tc2panel_p10_uncorrected"     
## [227] "tc2panel_p10_fully_corrected"  "tc2panel_p10_tau1"            
## [229] "tc2panel_p10_twin1"            "tc2panel_p10_emiss1"          
## [231] "tc2panel_p25_uncorrected"      "tc2panel_p25_fully_corrected" 
## [233] "tc2panel_p25_tau1"             "tc2panel_p25_twin1"           
## [235] "tc2panel_p25_emiss1"           "tc2panel_p50_uncorrected"     
## [237] "tc2panel_p50_fully_corrected"  "tc2panel_p50_tau1"            
## [239] "tc2panel_p50_twin1"            "tc2panel_p50_emiss1"          
## [241] "tc2panel_p75_uncorrected"      "tc2panel_p75_fully_corrected" 
## [243] "tc2panel_p75_tau1"             "tc2panel_p75_twin1"           
## [245] "tc2panel_p75_emiss1"           "tc2panel_p90_uncorrected"     
## [247] "tc2panel_p90_fully_corrected"  "tc2panel_p90_tau1"            
## [249] "tc2panel_p90_twin1"            "tc2panel_p90_emiss1"          
## [251] "tc2panel_p95_uncorrected"      "tc2panel_p95_fully_corrected" 
## [253] "tc2panel_p95_tau1"             "tc2panel_p95_twin1"           
## [255] "tc2panel_p95_emiss1"           "tc2panel_p99_uncorrected"     
## [257] "tc2panel_p99_fully_corrected"  "tc2panel_p99_tau1"            
## [259] "tc2panel_p99_twin1"            "tc2panel_p99_emiss1"          
## [261] "ts_og"

tc2_panel <-  panel_dat |> 
  select(timestamp, tc2) |> 
  na.omit()

tc2_comp <- tc2_thermal |>
  mutate(timestamp = ymd_hms(timestamp)) |>
  left_join(tc2_panel, by = "timestamp")
 
tc2_comp <- tc2_comp |>
  mutate(
    month = lubridate::month(timestamp),
    time_of_day = format(timestamp, "%H:%M:%S"),
    hour = hour(timestamp),
    bias_tc2 = tc2panel_mean_fully_corrected - tc2
  )

tc2_comp |> 
  filter(tc2panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_point(aes(x = tc2panel_mean_fully_corrected, y = tc2, color = as.factor(hour)))+
  facet_wrap(~month, ncol = 1)

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

tc2_comp |> 
  filter(tc2panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = bias_tc2))

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

p <- tc2_comp |> 
  filter(tc2panel_mean_fully_corrected<70) |> 
  ggplot()+
  geom_line(aes(x = timestamp, y = tc2panel_mean_fully_corrected)) +
  geom_line(aes(x = timestamp, y = tc2), color = "red")

ggplotly(p)

ggplotly(
  tc2_comp |> 
    filter(tc2panel_mean_fully_corrected<70) |> 
    ggplot()+
    geom_line(aes(x = timestamp, y = bias_tc2))
)

lag analysis (trash?)

library(tidyverse)
library(lubridate)
library(janitor)
library(plotly)

#--------------------------------------------------
# Helper: robust timestamp parser
#--------------------------------------------------
fix_timestamp <- function(x, tz = "America/Denver") {
  if (inherits(x, c("POSIXct", "POSIXt"))) {
    return(with_tz(x, tzone = tz))
  }
  
  x_chr <- as.character(x)
  
  parsed <- suppressWarnings(
    parse_date_time(
      x_chr,
      orders = c(
        "Ymd HMS", "Ymd HM", "YmdHMS", "YmdHM",
        "ymd HMS", "ymd HM",
        "mdY HMS", "mdY HM",
        "m/d/Y H:M:S", "m/d/Y H:M"
      ),
      tz = tz
    )
  )
  
  parsed
}

#--------------------------------------------------
# Helper function: test multiple time shifts
# lag_h can be fractional
#--------------------------------------------------
check_time_shift <- function(thermal_df, panel_df,
                             thermal_col,
                             panel_col,
                             lag_hours = seq(-2, 2, by = 0.25),
                             temp_max = 70,
                             tz = "America/Denver") {
  
  thermal_df <- thermal_df |>
    mutate(timestamp = fix_timestamp(timestamp, tz = tz))
  
  panel_df <- panel_df |>
    mutate(timestamp = fix_timestamp(timestamp, tz = tz))
  
  map_dfr(lag_hours, function(lag_h) {
    
    comp <- thermal_df |>
      mutate(
        timestamp_shift = timestamp + dseconds(lag_h * 3600)
      ) |>
      left_join(panel_df, by = c("timestamp_shift" = "timestamp")) |>
      filter(
        !is.na(timestamp_shift),
        !is.na(.data[[thermal_col]]),
        !is.na(.data[[panel_col]]),
        .data[[thermal_col]] < temp_max
      ) |>
      mutate(
        diff = .data[[thermal_col]] - .data[[panel_col]]
      )
    
    tibble(
      lag_hours = lag_h,
      n = nrow(comp),
      mean_bias = ifelse(nrow(comp) > 0, mean(comp$diff, na.rm = TRUE), NA_real_),
      sd_bias   = ifelse(nrow(comp) > 1, sd(comp$diff, na.rm = TRUE), NA_real_),
      mae       = ifelse(nrow(comp) > 0, mean(abs(comp$diff), na.rm = TRUE), NA_real_),
      rmse      = ifelse(nrow(comp) > 0, sqrt(mean(comp$diff^2, na.rm = TRUE)), NA_real_),
      cor       = ifelse(nrow(comp) > 1,
                         cor(comp[[thermal_col]], comp[[panel_col]], use = "complete.obs"),
                         NA_real_)
    )
  })
}

#--------------------------------------------------
# Helper function: build matched comparison table
#--------------------------------------------------
make_comp_table <- function(thermal_df, panel_df,
                            thermal_col,
                            panel_col,
                            best_lag,
                            temp_max = 70,
                            tz = "America/Denver") {
  
  thermal_df <- thermal_df |>
    mutate(timestamp = fix_timestamp(timestamp, tz = tz))
  
  panel_df <- panel_df |>
    mutate(timestamp = fix_timestamp(timestamp, tz = tz))
  
  thermal_df |>
    mutate(
      ts_og = timestamp,
      timestamp = timestamp + dseconds(best_lag * 3600)
    ) |>
    left_join(panel_df, by = "timestamp") |>
    mutate(
      month = month(timestamp),
      time_of_day = format(timestamp, "%H:%M:%S"),
      hour = hour(timestamp),
      bias = .data[[thermal_col]] - .data[[panel_col]]
    ) |>
    filter(
      !is.na(timestamp),
      !is.na(.data[[thermal_col]]),
      !is.na(.data[[panel_col]]),
      .data[[thermal_col]] < temp_max
    )
}

#--------------------------------------------------
# Helper function: summarize matched data
#--------------------------------------------------
summarize_comp <- function(comp_df, thermal_col, panel_col) {
  comp_df |>
    summarize(
      n = n(),
      mean_bias = mean(bias, na.rm = TRUE),
      sd_bias = sd(bias, na.rm = TRUE),
      mae = mean(abs(bias), na.rm = TRUE),
      rmse = sqrt(mean(bias^2, na.rm = TRUE)),
      cor = cor(.data[[thermal_col]], .data[[panel_col]], use = "complete.obs")
    )
}

#--------------------------------------------------
# Helper function: paired t test
#--------------------------------------------------
run_paired_ttest <- function(comp_df, thermal_col, panel_col) {
  t.test(
    x = comp_df[[thermal_col]],
    y = comp_df[[panel_col]],
    paired = TRUE
  )
}

#--------------------------------------------------
# Helper function: linear model
#--------------------------------------------------
run_agreement_lm <- function(comp_df, thermal_col, panel_col) {
  formula <- as.formula(paste(panel_col, "~", thermal_col))
  lm(formula, data = comp_df)
}

#--------------------------------------------------
# Helper function: cross correlation
#--------------------------------------------------
run_ccf <- function(comp_df, thermal_col, panel_col, lag_max = 8) {
  ccf(
    x = comp_df[[thermal_col]],
    y = comp_df[[panel_col]],
    lag.max = lag_max,
    main = paste("Cross-correlation:", thermal_col, "vs", panel_col)
  )
}

#--------------------------------------------------
# Load thermal + panel data
#--------------------------------------------------
tc1_thermal <- read_csv("/Users/derekkober/Documents/School/UNM/Thesis/code/Python/correcTIR-main/output/tc1_processed.csv") |>
  clean_names() |>
  mutate(timestamp = round_date(fix_timestamp(timestamp), "15 minutes"))

## Rows: 12244 Columns: 260
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (2): filepath, filename_short
## dbl  (257): T_air, RH, sky_temp, LW_IN, VF_2, rho_v, tau, J20_mean_uncorrect...
## dttm   (1): Timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

tc2_thermal <- read_csv("/Users/derekkober/Documents/School/UNM/Thesis/code/Python/correcTIR-main/output/tc2_processed.csv") |>
  clean_names() |>
  mutate(timestamp = round_date(fix_timestamp(timestamp), "15 minutes"))

## Rows: 11190 Columns: 260
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (2): filepath, filename_short
## dbl  (257): T_air, RH, sky_temp, LW_IN, VF_2, rho_v, tau, J22_mean_uncorrect...
## dttm   (1): Timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

tc1_panel <- panel_dat |>
  select(timestamp, tc1) |>
  rename(tc1_panel_dat = tc1) |>
  mutate(timestamp = fix_timestamp(timestamp)) |>
  drop_na()

tc2_panel <- panel_dat |>
  select(timestamp, tc2) |>
  rename(tc2_panel_dat = tc2) |>
  mutate(timestamp = fix_timestamp(timestamp)) |>
  drop_na()

# quick check for bad timestamps
sum(is.na(tc1_thermal$timestamp))

## [1] 0

sum(is.na(tc2_thermal$timestamp))

## [1] 0

sum(is.na(tc1_panel$timestamp))

## [1] 0

sum(is.na(tc2_panel$timestamp))

## [1] 0

#--------------------------------------------------
# 1) Lag sweep: TC1
#--------------------------------------------------
tc1_lag_check <- check_time_shift(
  thermal_df  = tc1_thermal,
  panel_df    = tc1_panel,
  thermal_col = "tc1panel_mean_fully_corrected",
  panel_col   = "tc1_panel_dat",
  lag_hours   = seq(-2, 2, by = 0.25),
  temp_max    = 70
)

tc1_best_rmse <- tc1_lag_check |>
  arrange(rmse) |>
  slice(1)

tc1_best_cor <- tc1_lag_check |>
  arrange(desc(cor)) |>
  slice(1)

print(tc1_lag_check)

## # A tibble: 17 × 7
##    lag_hours     n mean_bias sd_bias   mae  rmse   cor
##        <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
##  1     -2    10568    1.12      6.25  4.35  6.35 0.795
##  2     -1.75 10569    0.924     5.93  3.98  6.00 0.814
##  3     -1.5  10570    0.720     5.62  3.60  5.67 0.831
##  4     -1.25 10571    0.522     5.34  3.19  5.37 0.845
##  5     -1    10572    0.328     5.12  2.79  5.13 0.856
##  6     -0.75 10573    0.137     5.01  2.76  5.02 0.859
##  7     -0.5  10574   -0.0503    5.05  3.04  5.05 0.854
##  8     -0.25 10575   -0.244     5.18  3.31  5.19 0.844
##  9      0    10575   -0.433     5.40  3.55  5.42 0.827
## 10      0.25 10575   -0.615     5.72  3.92  5.75 0.802
## 11      0.5  10575   -0.788     6.12  4.43  6.17 0.770
## 12      0.75 10575   -0.961     6.56  4.92  6.63 0.731
## 13      1    10575   -1.13      7.01  5.39  7.10 0.687
## 14      1.25 10575   -1.30      7.47  5.84  7.58 0.637
## 15      1.5  10575   -1.45      7.94  6.28  8.07 0.582
## 16      1.75 10575   -1.58      8.42  6.72  8.57 0.522
## 17      2    10575   -1.69      8.89  7.15  9.05 0.460

print(tc1_best_rmse)

## # A tibble: 1 × 7
##   lag_hours     n mean_bias sd_bias   mae  rmse   cor
##       <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1     -0.75 10573     0.137    5.01  2.76  5.02 0.859

print(tc1_best_cor)

## # A tibble: 1 × 7
##   lag_hours     n mean_bias sd_bias   mae  rmse   cor
##       <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1     -0.75 10573     0.137    5.01  2.76  5.02 0.859

#--------------------------------------------------
# 2) Lag sweep: TC2
#--------------------------------------------------
tc2_lag_check <- check_time_shift(
  thermal_df  = tc2_thermal,
  panel_df    = tc2_panel,
  thermal_col = "tc2panel_mean_fully_corrected",
  panel_col   = "tc2_panel_dat",
  lag_hours   = seq(-2, 2, by = 0.25),
  temp_max    = 70
)

tc2_best_rmse <- tc2_lag_check |>
  arrange(rmse) |>
  slice(1)

tc2_best_cor <- tc2_lag_check |>
  arrange(desc(cor)) |>
  slice(1)

print(tc2_lag_check)

## # A tibble: 17 × 7
##    lag_hours     n mean_bias sd_bias   mae  rmse   cor
##        <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
##  1     -2    10916    1.16      7.53  6.02  7.62 0.783
##  2     -1.75 10917    0.935     7.17  5.60  7.23 0.801
##  3     -1.5  10918    0.731     6.83  5.17  6.86 0.818
##  4     -1.25 10919    0.547     6.52  4.76  6.55 0.831
##  5     -1    10920    0.383     6.30  4.41  6.31 0.840
##  6     -0.75 10921    0.234     6.22  4.22  6.22 0.841
##  7     -0.5  10922    0.0991    6.33  4.24  6.33 0.832
##  8     -0.25 10924   -0.0243    6.57  4.42  6.57 0.816
##  9      0    10924   -0.139     6.90  4.75  6.90 0.794
## 10      0.25 10924   -0.245     7.29  5.19  7.29 0.766
## 11      0.5  10924   -0.344     7.72  5.69  7.73 0.734
## 12      0.75 10924   -0.439     8.18  6.22  8.19 0.697
## 13      1    10924   -0.529     8.66  6.76  8.67 0.657
## 14      1.25 10924   -0.612     9.15  7.31  9.17 0.613
## 15      1.5  10924   -0.686     9.64  7.83  9.67 0.566
## 16      1.75 10924   -0.749    10.1   8.33 10.2  0.515
## 17      2    10924   -0.800    10.6   8.82 10.7  0.462

print(tc2_best_rmse)

## # A tibble: 1 × 7
##   lag_hours     n mean_bias sd_bias   mae  rmse   cor
##       <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1     -0.75 10921     0.234    6.22  4.22  6.22 0.841

print(tc2_best_cor)

## # A tibble: 1 × 7
##   lag_hours     n mean_bias sd_bias   mae  rmse   cor
##       <dbl> <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1     -0.75 10921     0.234    6.22  4.22  6.22 0.841

#--------------------------------------------------
# Choose lag to use
#--------------------------------------------------
best_lag_tc1 <- tc1_best_rmse$lag_hours
best_lag_tc2 <- tc2_best_rmse$lag_hours

print(best_lag_tc1)

## [1] -0.75

print(best_lag_tc2)

## [1] -0.75

#--------------------------------------------------
# 3) Build matched comparison tables
#--------------------------------------------------
tc1_comp <- make_comp_table(
  thermal_df  = tc1_thermal,
  panel_df    = tc1_panel,
  thermal_col = "tc1panel_mean_fully_corrected",
  panel_col   = "tc1_panel_dat",
  best_lag    = best_lag_tc1,
  temp_max    = 70
)

tc2_comp <- make_comp_table(
  thermal_df  = tc2_thermal,
  panel_df    = tc2_panel,
  thermal_col = "tc2panel_mean_fully_corrected",
  panel_col   = "tc2_panel_dat",
  best_lag    = best_lag_tc2,
  temp_max    = 70
)

#--------------------------------------------------
# 4) Summary stats
#--------------------------------------------------
tc1_stats <- summarize_comp(tc1_comp, "tc1panel_mean_fully_corrected", "tc1_panel_dat")
tc2_stats <- summarize_comp(tc2_comp, "tc2panel_mean_fully_corrected", "tc2_panel_dat")

print(tc1_stats)

## # A tibble: 1 × 6
##       n mean_bias sd_bias   mae  rmse   cor
##   <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 10573     0.137    5.01  2.76  5.02 0.859

print(tc2_stats)

## # A tibble: 1 × 6
##       n mean_bias sd_bias   mae  rmse   cor
##   <int>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 10921     0.234    6.22  4.22  6.22 0.841

#--------------------------------------------------
# 5) Paired t-tests
#--------------------------------------------------
tc1_ttest <- run_paired_ttest(tc1_comp, "tc1panel_mean_fully_corrected", "tc1_panel_dat")
tc2_ttest <- run_paired_ttest(tc2_comp, "tc2panel_mean_fully_corrected", "tc2_panel_dat")

print(tc1_ttest)

## 
##  Paired t-test
## 
## data:  comp_df[[thermal_col]] and comp_df[[panel_col]]
## t = 2.8033, df = 10572, p-value = 0.005067
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.04111353 0.23227771
## sample estimates:
## mean difference 
##       0.1366956

print(tc2_ttest)

## 
##  Paired t-test
## 
## data:  comp_df[[thermal_col]] and comp_df[[panel_col]]
## t = 3.9403, df = 10920, p-value = 8.189e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1178234 0.3510984
## sample estimates:
## mean difference 
##       0.2344609

#--------------------------------------------------
# 6) Linear models for agreement
#--------------------------------------------------
tc1_lm <- run_agreement_lm(tc1_comp, "tc1panel_mean_fully_corrected", "tc1_panel_dat")
tc2_lm <- run_agreement_lm(tc2_comp, "tc2panel_mean_fully_corrected", "tc2_panel_dat")

summary(tc1_lm)

## 
## Call:
## lm(formula = formula, data = comp_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.8851  -1.1360   0.5607   1.6895  31.2010 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1.941480   0.128122   15.15   <2e-16 ***
## tc1panel_mean_fully_corrected 0.907888   0.005264  172.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.943 on 10571 degrees of freedom
## Multiple R-squared:  0.7378, Adjusted R-squared:  0.7378 
## F-statistic: 2.975e+04 on 1 and 10571 DF,  p-value: < 2.2e-16

summary(tc2_lm)

## 
## Call:
## lm(formula = formula, data = comp_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2275  -2.9915  -1.8882   0.0905  25.1612 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1.256050   0.151319   8.301   <2e-16 ***
## tc2panel_mean_fully_corrected 0.938208   0.005773 162.509   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.186 on 10919 degrees of freedom
## Multiple R-squared:  0.7075, Adjusted R-squared:  0.7075 
## F-statistic: 2.641e+04 on 1 and 10919 DF,  p-value: < 2.2e-16

Reference Panel Comparison and Corrections

Derek Kober