Step 1: Merge and Fill OK’s Weather Data

Check the final weather data: show the first rows

setwd('C:\\Users\\Yang\\Desktop\\Research\\Work')
Weather_OK <- read.csv("Weather_OK_Final.csv")
head(Weather_OK)
##   X Year Month Day     Date Time   DateTime Tair_Org RH_Org Precip_Org
## 1 1 2012     8   8 8/8/2012 0:00 1344402000       NA     NA         NA
## 2 2 2012     8   8 8/8/2012 0:15 1344402900       NA     NA         NA
## 3 3 2012     8   8 8/8/2012 0:30 1344403800       NA     NA         NA
## 4 4 2012     8   8 8/8/2012 0:45 1344404700       NA     NA         NA
## 5 5 2012     8   8 8/8/2012 1:00 1344405600       NA     NA         NA
## 6 6 2012     8   8 8/8/2012 1:15 1344406500       NA     NA         NA
##   PAR_Org Tair_Ref RH_Ref Precip_Ref PAR_Ref Tair_Fill RH_Fill Precip_Fill
## 1      NA     23.9     87          0       0         1       1           1
## 2      NA     23.3     87          0       0         1       1           1
## 3      NA     23.3     89          0       0         1       1           1
## 4      NA     23.3     90          0       0         1       1           1
## 5      NA     22.8     90          0       0         1       1           1
## 6      NA     23.3     90          0       0         1       1           1
##   PAR_Fill Tair_Final RH_Final Precip_Final PAR_Final
## 1        1       23.9       87            0         0
## 2        1       23.3       87            0         0
## 3        1       23.3       89            0         0
## 4        1       23.3       90            0         0
## 5        1       22.8       90            0         0
## 6        1       23.3       90            0         0

**Note: _org data are original weather data, _Ref data are from BROK data, _Final data are final used data (BROK data if original data are missing)**

Step 2: Check the data accuracy

Make histogram for each variable

hist(Weather_OK$Tair_Org)

hist(Weather_OK$Tair_Ref)

hist(Weather_OK$RH_Org)

hist(Weather_OK$RH_Ref)

hist(Weather_OK$Precip_Org)

hist(Weather_OK$Precip_Ref)

Convert extrme negative data to missing (NA)

Weather_OK$Precip_Org[Weather_OK$Precip_Org<0] <- NA
Weather_OK$Precip_Ref[Weather_OK$Precip_Ref<0] <- NA
Weather_OK$RH_Org[Weather_OK$RH_Org<0] <- NA
Weather_OK$RH_Ref[Weather_OK$RH_Ref<0] <- NA

Fill Ref data to Final data if Org data are missing

Weather_OK$Tair_Fill <- NA
Weather_OK$Tair_Fill <- ifelse(is.na(Weather_OK$Tair_Org), 1, 0)

Weather_OK$RH_Fill <- NA
Weather_OK$RH_Fill <- ifelse(is.na(Weather_OK$RH_Org), 1, 0)

Weather_OK$Precip_Fill <- NA
Weather_OK$Precip_Fill <- ifelse(is.na(Weather_OK$Precip_Org), 1, 0)

Weather_OK$PAR_Fill <- NA
Weather_OK$PAR_Fill <- ifelse(is.na(Weather_OK$PAR_Org), 1, 0)


Weather_OK$Tair_Final <- NA
Weather_OK$Tair_Final <- ifelse(is.na(Weather_OK$Tair_Org), Weather_OK$Tair_Ref, Weather_OK$Tair_Org)


Weather_OK$RH_Final <- NA
Weather_OK$RH_Final <- ifelse(is.na(Weather_OK$RH_Org), Weather_OK$RH_Ref, Weather_OK$RH_Org)

Weather_OK$Precip_Final <- NA
Weather_OK$Precip_Final <- ifelse(is.na(Weather_OK$Precip_Org), Weather_OK$Precip_Ref, Weather_OK$Precip_Org)

Weather_OK$PAR_Final <- NA
Weather_OK$PAR_Final <- ifelse(is.na(Weather_OK$PAR_Org), Weather_OK$PAR_Ref, Weather_OK$PAR_Org)

Step 3: Calcualte how many filled data for each variable

table(Weather_OK$Tair_Fill)
## 
##     0     1 
## 84000 31552
table(Weather_OK$Tair_Fill)["1"]/table(Weather_OK$Tair_Fill)["0"]
##        1 
## 0.375619
table(Weather_OK$RH_Fill)
## 
##     0     1 
## 84042 31510
table(Weather_OK$RH_Fill)["1"]/table(Weather_OK$RH_Fill)["0"]
##         1 
## 0.3749316
table(Weather_OK$Precip_Fill)
## 
##     0     1 
## 93636 21916
table(Weather_OK$Precip_Fill)["1"]/table(Weather_OK$Precip_Fill)["0"]
##         1 
## 0.2340553
table(Weather_OK$PAR_Fill)
## 
##      0      1 
## 100726  14826
table(Weather_OK$PAR_Fill)["1"]/table(Weather_OK$PAR_Fill)["0"]
##         1 
## 0.1471914

Conclusion: There are 38% data are filled for Tair, 37% for RH, 23% for Precip and 15% data are filled for PAR.

Step 4: Check the correlation between original data and reference data

cor.test(Weather_OK$Tair_Org, Weather_OK$Tair_Ref)
## 
##  Pearson's product-moment correlation
## 
## data:  Weather_OK$Tair_Org and Weather_OK$Tair_Ref
## t = 555.4, df = 83829, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8852879 0.8881811
## sample estimates:
##       cor 
## 0.8867432
cor.test(Weather_OK$RH_Org, Weather_OK$RH_Ref)
## 
##  Pearson's product-moment correlation
## 
## data:  Weather_OK$RH_Org and Weather_OK$RH_Ref
## t = 287.16, df = 83873, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7006667 0.7074919
## sample estimates:
##       cor 
## 0.7040955
cor.test(Weather_OK$PAR_Org, Weather_OK$PAR_Ref)
## 
##  Pearson's product-moment correlation
## 
## data:  Weather_OK$PAR_Org and Weather_OK$PAR_Ref
## t = 386.82, df = 100650, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7707115 0.7756805
## sample estimates:
##       cor 
## 0.7732079
cor.test(Weather_OK$Precip_Org, Weather_OK$Precip_Ref)
## 
##  Pearson's product-moment correlation
## 
## data:  Weather_OK$Precip_Org and Weather_OK$Precip_Ref
## t = -1.6025, df = 92583, p-value = 0.1091
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.011707428  0.001174971
## sample estimates:
##          cor 
## -0.005266447
attach(Weather_OK)
PreOrg_Tly <- tapply(Precip_Org, Date, sum, na.rm=T)
PreRef_Tly <- tapply(Precip_Ref, Date, sum, na.rm=T)
cor.test(PreOrg_Tly, PreRef_Tly)
## 
##  Pearson's product-moment correlation
## 
## data:  PreOrg_Tly and PreRef_Tly
## t = -0.20663, df = 1202, p-value = 0.8363
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06243433  0.05055268
## sample estimates:
##         cor 
## -0.00595985

Conclustion: Correlation for Tair is 89%; Correlation for RH is 70%; Correlation for PAR is 77%; Correlation for both hour precipitation and daily precipitation are very low

Step 5: Make plots

Tair_Final vs Time

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(zoo)
## Warning: package 'zoo' was built under R version 3.2.4
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
Weather_OK$ym2 <- as.yearmon(paste(Weather_OK$Year,Weather_OK$Month, sep="-"))


Tair_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$Tair_Final)) +  stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

Tair_gh
## Warning: Removed 15 rows containing non-finite values (stat_summary).

Calculate annual Tair for 2013 and 2014

Weather13_DF <- Weather_OK[Weather_OK$Year==2013, ]
Weather14_DF <- Weather_OK[Weather_OK$Year==2014, ]
Tair13 <- mean(Weather13_DF$Tair_Final, na.rm=T)
Tair13
## [1] 15.51702
Tair14 <- mean(Weather14_DF$Tair_Final, na.rm=T)
Tair14
## [1] 14.58796

Precip_Final vs Time

Weather_OK$Precip_Final[Weather_OK$Precip_Final<0] <- NA

Precip_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$Precip_Final)) +  stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

Precip_gh
## Warning: Removed 3567 rows containing non-finite values (stat_summary).

Calculate average annual precip for 2013 and 2014

Precip13 <- mean(Weather13_DF$Precip_Final, na.rm=T)
Precip13
## [1] 0.04442743
Precip14 <- mean(Weather14_DF$Precip_Final, na.rm=T)
Precip14
## [1] 0.03630001

Calculate VPD and Plot VPD vs Month_Year

Weather_OK$RH_Final[Weather_OK$RH_Final<0] <- NA

Weather_OK$SVP <- NA
Weather_OK$SVP <-  6.11 * exp((2.5e6 / 461) * (1 / 273 - 1 / (273 + Weather_OK$Tair_Final)))

Weather_OK$VPD <- NA
Weather_OK$VPD <- ((100 - Weather_OK$RH_Final) / 100) * Weather_OK$SVP


VPD_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$VPD)) +  stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))

VPD_gh
## Warning: Removed 15 rows containing non-finite values (stat_summary).

Calculate average VPD for 2013 and 2014

Weather13_DF <- Weather_OK[Weather_OK$Year==2013, ]
Weather14_DF <- Weather_OK[Weather_OK$Year==2014, ]
VPD13 <- mean(Weather13_DF$VPD, na.rm=T)
VPD13
## [1] 5.396428
VPD14 <- mean(Weather14_DF$VPD, na.rm=T)
VPD14
## [1] 4.142269

Export the final dataset

write.csv(Weather_OK, "OK_Weather_Filled.csv")