Check the final weather data: show the first rows
setwd('C:\\Users\\Yang\\Desktop\\Research\\Work')
Weather_OK <- read.csv("Weather_OK_Final.csv")
head(Weather_OK)
## X Year Month Day Date Time DateTime Tair_Org RH_Org Precip_Org
## 1 1 2012 8 8 8/8/2012 0:00 1344402000 NA NA NA
## 2 2 2012 8 8 8/8/2012 0:15 1344402900 NA NA NA
## 3 3 2012 8 8 8/8/2012 0:30 1344403800 NA NA NA
## 4 4 2012 8 8 8/8/2012 0:45 1344404700 NA NA NA
## 5 5 2012 8 8 8/8/2012 1:00 1344405600 NA NA NA
## 6 6 2012 8 8 8/8/2012 1:15 1344406500 NA NA NA
## PAR_Org Tair_Ref RH_Ref Precip_Ref PAR_Ref Tair_Fill RH_Fill Precip_Fill
## 1 NA 23.9 87 0 0 1 1 1
## 2 NA 23.3 87 0 0 1 1 1
## 3 NA 23.3 89 0 0 1 1 1
## 4 NA 23.3 90 0 0 1 1 1
## 5 NA 22.8 90 0 0 1 1 1
## 6 NA 23.3 90 0 0 1 1 1
## PAR_Fill Tair_Final RH_Final Precip_Final PAR_Final
## 1 1 23.9 87 0 0
## 2 1 23.3 87 0 0
## 3 1 23.3 89 0 0
## 4 1 23.3 90 0 0
## 5 1 22.8 90 0 0
## 6 1 23.3 90 0 0
**Note: _org data are original weather data, _Ref data are from BROK data, _Final data are final used data (BROK data if original data are missing)**
Make histogram for each variable
hist(Weather_OK$Tair_Org)
hist(Weather_OK$Tair_Ref)
hist(Weather_OK$RH_Org)
hist(Weather_OK$RH_Ref)
hist(Weather_OK$Precip_Org)
hist(Weather_OK$Precip_Ref)
Convert extrme negative data to missing (NA)
Weather_OK$Precip_Org[Weather_OK$Precip_Org<0] <- NA
Weather_OK$Precip_Ref[Weather_OK$Precip_Ref<0] <- NA
Weather_OK$RH_Org[Weather_OK$RH_Org<0] <- NA
Weather_OK$RH_Ref[Weather_OK$RH_Ref<0] <- NA
Fill Ref data to Final data if Org data are missing
Weather_OK$Tair_Fill <- NA
Weather_OK$Tair_Fill <- ifelse(is.na(Weather_OK$Tair_Org), 1, 0)
Weather_OK$RH_Fill <- NA
Weather_OK$RH_Fill <- ifelse(is.na(Weather_OK$RH_Org), 1, 0)
Weather_OK$Precip_Fill <- NA
Weather_OK$Precip_Fill <- ifelse(is.na(Weather_OK$Precip_Org), 1, 0)
Weather_OK$PAR_Fill <- NA
Weather_OK$PAR_Fill <- ifelse(is.na(Weather_OK$PAR_Org), 1, 0)
Weather_OK$Tair_Final <- NA
Weather_OK$Tair_Final <- ifelse(is.na(Weather_OK$Tair_Org), Weather_OK$Tair_Ref, Weather_OK$Tair_Org)
Weather_OK$RH_Final <- NA
Weather_OK$RH_Final <- ifelse(is.na(Weather_OK$RH_Org), Weather_OK$RH_Ref, Weather_OK$RH_Org)
Weather_OK$Precip_Final <- NA
Weather_OK$Precip_Final <- ifelse(is.na(Weather_OK$Precip_Org), Weather_OK$Precip_Ref, Weather_OK$Precip_Org)
Weather_OK$PAR_Final <- NA
Weather_OK$PAR_Final <- ifelse(is.na(Weather_OK$PAR_Org), Weather_OK$PAR_Ref, Weather_OK$PAR_Org)
table(Weather_OK$Tair_Fill)
##
## 0 1
## 84000 31552
table(Weather_OK$Tair_Fill)["1"]/table(Weather_OK$Tair_Fill)["0"]
## 1
## 0.375619
table(Weather_OK$RH_Fill)
##
## 0 1
## 84042 31510
table(Weather_OK$RH_Fill)["1"]/table(Weather_OK$RH_Fill)["0"]
## 1
## 0.3749316
table(Weather_OK$Precip_Fill)
##
## 0 1
## 93636 21916
table(Weather_OK$Precip_Fill)["1"]/table(Weather_OK$Precip_Fill)["0"]
## 1
## 0.2340553
table(Weather_OK$PAR_Fill)
##
## 0 1
## 100726 14826
table(Weather_OK$PAR_Fill)["1"]/table(Weather_OK$PAR_Fill)["0"]
## 1
## 0.1471914
Conclusion: There are 38% data are filled for Tair, 37% for RH, 23% for Precip and 15% data are filled for PAR.
cor.test(Weather_OK$Tair_Org, Weather_OK$Tair_Ref)
##
## Pearson's product-moment correlation
##
## data: Weather_OK$Tair_Org and Weather_OK$Tair_Ref
## t = 555.4, df = 83829, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8852879 0.8881811
## sample estimates:
## cor
## 0.8867432
cor.test(Weather_OK$RH_Org, Weather_OK$RH_Ref)
##
## Pearson's product-moment correlation
##
## data: Weather_OK$RH_Org and Weather_OK$RH_Ref
## t = 287.16, df = 83873, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7006667 0.7074919
## sample estimates:
## cor
## 0.7040955
cor.test(Weather_OK$PAR_Org, Weather_OK$PAR_Ref)
##
## Pearson's product-moment correlation
##
## data: Weather_OK$PAR_Org and Weather_OK$PAR_Ref
## t = 386.82, df = 100650, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7707115 0.7756805
## sample estimates:
## cor
## 0.7732079
cor.test(Weather_OK$Precip_Org, Weather_OK$Precip_Ref)
##
## Pearson's product-moment correlation
##
## data: Weather_OK$Precip_Org and Weather_OK$Precip_Ref
## t = -1.6025, df = 92583, p-value = 0.1091
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.011707428 0.001174971
## sample estimates:
## cor
## -0.005266447
attach(Weather_OK)
PreOrg_Tly <- tapply(Precip_Org, Date, sum, na.rm=T)
PreRef_Tly <- tapply(Precip_Ref, Date, sum, na.rm=T)
cor.test(PreOrg_Tly, PreRef_Tly)
##
## Pearson's product-moment correlation
##
## data: PreOrg_Tly and PreRef_Tly
## t = -0.20663, df = 1202, p-value = 0.8363
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06243433 0.05055268
## sample estimates:
## cor
## -0.00595985
Conclustion: Correlation for Tair is 89%; Correlation for RH is 70%; Correlation for PAR is 77%; Correlation for both hour precipitation and daily precipitation are very low
Tair_Final vs Time
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(zoo)
## Warning: package 'zoo' was built under R version 3.2.4
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
Weather_OK$ym2 <- as.yearmon(paste(Weather_OK$Year,Weather_OK$Month, sep="-"))
Tair_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$Tair_Final)) + stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
Tair_gh
## Warning: Removed 15 rows containing non-finite values (stat_summary).
Calculate annual Tair for 2013 and 2014
Weather13_DF <- Weather_OK[Weather_OK$Year==2013, ]
Weather14_DF <- Weather_OK[Weather_OK$Year==2014, ]
Tair13 <- mean(Weather13_DF$Tair_Final, na.rm=T)
Tair13
## [1] 15.51702
Tair14 <- mean(Weather14_DF$Tair_Final, na.rm=T)
Tair14
## [1] 14.58796
Precip_Final vs Time
Weather_OK$Precip_Final[Weather_OK$Precip_Final<0] <- NA
Precip_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$Precip_Final)) + stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
Precip_gh
## Warning: Removed 3567 rows containing non-finite values (stat_summary).
Calculate average annual precip for 2013 and 2014
Precip13 <- mean(Weather13_DF$Precip_Final, na.rm=T)
Precip13
## [1] 0.04442743
Precip14 <- mean(Weather14_DF$Precip_Final, na.rm=T)
Precip14
## [1] 0.03630001
Calculate VPD and Plot VPD vs Month_Year
Weather_OK$RH_Final[Weather_OK$RH_Final<0] <- NA
Weather_OK$SVP <- NA
Weather_OK$SVP <- 6.11 * exp((2.5e6 / 461) * (1 / 273 - 1 / (273 + Weather_OK$Tair_Final)))
Weather_OK$VPD <- NA
Weather_OK$VPD <- ((100 - Weather_OK$RH_Final) / 100) * Weather_OK$SVP
VPD_gh <- ggplot(Weather_OK,aes(x=factor(Weather_OK$ym2),y=Weather_OK$VPD)) + stat_summary(fun.y=mean,geom="bar", aes(width=0.5)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
VPD_gh
## Warning: Removed 15 rows containing non-finite values (stat_summary).
Calculate average VPD for 2013 and 2014
Weather13_DF <- Weather_OK[Weather_OK$Year==2013, ]
Weather14_DF <- Weather_OK[Weather_OK$Year==2014, ]
VPD13 <- mean(Weather13_DF$VPD, na.rm=T)
VPD13
## [1] 5.396428
VPD14 <- mean(Weather14_DF$VPD, na.rm=T)
VPD14
## [1] 4.142269
Export the final dataset
write.csv(Weather_OK, "OK_Weather_Filled.csv")