library(gstat)
library(sp)
library(rgdal)
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.0, released 2014/04/16
## Path to GDAL shared files: C:/Users/Matthias/R/rgdal/gdal
## GDAL does not use iconv for recoding strings.
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: C:/Users/Matthias/R/rgdal/proj
library(spacetime)
library(lattice)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## Die folgenden Objekte sind maskiert from 'package:base':
##
## as.Date, as.Date.numeric
library(dismo)
## Loading required package: raster
wd = "C:/Users/Matthias/Documents/NORWEGEN/pred_game"
#dir.create(wd)
setwd(wd)
sp.theme(TRUE)
#####################
# 1 Data Processing
#####################
# process input data
##########################
#library(RCurl)
#curl <- getCurlHandle()
#options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
#curlSetOpt(.opts = list(proxy = 'proxyserver:port'), curl = curl)
#cat(getURL("https://docs.google.com/spreadsheets/d/13XvBnAjLkpcj_1gmiPkhNUqfHdNxkJqXjJhDy8yRrT4/export?gid=0&format=csv"), file="data.csv")
input <- read.csv("data.csv")
coordinates(input) <- ~ longitude + latitude
proj4string(input) <- CRS("+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs")
input$Date = as.Date(input$Date,"%d-%m-%Y")
input=input[order(input$Date),] ## order by date
str(input)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 1950 obs. of 7 variables:
## .. ..$ Date : Date[1:1950], format: "2012-01-01" ...
## .. ..$ Port1VW : num [1:1950] 0.314 0.202 0.325 0.301 0.254 0.275 0.221 0.297 0.275 0.2 ...
## .. ..$ Port2VW : num [1:1950] 0.284 0.322 0.343 0.392 0.291 0.3 0.319 0.216 0.351 0.322 ...
## .. ..$ Port3VW : num [1:1950] 0.233 0.363 0.253 0.4 0.345 0.283 0.288 0.232 0.189 0.361 ...
## .. ..$ Port4VW : num [1:1950] 0.316 0.384 0.398 0.27 0.333 0.313 0.24 0.232 0.242 0.382 ...
## .. ..$ Port5VW : num [1:1950] NA NA 0.399 0.264 0.333 0.296 0.277 0.277 0.355 NA ...
## .. ..$ TAXSUSDA: Factor w/ 5 levels "Latah","Naff",..: 1 5 3 2 3 3 3 2 2 5 ...
## ..@ coords.nrs : int [1:2] 8 9
## ..@ coords : num [1:1950, 1:2] 493277 494117 493544 493672 493798 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "longitude" "latitude"
## ..@ bbox : num [1:2, 1:2] 493247 5180586 494117 5181087
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "longitude" "latitude"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slots
## .. .. ..@ projargs: chr "+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
outKML = input
outKML=spTransform(outKML, CRSobj = CRS("+proj=longlat +datum=WGS84"))
# plot observations in google maps
plot(gmap("USA"))

points(Mercator(outKML),pch="+",cex=2)

plot(gmap(outKML,type="satellite"))

points(Mercator(outKML),pch="+",cex=2)

writeOGR(obj=outKML, dsn="points.kml", "points","KML", overwrite_layer = TRUE)
#unlink("points.kml")
rm("outKML")
# process validation data
##########################
validation <- read.csv("validation.csv")
coordinates(validation) <- ~ longitude + latitude
proj4string(validation) <- CRS("+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs")
validation$Date = as.Date(validation$Date,"%d-%m-%Y")
str(validation)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 9217 obs. of 3 variables:
## .. ..$ Date : Date[1:9217], format: "2012-01-12" ...
## .. ..$ altitude: num [1:9217] -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 ...
## .. ..$ TAXSUSDA: Factor w/ 5 levels "Latah","Naff",..: 2 2 2 2 2 2 2 2 2 2 ...
## ..@ coords.nrs : int [1:2] 2 3
## ..@ coords : num [1:9217, 1:2] 493383 493383 493383 493383 493383 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:2] "longitude" "latitude"
## ..@ bbox : num [1:2, 1:2] 493247 5180586 494117 5181087
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "longitude" "latitude"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slots
## .. .. ..@ projargs: chr "+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
outKML = validation
outKML=spTransform(outKML, CRSobj = CRS("+proj=longlat +datum=WGS84"))
writeOGR(obj=outKML, dsn="validation.kml", "validation","KML",overwrite_layer = TRUE)
rm("outKML")
# process covariates
##########################
con <- url("http://geostat-course.org/system/files/cookfarm_covs.rda")
load(con)
str(cookfarm_covs)
## List of 4
## $ covs2D :'data.frame': 5858 obs. of 7 variables:
## ..$ MUSYM : Factor w/ 7 levels "Thatuna silt loam, 7 to 25 percent slopes",..: 1 1 1 6 6 6 6 6 6 7 ...
## ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## ..$ DEM : num [1:5858] 790 790 791 791 791 ...
## ..$ TWI : num [1:5858] 2.49 2.46 2.44 2.41 2.43 ...
## ..$ X2012 : Factor w/ 4 levels "SB","SL","SW",..: NA NA NA NA NA NA NA NA NA NA ...
## ..$ X2013 : Factor w/ 4 levels "SB","SL","SW",..: NA NA NA NA NA NA NA NA NA NA ...
## $ covs3D :List of 5
## ..$ :'data.frame': 5858 obs. of 4 variables:
## .. ..$ Bt : num [1:5858] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## .. ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## .. ..$ altitude : num [1:5858] -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 ...
## ..$ :'data.frame': 5858 obs. of 4 variables:
## .. ..$ Bt : num [1:5858] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## .. ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## .. ..$ altitude : num [1:5858] -0.6 -0.6 -0.6 -0.6 -0.6 -0.6 -0.6 -0.6 -0.6 -0.6 ...
## ..$ :'data.frame': 5858 obs. of 4 variables:
## .. ..$ Bt : num [1:5858] 0.00997 0.00981 0.01057 0 0 ...
## .. ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## .. ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## .. ..$ altitude : num [1:5858] -0.9 -0.9 -0.9 -0.9 -0.9 -0.9 -0.9 -0.9 -0.9 -0.9 ...
## ..$ :'data.frame': 5858 obs. of 4 variables:
## .. ..$ Bt : num [1:5858] 0.1548 0.1545 0.1558 0.0519 0.0536 ...
## .. ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## .. ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## .. ..$ altitude : num [1:5858] -1.2 -1.2 -1.2 -1.2 -1.2 -1.2 -1.2 -1.2 -1.2 -1.2 ...
## ..$ :'data.frame': 5858 obs. of 4 variables:
## .. ..$ Bt : num [1:5858] 0.267 0.267 0.268 0.134 0.137 ...
## .. ..$ longitude: num [1:5858] 493184 493194 493204 493214 493224 ...
## .. ..$ latitude : num [1:5858] 5181127 5181127 5181127 5181127 5181127 ...
## .. ..$ altitude : num [1:5858] -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 ...
## $ meteo :'data.frame': 2400 obs. of 4 variables:
## ..$ Date : Date[1:2400], format: "2007-04-20" ...
## ..$ Precip_wrcc: num [1:2400] 0 0 1 3.3 0 0 0 0 0 0 ...
## ..$ MaxT_wrcc : num [1:2400] 11.7 13.9 14.4 12.8 16.7 18.3 13.9 16.7 21.1 18.3 ...
## ..$ MinT_wrcc : num [1:2400] -1.1 3.3 5.6 6.7 3.3 5.6 0.6 5 9.4 5.6 ...
## $ proj4string: chr "+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs"
rm("con")
# create grid for spatial predictions
#####################################
sp.grid = makegrid(input)
coordinates(sp.grid) <- ~x1 + x2
gridded(sp.grid) = TRUE
proj4string(sp.grid) <- CRS("+proj=utm +zone=11 +datum=NAD83 +units=m +no_defs")
## group locations
######################
## how many locations can be distinguished?
loc=as.factor(paste0(coordinates(input)[,1], coordinates(input)[,1]))
str(loc)
## Factor w/ 40 levels "493246.6493246.6",..: 2 40 8 16 23 9 33 3 20 40 ...
levels(loc) = paste0(rep("L",40), 1:40)
levels(loc)
## [1] "L1" "L2" "L3" "L4" "L5" "L6" "L7" "L8" "L9" "L10" "L11"
## [12] "L12" "L13" "L14" "L15" "L16" "L17" "L18" "L19" "L20" "L21" "L22"
## [23] "L23" "L24" "L25" "L26" "L27" "L28" "L29" "L30" "L31" "L32" "L33"
## [34] "L34" "L35" "L36" "L37" "L38" "L39" "L40"
input$loc=loc
rm("loc")
## reorganize data according to depth values
###########################################
dnames = c("Port1VW","Port2VW", "Port3VW", "Port4VW", "Port5VW")
depths = c(-0.3, -0.6, -0.9, -1.2, -1.5)
bigInput = data.frame();
for(i in 1:dim(input)[1]){
for(j in 1:length(depths) ){
row=as.data.frame(input[i,-2:-6])
row$altitude=depths[j]
row$port=as.factor(dnames[j])
levels(row$port) = dnames
row$water=input@data[i,dnames[j]]
bigInput=rbind(bigInput, row)
}
}
bigInput$cdays = as.numeric(input$Date)
# t-scale is 1.005205 --> temporal variable does not need to be scaled
tscale= (input@bbox[1,"max"]-input@bbox[1,"min"] + input@bbox[2,"max"] - input@bbox[2,"min"])/(2*(max(bigInput$cdays)-min(bigInput$cdays)));tscale
## [1] 1.005
names(bigInput)[1:2] <- c("longitude", "latitude")
coordinates(bigInput) <- c("longitude", "latitude", "cdays")
proj4string(bigInput) <- CRS(proj4string(input))
save.image("prepared.data.Rdata")
#########################
# 2 Data Exploration
#######################
## Simple interpolation over space for each depth
idw1 = idw(Port1VW~1, input,sp.grid)
## [inverse distance weighted interpolation]
idw2 = idw(Port2VW~1, input,sp.grid)
## [inverse distance weighted interpolation]
idw3 = idw(Port3VW~1, input[!is.na(input$Port3VW) ,] ,sp.grid)
## [inverse distance weighted interpolation]
idw4 = idw(Port4VW~1, input[!is.na(input$Port4VW) ,] ,sp.grid)
## [inverse distance weighted interpolation]
idw5 = idw(Port5VW~1, input[!is.na(input$Port5VW) ,] ,sp.grid)
## [inverse distance weighted interpolation]
idw.pred=data.frame(Port1.pred = idw1$var1.pred, Port2.pred = idw2$var1.pred,Port3.pred = idw3$var1.pred,Port4.pred = idw4$var1.pred,Port5.pred = idw5$var1.pred)
sp.grid=as(sp.grid, "SpatialPixelsDataFrame")
sp.grid@data = idw.pred
spplot(sp.grid[rev(1:5)],sp.layout= list(list("sp.points", input, col="black")))#,list("sp.text", coordinates(input), input$TAXSUSDA, cex=0.5)))

# plot space and time
cloud(cdays ~ longitude * latitude, as.data.frame(coordinates(bigInput)), col="grey")

# plot some observations at specific location over time and altitude
##############
st = "L12"
plot(Port1VW~Date, input[input$loc==st,], type="l",ylim = c(0, 0.5), main=st)
lines(Port2VW~Date, input[input$loc==st,], type="l", col="red")
lines(Port3VW~Date, input[input$loc==st,], type="l", col="blue")
lines(Port4VW~Date, input[input$loc==st,], type="l", col="green")
lines(Port5VW~Date, input[input$loc==st,], type="l", col="orange")

st = "L20"
plot(Port1VW~Date, input[input$loc==st,], type="l",ylim = c(0, 0.5), main=st)
lines(Port2VW~Date, input[input$loc==st,], type="l", col="red")
lines(Port3VW~Date, input[input$loc==st,], type="l", col="blue")
lines(Port4VW~Date, input[input$loc==st,], type="l", col="green")
lines(Port5VW~Date, input[input$loc==st,], type="l", col="orange")

st = "L40"
plot(Port1VW~Date, input[input$loc==st,], type="l",ylim = c(0, 0.5), main=st)
lines(Port2VW~Date, input[input$loc==st,], type="l", col="red")
lines(Port3VW~Date, input[input$loc==st,], type="l", col="blue")
lines(Port4VW~Date, input[input$loc==st,], type="l", col="green")
lines(Port5VW~Date, input[input$loc==st,], type="l", col="orange")

## Boxplot over altitude levels
#############
boxplot(input@data[,c("Port1VW","Port2VW", "Port3VW", "Port4VW", "Port5VW")])

#############################
### 3 Modelling
###############
## 3.1 Fit Seasonal Model
# Create timeseries from daily mean measurements (for all locations and depths)
tvect=as.Date("2012-01-01"):as.Date("2013-11-13")
class(tvect)="Date"
water.mean = c()
date = c()
for(timestamp in tvect){
sel=bigInput$Date == timestamp
if(sum(sel) > 0){
water.day = bigInput[sel,]
water.mean = c(water.mean, mean(water.day$water, na.rm = TRUE))
date = c(date, timestamp)
}
}
class(date)="Date"
ts.input = data.frame(Date=date, water = water.mean)
# fit sinusordidal function as seasonal model
# (stl and loess do not work with the data because it's less than two periods)
theta = min(bigInput$cdays)
seas.mod = function(a,b, c,d, input, theta.=theta){
values=a*cos((b*(theta-as.numeric(input$Date))-c) *pi/180)+d
estimates = data.frame(values)
if(!is.null(input$water)){
estimates$res = input$water - values #compute residuals
}
estimates
}
# wrapper function arround seasonal model to compute mse for optimization
fopt = function(x, ts.input.=ts.input, theta.=theta, seas.mod.= seas.mod){
estim=seas.mod(x[1],x[2],x[3],x[4],ts.input, theta)
mse = sum( (estim$res)^2 )/length(estim$values) # mean square error
mse
}
# compute optimal parameters by minimizing MSE (start parameters determined by trial and error)
out = optim(c(-0.03950991, -1.07975811, -77.13643554,0.3), fopt, control=list(maxit=5000));out
## $par
## [1] -0.06469 -1.05754 -71.64585 0.30438
##
## $value
## [1] 0.001136
##
## $counts
## function gradient
## 425 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
estim= seas.mod( out$par[1],out$par[2],out$par[3],out$par[4], ts.input, theta)
plot(ts.input)
lines(ts.input$Date,estim$values, col="red")

#####
# compute estimates and residuals from seasonal models for all data
###
estim = seas.mod( out$par[1],out$par[2],out$par[3],out$par[4], input=bigInput, theta)
plot(water~Date, bigInput)
lines(bigInput$Date,estim$values, col="red")

bigInput$res1 = estim$res
## 3.2 Fit Regression Model
#############################
theta = min(bigInput$cdays)
lm.game = lm(res1 ~ altitude+longitude+latitude + TAXSUSDA, data = bigInput);
summary(lm.game) #all predictors are significant!!!
##
## Call:
## lm(formula = res1 ~ altitude + longitude + latitude + TAXSUSDA,
## data = bigInput)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24266 -0.04292 -0.00158 0.04034 0.29428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.56e+02 2.92e+01 -8.77 < 2e-16 ***
## altitude -6.76e-02 1.62e-03 -41.62 < 2e-16 ***
## longitude -7.57e-06 3.63e-06 -2.09 0.037 *
## latitude 5.01e-05 5.69e-06 8.81 < 2e-16 ***
## TAXSUSDANaff 3.38e-02 4.17e-03 8.09 6.9e-16 ***
## TAXSUSDAPalouse -7.65e-03 4.11e-03 -1.86 0.063 .
## TAXSUSDAStaley -4.41e-02 5.49e-03 -8.03 1.1e-15 ***
## TAXSUSDAThatuna 2.02e-02 4.07e-03 4.97 6.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.066 on 9219 degrees of freedom
## (523 observations deleted due to missingness)
## Multiple R-squared: 0.205, Adjusted R-squared: 0.204
## F-statistic: 339 on 7 and 9219 DF, p-value: <2e-16
p=predict(lm.game, data.frame(bigInput))
plot(res1~Date,bigInput[1000:1500,])
points(bigInput$Date, p, col="red")

##########################################
# 4. Prediction
#######################################
lm.pred=predict(lm.game, data.frame(validation))
estimates = lm.pred + seas.mod( out$par[1],out$par[2],out$par[3],out$par[4], input=validation, theta)$values
### compare with results
preds=read.csv("Predicted_VW.csv")
preds$estimates=estimates
#plot(results$Measured)
plot(preds$Measured[1:500], type="l", lwd=2)
#lines(results$Matthias, col="red")
lines(preds$estimates,col="red")

pred.ts=as.xts(preds,order.by = as.Date(preds$Date))
plot(pred.ts$Measured)
#lines(results$Matthias, col="red")
lines(pred.ts$estimates,col="red")

lapply(preds[,5:10], function(x){sqrt(mean((preds$Measured - x)^2, na.rm = TRUE))})
## $Measured
## [1] 0
##
## $RH
## [1] 0.1172
##
## $Hanna
## [1] 0.02346
##
## $eBaldo
## [1] 0.07938
##
## $Matthias
## [1] 0.06821
##
## $estimates
## [1] 0.06526