library(readxl) # do wczytywania xls/xlsx
library(dplyr) # wiadomo :)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# na poczatek ustawienie katalogu roboczego dla ERA-Interim:
setwd("~/Dokumenty/publikacje_własne/2017/pol_polar_res/dane_stacje/era_stacje/")
pliki <- dir(recursive = T) # pobieram liste plikow do wektora

era <- NULL # tworze pusty obiekt na zaczytane wyniki

# petla wczytujaca pliki
for (i in 1:length(pliki)){
    tmp <- read_xlsx(pliki[i], col_names = F)
    colnames(tmp) <- c("data","stacja","t2m","dpt","ws","u","v","press","slp")
    head(tmp)
    era <- rbind(era,tmp)
    print(i/length(pliki)) # zeby monitorowac progres
}
## [1] 0.05
## [1] 0.1
## [1] 0.15
## [1] 0.2
## [1] 0.25
## [1] 0.3
## [1] 0.35
## [1] 0.4
## [1] 0.45
## [1] 0.5
## [1] 0.55
## [1] 0.6
## [1] 0.65
## [1] 0.7
## [1] 0.75
## [1] 0.8
## [1] 0.85
## [1] 0.9
## [1] 0.95
## [1] 1
# dodatkowa konwersja jednostek
era$t2m <- era$t2m-273.15
era$dpt <- era$dpt-273.15
era$press <- era$press/100
era$slp <- era$slp/100
tail(era) # podglad czy wszystko ok
## # A tibble: 6 x 9
##                  data           stacja    t2m   dpt    ws     u      v
##                <dttm>            <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 2009-01-31 18:00:00 Svalbard_Airport -3.478 -4.06  2.03 1.573 -1.277
## 2 2009-01-31 19:00:00 Svalbard_Airport     NA    NA    NA    NA     NA
## 3 2009-01-31 20:00:00 Svalbard_Airport     NA    NA    NA    NA     NA
## 4 2009-01-31 21:00:00 Svalbard_Airport     NA    NA    NA    NA     NA
## 5 2009-01-31 22:00:00 Svalbard_Airport     NA    NA    NA    NA     NA
## 6 2009-01-31 23:00:00 Svalbard_Airport     NA    NA    NA    NA     NA
## # ... with 2 more variables: press <dbl>, slp <dbl>
# koniec ery:
########################

# obserwacje
# praktycznie tak samo jak wyzej...
setwd("~/Dokumenty/publikacje_własne/2017/pol_polar_res/dane_stacje/obserwacje_stacje/")
pliki <- dir(recursive = T, pattern="xlsx")
stacje <- NULL

for (i in 1:length(pliki)){
  tmp <- read_xlsx(pliki[i], col_names = T)
  tmp <- dplyr::select(tmp, date_time, station, temp, dewpt, wind_spd, U, V, slp)
  colnames(tmp) <- c("data","stacja","t2m","dpt","ws","u","v","slp")
  head(tmp)
  
  stacje <- rbind(stacje,tmp)
  print(i/length(pliki))
}
## [1] 0.05
## [1] 0.1
## [1] 0.15
## [1] 0.2
## [1] 0.25
## [1] 0.3
## [1] 0.35
## [1] 0.4
## [1] 0.45
## [1] 0.5
## [1] 0.55
## [1] 0.6
## [1] 0.65
## [1] 0.7
## [1] 0.75
## [1] 0.8
## [1] 0.85
## [1] 0.9
## [1] 0.95
## [1] 1
rm(tmp)
tail(stacje)
## # A tibble: 6 x 8
##                  data           stacja   t2m   dpt    ws          u
##                <dttm>            <chr> <dbl> <dbl> <dbl>      <dbl>
## 1 2009-01-31 18:00:00 Svalbard_Airport  -3.5  -3.6     0  0.0000000
## 2 2009-01-31 19:00:00 Svalbard_Airport  -3.3  -3.5     0  0.0000000
## 3 2009-01-31 20:00:00 Svalbard_Airport  -3.9  -4.1     1 -0.3746066
## 4 2009-01-31 21:00:00 Svalbard_Airport  -4.4  -4.6     2 -1.4142136
## 5 2009-01-31 22:00:00 Svalbard_Airport  -4.6  -5.0     1 -0.7071068
## 6 2009-01-31 23:00:00 Svalbard_Airport  -5.1  -5.6     4 -3.7087354
## # ... with 2 more variables: v <dbl>, slp <dbl>
# koniec wczytywania obserwacji
################


# rozpoczecie wczytu danych dla polarwrfa:
setwd("~/Dokumenty/publikacje_własne/2017/pol_polar_res/dane_stacje/PolarWRF/")
pliki <- dir(recursive = T)
wrf <- NULL

for (i in 1:length(pliki)){
  tmp <- read.csv(pliki[i], header=F)
  colnames(tmp) <- c("stacja","lon","lat","x","y","data","hgt","lu","t2m","u","v","dpt","slp","RAINC","RAINNC")
  tmp$ws <- sqrt(tmp$u^2+tmp$v^2)
  tmp$slp <- tmp$slp*exp(9.81*tmp$hgt/(287.04*(tmp$t2m+273.15)))/100
  tmp$t2m <- tmp$t2m-273.15
  tmp <- dplyr::select(tmp, data, stacja, t2m, dpt, ws, u, v, slp)
  tmp$id <- substr(x=pliki[i],start = nchar(pliki[i])-5,nchar(pliki[i])-4) # w ten sposob sobie generuje kolumne z ID z nazwy pliku
  
  wrf <- rbind(wrf,tmp)
  print(i/length(pliki))
}
## [1] 0.002941176
## [1] 0.005882353
## [1] 0.008823529
## [1] 0.01176471
## [1] 0.01470588
## [1] 0.01764706
## [1] 0.02058824
## [1] 0.02352941
## [1] 0.02647059
## [1] 0.02941176
## [1] 0.03235294
## [1] 0.03529412
## [1] 0.03823529
## [1] 0.04117647
## [1] 0.04411765
## [1] 0.04705882
## [1] 0.05
## [1] 0.05294118
## [1] 0.05588235
## [1] 0.05882353
## [1] 0.06176471
## [1] 0.06470588
## [1] 0.06764706
## [1] 0.07058824
## [1] 0.07352941
## [1] 0.07647059
## [1] 0.07941176
## [1] 0.08235294
## [1] 0.08529412
## [1] 0.08823529
## [1] 0.09117647
## [1] 0.09411765
## [1] 0.09705882
## [1] 0.1
## [1] 0.1029412
## [1] 0.1058824
## [1] 0.1088235
## [1] 0.1117647
## [1] 0.1147059
## [1] 0.1176471
## [1] 0.1205882
## [1] 0.1235294
## [1] 0.1264706
## [1] 0.1294118
## [1] 0.1323529
## [1] 0.1352941
## [1] 0.1382353
## [1] 0.1411765
## [1] 0.1441176
## [1] 0.1470588
## [1] 0.15
## [1] 0.1529412
## [1] 0.1558824
## [1] 0.1588235
## [1] 0.1617647
## [1] 0.1647059
## [1] 0.1676471
## [1] 0.1705882
## [1] 0.1735294
## [1] 0.1764706
## [1] 0.1794118
## [1] 0.1823529
## [1] 0.1852941
## [1] 0.1882353
## [1] 0.1911765
## [1] 0.1941176
## [1] 0.1970588
## [1] 0.2
## [1] 0.2029412
## [1] 0.2058824
## [1] 0.2088235
## [1] 0.2117647
## [1] 0.2147059
## [1] 0.2176471
## [1] 0.2205882
## [1] 0.2235294
## [1] 0.2264706
## [1] 0.2294118
## [1] 0.2323529
## [1] 0.2352941
## [1] 0.2382353
## [1] 0.2411765
## [1] 0.2441176
## [1] 0.2470588
## [1] 0.25
## [1] 0.2529412
## [1] 0.2558824
## [1] 0.2588235
## [1] 0.2617647
## [1] 0.2647059
## [1] 0.2676471
## [1] 0.2705882
## [1] 0.2735294
## [1] 0.2764706
## [1] 0.2794118
## [1] 0.2823529
## [1] 0.2852941
## [1] 0.2882353
## [1] 0.2911765
## [1] 0.2941176
## [1] 0.2970588
## [1] 0.3
## [1] 0.3029412
## [1] 0.3058824
## [1] 0.3088235
## [1] 0.3117647
## [1] 0.3147059
## [1] 0.3176471
## [1] 0.3205882
## [1] 0.3235294
## [1] 0.3264706
## [1] 0.3294118
## [1] 0.3323529
## [1] 0.3352941
## [1] 0.3382353
## [1] 0.3411765
## [1] 0.3441176
## [1] 0.3470588
## [1] 0.35
## [1] 0.3529412
## [1] 0.3558824
## [1] 0.3588235
## [1] 0.3617647
## [1] 0.3647059
## [1] 0.3676471
## [1] 0.3705882
## [1] 0.3735294
## [1] 0.3764706
## [1] 0.3794118
## [1] 0.3823529
## [1] 0.3852941
## [1] 0.3882353
## [1] 0.3911765
## [1] 0.3941176
## [1] 0.3970588
## [1] 0.4
## [1] 0.4029412
## [1] 0.4058824
## [1] 0.4088235
## [1] 0.4117647
## [1] 0.4147059
## [1] 0.4176471
## [1] 0.4205882
## [1] 0.4235294
## [1] 0.4264706
## [1] 0.4294118
## [1] 0.4323529
## [1] 0.4352941
## [1] 0.4382353
## [1] 0.4411765
## [1] 0.4441176
## [1] 0.4470588
## [1] 0.45
## [1] 0.4529412
## [1] 0.4558824
## [1] 0.4588235
## [1] 0.4617647
## [1] 0.4647059
## [1] 0.4676471
## [1] 0.4705882
## [1] 0.4735294
## [1] 0.4764706
## [1] 0.4794118
## [1] 0.4823529
## [1] 0.4852941
## [1] 0.4882353
## [1] 0.4911765
## [1] 0.4941176
## [1] 0.4970588
## [1] 0.5
## [1] 0.5029412
## [1] 0.5058824
## [1] 0.5088235
## [1] 0.5117647
## [1] 0.5147059
## [1] 0.5176471
## [1] 0.5205882
## [1] 0.5235294
## [1] 0.5264706
## [1] 0.5294118
## [1] 0.5323529
## [1] 0.5352941
## [1] 0.5382353
## [1] 0.5411765
## [1] 0.5441176
## [1] 0.5470588
## [1] 0.55
## [1] 0.5529412
## [1] 0.5558824
## [1] 0.5588235
## [1] 0.5617647
## [1] 0.5647059
## [1] 0.5676471
## [1] 0.5705882
## [1] 0.5735294
## [1] 0.5764706
## [1] 0.5794118
## [1] 0.5823529
## [1] 0.5852941
## [1] 0.5882353
## [1] 0.5911765
## [1] 0.5941176
## [1] 0.5970588
## [1] 0.6
## [1] 0.6029412
## [1] 0.6058824
## [1] 0.6088235
## [1] 0.6117647
## [1] 0.6147059
## [1] 0.6176471
## [1] 0.6205882
## [1] 0.6235294
## [1] 0.6264706
## [1] 0.6294118
## [1] 0.6323529
## [1] 0.6352941
## [1] 0.6382353
## [1] 0.6411765
## [1] 0.6441176
## [1] 0.6470588
## [1] 0.65
## [1] 0.6529412
## [1] 0.6558824
## [1] 0.6588235
## [1] 0.6617647
## [1] 0.6647059
## [1] 0.6676471
## [1] 0.6705882
## [1] 0.6735294
## [1] 0.6764706
## [1] 0.6794118
## [1] 0.6823529
## [1] 0.6852941
## [1] 0.6882353
## [1] 0.6911765
## [1] 0.6941176
## [1] 0.6970588
## [1] 0.7
## [1] 0.7029412
## [1] 0.7058824
## [1] 0.7088235
## [1] 0.7117647
## [1] 0.7147059
## [1] 0.7176471
## [1] 0.7205882
## [1] 0.7235294
## [1] 0.7264706
## [1] 0.7294118
## [1] 0.7323529
## [1] 0.7352941
## [1] 0.7382353
## [1] 0.7411765
## [1] 0.7441176
## [1] 0.7470588
## [1] 0.75
## [1] 0.7529412
## [1] 0.7558824
## [1] 0.7588235
## [1] 0.7617647
## [1] 0.7647059
## [1] 0.7676471
## [1] 0.7705882
## [1] 0.7735294
## [1] 0.7764706
## [1] 0.7794118
## [1] 0.7823529
## [1] 0.7852941
## [1] 0.7882353
## [1] 0.7911765
## [1] 0.7941176
## [1] 0.7970588
## [1] 0.8
## [1] 0.8029412
## [1] 0.8058824
## [1] 0.8088235
## [1] 0.8117647
## [1] 0.8147059
## [1] 0.8176471
## [1] 0.8205882
## [1] 0.8235294
## [1] 0.8264706
## [1] 0.8294118
## [1] 0.8323529
## [1] 0.8352941
## [1] 0.8382353
## [1] 0.8411765
## [1] 0.8441176
## [1] 0.8470588
## [1] 0.85
## [1] 0.8529412
## [1] 0.8558824
## [1] 0.8588235
## [1] 0.8617647
## [1] 0.8647059
## [1] 0.8676471
## [1] 0.8705882
## [1] 0.8735294
## [1] 0.8764706
## [1] 0.8794118
## [1] 0.8823529
## [1] 0.8852941
## [1] 0.8882353
## [1] 0.8911765
## [1] 0.8941176
## [1] 0.8970588
## [1] 0.9
## [1] 0.9029412
## [1] 0.9058824
## [1] 0.9088235
## [1] 0.9117647
## [1] 0.9147059
## [1] 0.9176471
## [1] 0.9205882
## [1] 0.9235294
## [1] 0.9264706
## [1] 0.9294118
## [1] 0.9323529
## [1] 0.9352941
## [1] 0.9382353
## [1] 0.9411765
## [1] 0.9441176
## [1] 0.9470588
## [1] 0.95
## [1] 0.9529412
## [1] 0.9558824
## [1] 0.9588235
## [1] 0.9617647
## [1] 0.9647059
## [1] 0.9676471
## [1] 0.9705882
## [1] 0.9735294
## [1] 0.9764706
## [1] 0.9794118
## [1] 0.9823529
## [1] 0.9852941
## [1] 0.9882353
## [1] 0.9911765
## [1] 0.9941176
## [1] 0.9970588
## [1] 1
rm(list = c("tmp","i","pliki")) # usuniecie smieci po petli
wrf$data <- as.POSIXct(wrf$data, tz="UTC")
wrf$stacja <- as.character(wrf$stacja)
tail(wrf)
##                       data           stacja       t2m         dpt
## 248875 2009-01-31 18:00:00 Svalbard_Airport -13.59547 0.002244911
## 248876 2009-01-31 19:00:00 Svalbard_Airport -12.31513 0.001783979
## 248877 2009-01-31 20:00:00 Svalbard_Airport -12.18195 0.001651207
## 248878 2009-01-31 21:00:00 Svalbard_Airport -14.47904 0.002170612
## 248879 2009-01-31 22:00:00 Svalbard_Airport -14.05662 0.002161198
## 248880 2009-01-31 23:00:00 Svalbard_Airport -14.73914 0.002192110
##               ws         u          v      slp id
## 248875 2.2402361 1.7665825  1.3776226 1002.096 35
## 248876 2.6244291 2.2766891  1.3054942 1002.286 35
## 248877 2.8399431 2.5749545  1.1978674 1002.246 35
## 248878 1.4721311 1.4495904  0.2566272 1002.190 35
## 248879 0.3568476 0.3243237 -0.1488433 1001.814 35
## 248880 1.2333844 0.2691096 -1.2036682 1001.437 35
############ ROZPOCZECIE GLOWNEJ CZESCI OBLICZENIOWEJ: ############

# ujednoznaczenie nazw kolumn:
colnames(stacje)[-1:-2] <- paste0(colnames(stacje)[-1:-2],"_obs")
colnames(era)[-1:-2] <- paste0(colnames(era)[-1:-2],"_era")
colnames(wrf)[-1:-2] <- paste0(colnames(wrf)[-1:-2],"_wrf")

stacje_era <- left_join(stacje, era)
## Joining, by = c("data", "stacja")
stacje_era$mm <- lubridate::month(stacje_era$data)

## przykladowe staty dla ery:
library(hydroGOF)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
a <- stacje_era %>% group_by(stacja,mm) %>%
  summarise(mae_t2m=mae(sim=t2m_era, obs=t2m_obs, na.rm = T ),
            mae_slp=mae(sim=slp_era, obs=slp_obs, na.rm = T ),
            mae_ws=mae(sim=ws_era, obs=ws_obs, na.rm = T ),
            mae_u=mae(sim=u_era, obs=v_obs, na.rm = T ),
            mae_v=mae(sim=u_era, obs=v_obs, na.rm = T )
  ) %>% filter(mm==1 | mm==6) %>%  arrange(mm)


a <- data.frame(a[,1:2],round(a[,-1:-2],2))
a
##              stacja mm mae_t2m mae_slp mae_ws mae_u mae_v
## 1       Adventdalen  1    1.98    3.54   1.84  5.05  5.05
## 2          Breinosa  1    2.23    1.01   1.61  3.40  3.40
## 3      Gruvefjellet  1    1.73    0.85   1.32  4.73  4.73
## 4             Hopen  1    0.84    0.44   2.96  3.41  3.41
## 5          Hornsund  1    3.05    2.06   2.60  3.61  3.61
## 6          Kapp_Lee  1    1.84    0.63   2.44  4.97  4.97
## 7    Lodowiec_Hansa  1    1.86     NaN   2.16  3.09  3.09
## 8      Longyearbyen  1    1.96    0.57   2.24  6.25  6.25
## 9        Ny_Alesund  1    0.95    0.57   2.66  5.32  5.32
## 10 Svalbard_Airport  1    1.95    0.57   1.75  5.68  5.68
## 11      Adventdalen  6    1.13    3.91   2.34  4.49  4.49
## 12         Breinosa  6    2.92    0.60   1.34  2.54  2.54
## 13     Gruvefjellet  6    2.33    1.35   1.37  3.70  3.70
## 14            Hopen  6     NaN     NaN    NaN   NaN   NaN
## 15         Hornsund  6    0.77    1.63   2.50  3.28  3.28
## 16         Kapp_Lee  6     NaN     NaN    NaN   NaN   NaN
## 17   Lodowiec_Hansa  6     NaN     NaN    NaN   NaN   NaN
## 18     Longyearbyen  6    1.53    0.43   2.53  4.00  4.00
## 19       Ny_Alesund  6    1.51    0.44   1.78  3.99  3.99
## 20 Svalbard_Airport  6    1.47    0.42   2.00  3.86  3.86
## 21             <NA>  6     NaN     NaN    NaN   NaN   NaN
# dobra, koniec zabawy z era; wyglada ze wszystko jest w miare ok

# uzyskiwanie finalnego ksztaltu bazy do obliczen
# tj. dolaczenie wrfa z pozostalymi bazami:
# mozna sie pokusic o zlaczenie ery analogicznie jak wrfa:
colnames(era) <- c("data", "stacja", "t2m_wrf", "dpt_wrf", "ws_wrf", "u_wrf", 
                   "v_wrf", "press_wrf", "slp_wrf")
era$id_wrf <- "era" # dodajemy zamiast identyfikatora symulacji id ery
stacje_era <- left_join(stacje, era) # laczymy bazy danych po wspolnych nazwach kolumn
## Joining, by = c("data", "stacja")
stacje_era$data <- as.POSIXct(stacje_era$data, tz='UTC') # na wszelki wypadek ujednolicenie traktowania czasu

modele <- rbind(wrf, select(era, -press_wrf)) # laczymy ere i wrfa
calosc <- left_join(modele, stacje) # laczymy dane z ery, wrfa ze stacjami
## Joining, by = c("data", "stacja")
calosc <- calosc %>% select(-matches("dpt")) %>% select(data:stacja, id_wrf, t2m_obs:slp_obs, t2m_wrf:slp_wrf) # usuniecie punktu rosy i uszeregowanie kolumn wg jakiegos schematu
head(calosc) # podglad
##                  data      stacja id_wrf t2m_obs ws_obs     u_obs    v_obs
## 1 2008-06-01 00:00:00 Adventdalen     00    1.92   4.70 -3.437362 3.205392
## 2 2008-06-01 01:00:00 Adventdalen     00    2.08   4.50 -2.287305 3.875337
## 3 2008-06-01 02:00:00 Adventdalen     00    1.95   4.37 -2.885926 3.281514
## 4 2008-06-01 03:00:00 Adventdalen     00    1.98   3.67 -2.878938 2.276097
## 5 2008-06-01 04:00:00 Adventdalen     00    1.98   3.15 -2.383456 2.059523
## 6 2008-06-01 05:00:00 Adventdalen     00    2.30   3.92 -2.835449 2.706775
##   slp_obs   t2m_wrf     ws_wrf       u_wrf       v_wrf  slp_wrf
## 1 1006.40  0.993158 1.36191535  0.59025860  1.22735822 1006.896
## 2 1006.50 -1.271765 0.49069828  0.18460323  0.45464981 1006.749
## 3 1006.30  1.075311 0.22586197 -0.22419886 -0.02735868 1006.747
## 4 1006.10  1.264581 0.08294875  0.05566693 -0.06149542 1006.713
## 5 1006.07  1.231683 0.72903679  0.58969724 -0.42866281 1006.710
## 6 1005.88  1.350000 1.21966494  0.73624754 -0.97237962 1006.726
calosc$mm <- lubridate::month(calosc$data)

# mozemy przejsc do obliczen/wizualizacji
library(ggplot2)
calosc %>% filter(mm==1) %>% 
  ggplot( aes(x=stacja,y=t2m_obs-t2m_wrf))+geom_boxplot() + facet_wrap(c("id_wrf")) +  coord_flip()
## Warning: Removed 15139 rows containing non-finite values (stat_boxplot).

calosc %>% filter(mm==1 | mm==6) %>%  # wybieram tylko styczen i czerwiec
  ggplot( aes(x=stacja,y=t2m_obs-t2m_wrf))+geom_boxplot(outlier.shape = NA) + facet_wrap(c("id_wrf","mm")) +  coord_flip() 
## Warning: Removed 59333 rows containing non-finite values (stat_boxplot).

# mozna sobie takze np. zliczyc staty do ramki danych
a <- calosc %>% group_by(stacja,mm, id_wrf) %>%
  summarise(mae_t2m=mae(sim=t2m_wrf, obs=t2m_obs, na.rm = T ),
            mae_slp=mae(sim=slp_wrf, obs=slp_obs, na.rm = T ),
            mae_ws=mae(sim=ws_wrf, obs=ws_obs, na.rm = T ),
            mae_u=mae(sim=u_wrf, obs=v_obs, na.rm = T ),
            mae_v=mae(sim=u_wrf, obs=v_obs, na.rm = T )
  ) %>% filter(mm==1 | mm==6) %>%  arrange(mm)

a
## # A tibble: 360 x 8
## # Groups:   stacja, mm [20]
##         stacja    mm id_wrf  mae_t2m  mae_slp   mae_ws    mae_u    mae_v
##          <chr> <dbl>  <chr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 Adventdalen     1     00 5.544357 1.888337 2.551127 5.922227 5.922227
##  2 Adventdalen     1     01 4.242564 1.802582 2.548730 6.097382 6.097382
##  3 Adventdalen     1     02 3.399957 1.904034 2.412707 6.370124 6.370124
##  4 Adventdalen     1     04 5.662779 1.851709 2.556105 5.909611 5.909611
##  5 Adventdalen     1     05 7.608811 1.842353 2.691962 6.000887 6.000887
##  6 Adventdalen     1     11 5.917842 1.838987 2.542772 5.791721 5.791721
##  7 Adventdalen     1     12 5.478259 1.761452 2.595052 5.917485 5.917485
##  8 Adventdalen     1     14 5.282592 1.888914 2.650367 6.086337 6.086337
##  9 Adventdalen     1     15 5.554804 1.773603 2.623743 5.890153 5.890153
## 10 Adventdalen     1     21 5.548429 1.885036 2.539080 5.915324 5.915324
## # ... with 350 more rows