Packages used

library(ncdf4)
library(stringr)
library(assertthat)
library(abind)

File reader

This function reads the data from the netcdf files and reorganizes the data into an array that we can work with more easily because it has months, years, and grid cells on separate dimensions.

### Function to read the monthly data from a file
### :param filename: Name of a netcdf file
### :param varname: Name of the variable to read from the file
### :return: matrix[month, year, gridcell] of values
readfile <- function(filename, varname=NULL) {
  if(is.null(varname)) {
    ## Extract the variable name from the filename.  It's between the first and second underscores.
    uscores <- str_locate_all(filename,'_')[[1]][,1]
    vstrt <- uscores[1]+1
    vend <- uscores[2]-1
    varname <- substr(filename, vstrt, vend)
  }
  
  nc <- nc_open(filename)
  data <- ncvar_get(nc, varname)
  lat <- ncvar_get(nc, 'lat')
  lon <- ncvar_get(nc, 'lon')
  time <- ncvar_get(nc, 'time')
  nc_close(nc)
  
  nlat <- length(lat)
  nlon <- length(lon)
  ntime <- length(time)
  
  assert_that(all(dim(data) == c(nlon, nlat, ntime)))
  assert_that(ntime %% 12 == 0, msg='Non-integer number of years in data')
  nyear <- ntime / 12
  
  data <- aperm(data, c(3,2,1))
  dim(data) <- c(12, nyear, nlat*nlon)
  data
}

Read and verify the data

## Read data for the ipsl model
prfiles <- list.files(pattern='^monthly_pr.*ipsl.*\\.nc4$')
tasfiles <- list.files(pattern='^monthly_tas.*ipsl.*\\.nc4$')
## Read the data for each file and combine along the year dimension
prdata <- do.call(abind, c(lapply(prfiles, readfile), along=2))
tasdata <- do.call(abind, c(lapply(tasfiles, readfile), along=2))
## Find the valid cells
goodcells_pr <- which(apply(prdata, 3, function(v) {!is.na(v[1,1])}))
goodcells_tas <- which(apply(tasdata, 3, function(v) {!is.na(v[1,1])}))
## Check that we got the right cells and didn't miss any, and that cells are
## consistent betweent the two variables.
assert_that(all.equal(goodcells_pr, goodcells_tas))
[1] TRUE
goodcells <- goodcells_pr
assert_that(!any(is.na(prdata[ , ,goodcells])))
[1] TRUE
assert_that(all(is.na(prdata[ , , -goodcells])))
[1] TRUE
assert_that(!any(is.na(tasdata[ , ,goodcells])))
[1] TRUE
assert_that(all(is.na(tasdata[ , , -goodcells])))
[1] TRUE
prdata <- prdata[, , goodcells]
tasdata <- tasdata[, , goodcells]

Spot check temperature in a few grid cells

## Find some grid cells to test
set.seed(867-5309)
ngrid <- dim(prdata)[3]
checkcells <- sample(1:ngrid, 8)
checkcells
[1] 51621 33613 17080 23504  8214 24277 28768 55900
tasdata[ , 1:10, checkcells] - 273.15
, , 1

            [,1]        [,2]       [,3]        [,4]       [,5]       [,6]       [,7]        [,8]      [,9]
 [1,] -12.799567 -12.1538147 -16.184210 -15.0005859 -14.419989 -12.285590 -12.104254 -15.1442322 -8.672491
 [2,] -11.472571  -8.6476501  -5.485541 -11.4637817  -6.770178 -11.508948  -8.510718  -7.9589600 -8.629523
 [3,]  -2.937811  -0.7818054  -1.583563  -0.7570251  -5.392187  -6.357062  -1.202429  -0.8085083 -2.657843
 [4,]   3.996820   3.2163635   7.418756   2.1435181   6.618921   4.709222   5.925439   5.3807922  5.562189
 [5,]  12.259241  11.0139404  14.310938  11.6847778  12.283838  11.152948   9.762018  15.0373779 10.860315
 [6,]  15.626855  15.7772156  17.558221  16.3012329  16.325159  15.875269  16.603052  17.2030273 17.934229
 [7,]  18.491449  18.3375488  17.445215  17.3489014  18.627924  17.897638  19.607263  17.6905151 18.295923
 [8,]  17.164331  16.6813904  18.318781  18.7339722  16.883600  17.322351  18.318658  17.6065002 16.812189
 [9,]  10.764612  12.6709839  12.110132  10.8958069  11.924799  11.714227  11.169916  11.9279724 11.829553
[10,]   1.975458   2.7708984   3.571954   6.0674377   1.312494   4.290338   7.160699   5.0315796  5.260522
[11,]  -6.481543  -3.2768311  -7.061774  -2.4011902  -4.906073  -4.108008  -2.307867  -4.3980469 -2.907874
[12,]  -8.817969 -11.6453918 -15.031439 -13.7018188 -10.545233  -8.633093  -7.227820  -7.0191101 -9.358466
           [,10]
 [1,] -10.347723
 [2,]  -5.930151
 [3,]  -5.060645
 [4,]   3.450922
 [5,]  10.069269
 [6,]  15.656824
 [7,]  17.396509
 [8,]  18.395868
 [9,]  12.079645
[10,]   5.727136
[11,]  -1.560339
[12,]  -7.561774

, , 2

          [,1]     [,2]     [,3]     [,4]      [,5]     [,6]     [,7]     [,8]     [,9]    [,10]
 [1,] 13.18801 10.22607 12.80239 10.32424  9.792291 11.57580 11.41693 12.59042 11.99545 12.71743
 [2,] 14.27313 11.57714 13.20629 10.90774 13.374567 14.28677 14.18731 15.14953 14.40130 14.28784
 [3,] 15.90414 15.32318 17.98937 18.35336 16.410425 16.92959 17.83587 19.61755 17.05898 19.90331
 [4,] 21.00652 22.60568 20.82427 21.69500 22.202936 21.81405 23.78317 23.96188 21.30490 21.15142
 [5,] 24.61871 24.54858 25.97561 27.52471 27.699274 25.26760 26.71493 26.94164 28.88909 27.55523
 [6,] 29.92950 31.94830 30.70867 30.04339 29.763300 31.50219 30.17391 29.64550 30.27902 30.40103
 [7,] 27.40606 29.12792 32.26110 30.66653 30.840295 32.43282 29.37374 30.46200 29.58792 29.67489
 [8,] 27.80831 29.78433 28.03030 29.39675 28.944177 29.53039 30.30187 29.41442 30.01379 31.24914
 [9,] 27.27514 27.60052 29.20870 29.47772 26.793085 27.69464 28.92962 26.48306 28.24713 27.46707
[10,] 24.05480 23.52224 24.23217 26.38592 22.510004 24.64459 23.19366 22.55718 22.72943 25.10043
[11,] 21.88049 15.26376 14.02651 18.03939 16.388086 18.53695 17.76278 17.83270 14.65447 17.32449
[12,] 15.95913 13.23336 11.74221 13.21957 12.634180 12.28085 12.30316 11.86996 11.78594 13.78988

, , 3

           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]      [,9]     [,10]
 [1,] 22.103143 21.869806 26.386194 24.711694 24.284479 23.610101 26.375421 24.439905 23.001520 24.251123
 [2,] 23.021478 22.436731 22.905176 21.347589 24.303400 22.878748 25.596765 25.311548 22.606653 24.422815
 [3,] 20.373224 19.594476 19.084039 22.846521 18.042841 18.766748 19.253839 21.936487 20.054926 19.953302
 [4,] 15.224268 15.019250 16.019861 15.640466 14.427698 16.228113 12.654962 17.092035 14.390955 14.748254
 [5,] 13.175226 10.271173  9.529443 12.485376 13.350946 11.277399 10.517877 11.361627 10.068506 11.441705
 [6,] 11.235162  9.447534  9.719446  6.816675  8.794763  7.458765  5.596063  9.883508  7.452234 10.280359
 [7,]  7.063623  8.170251  9.502466  5.827448  6.347559  8.581659  8.038721  7.431848  8.413110  9.101526
 [8,] 11.165399 11.308496  9.705194  9.908472  9.782587  9.627924  9.869958 10.558099  9.911310 11.139581
 [9,] 11.951654 12.893213 10.884271 12.223047 10.369989 11.794122 10.765771 14.623438 11.498956 13.766443
[10,] 15.763452 17.358759 16.010583 13.372858 17.952722 14.433771 18.279504 15.583368 18.954095 17.331232
[11,] 22.767877 20.723657 17.035852 19.773187 21.862360 17.133630 22.457635 22.107202 19.227075 20.172998
[12,] 21.926813 22.204675 22.552454 22.610284 22.235773 25.512598 25.643579 22.590479 24.460901 25.632562

, , 4

             [,1]        [,2]        [,3]        [,4]       [,5]        [,6]        [,7]        [,8]
 [1,] -33.6313538 -29.3143372 -26.3357300 -24.6707214 -30.563498 -23.7054199 -33.2916931 -27.2484039
 [2,] -33.1270355 -27.7742523 -31.8763794 -24.9780640 -31.076086 -27.1748718 -24.5893158 -28.8053192
 [3,] -29.1069244 -31.2393707 -31.5282654 -28.6844086 -28.870581 -28.0946564 -26.3307556 -34.3378357
 [4,] -25.1035370 -24.9294647 -24.3090729 -27.7613586 -23.900931 -24.3996185 -22.8265442 -25.1477112
 [5,] -14.0846008 -12.2489380  -9.7831177 -13.1740784 -13.105322 -12.7059387 -11.3988403 -13.6859192
 [6,]  -2.5898804  -3.8542236  -1.5285400  -1.9291748  -1.489752  -4.8165649  -3.0311035   0.1267334
 [7,]   1.8012939   1.6356140   3.7914673   1.3885742   2.996790   0.9989258   1.2104736   2.8548218
 [8,]  -0.9898437  -0.4204773  -0.3556885  -0.8329834   1.574579  -0.3756775  -0.8776917  -0.5966553
 [9,] -10.3003296 -11.8067078 -11.5928711 -10.0042480  -6.280127  -9.9061951  -7.0754456  -9.1221375
[10,] -15.3567566 -21.2694611 -20.9263672 -21.4471497 -18.086325 -15.9616760 -21.6697144 -20.5519928
[11,] -21.5197662 -27.3742889 -24.6880707 -27.1953339 -25.337027 -27.0295929 -26.6817078 -29.7614655
[12,] -28.2580475 -26.1998810 -27.2421173 -27.9916748 -27.691275 -33.0913605 -27.9937042 -29.8626007
             [,9]      [,10]
 [1,] -25.9381012 -28.150961
 [2,] -25.1406311 -27.123846
 [3,] -29.7124390 -28.485144
 [4,] -24.1135162 -20.486807
 [5,] -10.9186462 -14.434607
 [6,]  -0.6385864  -3.999792
 [7,]   3.1830383   4.372369
 [8,]   0.2711121  -1.268958
 [9,]  -9.1829590  -6.984045
[10,] -21.8982605 -18.479803
[11,] -24.7462219 -24.091040
[12,] -25.6929993 -29.309790

, , 5

             [,1]        [,2]       [,3]        [,4]       [,5]        [,6]       [,7]       [,8]       [,9]
 [1,] -27.7480682 -36.2027496 -36.219702 -33.2593750 -27.497061 -29.9943756 -34.330832 -31.836661 -29.982199
 [2,] -33.8331360 -39.2057861 -33.413458 -36.9811615 -32.442313 -35.4577087 -34.164450 -34.387900 -31.629263
 [3,] -30.0424408 -33.7422241 -27.591467 -34.0764832 -35.603445 -30.5772919 -32.790793 -31.601584 -23.643790
 [4,] -21.8845428 -18.9785675 -20.131552 -21.1582550 -18.098105 -20.5220551 -18.364417 -21.085364 -19.833975
 [5,] -11.5590271  -9.6874146  -8.029150 -10.6750854  -9.316260 -10.2965454 -11.359869  -8.134528  -8.967505
 [6,]   0.3212524   1.5031372   6.438593   0.5478455   5.338953   1.8598572   3.466913   4.840112   3.809625
 [7,]   9.7647339  10.8160034   6.000146   8.2136475   7.671381   9.8912292   8.864099  10.340448   8.326440
 [8,]   2.8432861   7.8349854   5.926233   8.6156860   6.791956   4.5991760   8.625909   8.848535   6.085535
 [9,]  -0.1546692   0.6241089  -2.736884  -0.9128174   2.072992  -0.9088501   3.692285   5.194940   1.976556
[10,] -15.0421509  -7.0554260 -12.645514  -9.1838745 -13.624060 -11.0942139  -6.751532 -10.748450  -8.233557
[11,] -23.5939697 -22.4696106 -25.844168 -17.3750671 -23.935309 -16.6133789 -24.654669 -20.186987 -18.180228
[12,] -22.0501923 -33.2021240 -27.204428 -28.9107574 -29.484381 -30.1382507 -28.813528 -27.975012 -27.538138
           [,10]
 [1,] -28.674490
 [2,] -34.087180
 [3,] -29.415793
 [4,] -19.437399
 [5,]  -7.120276
 [6,]   1.616571
 [7,]   8.857324
 [8,]   7.540308
 [9,]  -3.815985
[10,]  -8.772498
[11,] -26.576361
[12,] -31.591635

, , 6

          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]     [,9]    [,10]
 [1,] 17.38854 16.25332 14.87393 13.33087 17.76898 18.87713 15.31719 16.21505 16.36450 17.06500
 [2,] 19.10461 15.27444 17.74682 16.48843 17.85964 17.72326 16.53378 18.13616 17.24380 16.98287
 [3,] 20.14657 19.26211 21.35272 20.75689 22.20001 21.49963 19.61328 18.22457 21.34576 17.56387
 [4,] 22.32311 22.07458 22.25512 24.35095 22.90997 21.74093 21.31405 21.68356 25.77017 22.94381
 [5,] 24.74371 27.65673 24.21554 25.57021 24.51867 26.73284 25.51629 23.23074 23.66848 24.20452
 [6,] 26.65444 25.34979 25.19079 25.77130 24.52728 24.31005 27.81680 26.04122 27.93267 26.56191
 [7,] 33.01196 28.69561 28.41964 30.56390 29.50582 27.60122 28.39309 30.40194 32.10995 30.25305
 [8,] 30.17306 28.90133 29.38656 30.69021 31.67687 30.87142 29.74935 30.94875 30.94647 30.23516
 [9,] 26.43105 26.86166 27.30978 27.51080 28.08514 28.14172 27.07342 28.98000 28.43344 29.41140
[10,] 21.83425 23.41540 24.38043 23.21847 23.46438 22.86251 24.49978 24.19827 25.17324 24.96234
[11,] 17.83270 19.77883 19.72805 20.22097 18.21975 20.11947 18.62393 19.81899 17.75375 20.31970
[12,] 14.34277 15.29418 14.65487 16.83923 15.17635 14.79937 16.08895 15.42104 16.80291 15.91653

, , 7

           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]       [,7]       [,8]       [,9]     [,10]
 [1,]  2.518091  3.758966  2.900629  1.454675  1.175500  4.326898  0.6553284  0.6517883  0.4093567  2.419489
 [2,]  6.746698  2.587122  6.964105  3.768884  4.969110  2.585260  0.3418823  1.2149292  0.2343079  1.802606
 [3,]  6.564935  6.570398  5.920374  5.458582  8.958002  6.929742  3.2356812  6.0108887  5.5053345  4.730096
 [4,]  8.822290 14.051965  6.351801 11.782251  5.654138  8.091516  8.9031921 10.7729431 11.4668213  9.429315
 [5,] 11.941461 10.554437 16.074976 13.736810 12.689233 14.620569 14.9134766 15.9477478 14.3851257 16.840448
 [6,] 16.432947 18.701593 17.810388 18.218408 18.825647 18.309442 18.9470459 17.0850464 18.8597046 18.623041
 [7,] 18.970819 19.323328 20.298029 20.695398 20.295343 20.521234 21.0657898 19.8062683 19.3378845 19.629480
 [8,] 18.336053 19.334711 19.622491 19.325464 19.339227 18.648340 19.9094177 19.5520874 18.6457764 18.346704
 [9,] 14.019281 13.682550 16.664392 15.125757 14.972314 18.097467 14.9400269 13.4916931 13.0979553 15.800439
[10,] 10.861963  9.730005 12.143213 10.701105  8.050928  9.526910 11.0841919 10.5413452 12.2250916  8.777734
[11,]  2.155786  7.880060  7.174677  5.917261  6.476099  8.300348  5.8945251  5.3227478  6.5251709  6.585596
[12,]  2.049951 -1.501593  3.413843  4.170343  2.051538  1.668665  2.0196472  4.1581360  4.6232544  4.798181

, , 8

            [,1]         [,2]       [,3]        [,4]       [,5]       [,6]       [,7]        [,8]
 [1,] -24.834799 -22.43311157 -29.557501 -27.3402618 -29.254691 -27.917273 -23.046530 -28.6671204
 [2,] -21.470206 -22.09418335 -13.571539 -24.7863068 -18.146689 -25.881369 -21.199026 -17.9229187
 [3,] -13.058112  -9.37622681  -9.363318 -13.2405762 -12.652167 -13.996405 -11.226782 -11.2320923
 [4,]  -1.987402  -0.08005371   2.573724   0.1818787   1.243402  -2.371771  -3.106818   0.1498047
 [5,]   8.809900   7.63417969  13.851587  12.3221680   9.718042  10.531061   9.949640  11.2518860
 [6,]  15.094659  16.05553589  14.021814  14.0384766  18.470392  13.909570  15.673639  14.1318604
 [7,]  17.364282  19.33382568  17.111414  18.6786743  19.112360  17.765100  16.806879  16.0960938
 [8,]  13.320886  16.44228516  15.483087  16.3385254  15.928705  17.126794  16.425073  14.7654663
 [9,]   8.348260   9.02785645   7.948755   8.0263000   9.646173  10.729211   7.343835   8.6740967
[10,]  -2.037543  -0.68594971   1.887811  -2.0905823  -4.599097  -2.880377   1.253320   0.7329956
[11,] -17.972052 -10.20117798 -11.504462  -9.8047241 -11.262305 -12.007269 -11.320776  -9.1051697
[12,] -25.374091 -25.57451477 -23.268179 -21.8786530 -23.117438 -16.474646 -18.320013 -18.9731354
             [,9]       [,10]
 [1,] -21.1802277 -16.3342041
 [2,] -23.9454712 -17.8260406
 [3,] -13.3922791 -15.0399841
 [4,]   0.3087402  -4.4210571
 [5,]   8.3093811   5.7476440
 [6,]  15.8435913  16.9756409
 [7,]  18.1469055  17.3795105
 [8,]  15.3530823  17.5196472
 [9,]   8.3970276   9.5290466
[10,]  -0.9396729   0.5836426
[11,] -13.9675659 -13.6851562
[12,] -20.6519073 -21.6846222

These grid cells appear to show the expected annual variation. Several of them are clearly arctic (like cell 4 which only gets above freezing in NH summer). Number 3 appears to be in the southern hemisphere. All of them show a discernible seasonal pattern. We can check their locations from the coordinate table stored in the an2month package.

coord <- an2month::frac_ipsl_cm5a_lr$coordinates[goodcells,]
coord[checkcells,]

We can load the corresponding monthly fractions from an2month for comparison. I’ll focus on cell 8, just to make the matrix manipulations easier. (Note these are the original fractions, computed as a mean over all years.)

cell8tas <- tasdata[ , 1:10, checkcells[8]]
ipsltasfrac <- an2month::frac_ipsl_cm5a_lr$tas[ , goodcells]
cell8tasfrac <- matrix(ipsltasfrac[, checkcells[8]], ncol=1)
## Get annual sums for cell 8
cell8annual <- matrix(apply(cell8tas, 2, mean), nrow=1)
cell8tasfrac %*% cell8annual - 273.15
            [,1]        [,2]        [,3]        [,4]        [,5]        [,6]        [,7]        [,8]
 [1,] -26.433405 -24.7669113 -24.1898112 -25.3469728 -24.9928602 -25.4928583 -24.6765879 -24.6255261
 [2,] -22.587103 -20.8946291 -20.3085321 -21.4837337 -21.1241005 -21.6318936 -20.8028975 -20.7510397
 [3,] -13.474248 -11.7202196 -11.1128065 -12.3307497 -11.9580368 -12.4842980 -11.6251518 -11.5714079
 [4,]  -2.617093  -0.7897277  -0.1569184  -1.4257843  -1.0374881  -1.5857526  -0.6906851  -0.6346942
 [5,]   7.430737   9.3259715   9.9822840   8.6662912   9.0690091   8.5003816   9.4286927   9.4867632
 [6,]  13.745640  15.6835304  16.3546142  15.0090030  15.4207846  14.8393593  15.7885635  15.8479409
 [7,]  16.257260  18.2121156  18.8890744  17.5316831  17.9470696  17.3605542  18.3180682  18.3779655
 [8,]  14.356309  16.2983242  16.9708364  15.6223610  16.0350191  15.4523562  16.4035808  16.4630847
 [9,]   7.298502   9.1928439   9.8488470   8.5334745   8.9360025   8.3676430   9.2955166   9.3535597
[10,]  -1.635904   0.1980888   0.8331933  -0.4402747  -0.0505702  -0.6008232   0.2974906   0.3536847
[11,] -13.680165 -11.9275281 -11.3205967 -12.5375741 -12.1651567 -12.6910007 -11.8325357 -11.7788344
[12,] -22.456748 -20.7633935 -20.1769916 -21.3528046 -20.9929843 -21.5010416 -20.6716142 -20.6197294
             [,9]       [,10]
 [1,] -25.2068552 -24.7138450
 [2,] -21.3414317 -20.8407355
 [3,] -12.1832722 -11.6643659
 [4,]  -1.2721408  -0.7315388
 [5,]   8.8256412   9.3863217
 [6,]  15.1719394  15.7452388
 [7,]  17.6960459  18.2743643
 [8,]  15.7856442  16.3601640
 [9,]   8.6927493   9.2531656
[10,]  -0.2860739   0.2564888
[11,] -12.3902136 -11.8717187
[12,] -21.2104286 -20.7094719

These values look plausible. Note, however, that if you convert to Celsius first and then try to apply the monthly fractions, you get the wrong answer.

cell8tasdegC <- tasdata[ , 1:10, checkcells[8]] - 273.15
cell8annualdegC <- matrix(apply(cell8tasdegC, 2, mean), nrow=1)
cell8tasfrac %*% cell8annualdegC
           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]      [,9]     [,10]
 [1,] -3.341138 -1.674645 -1.097545 -2.254706 -1.900594 -2.400592 -1.584321 -1.533260 -2.114589 -1.621579
 [2,] -3.393226 -1.700753 -1.114656 -2.289857 -1.930224 -2.438017 -1.609021 -1.557163 -2.147555 -1.646859
 [3,] -3.516636 -1.762608 -1.155195 -2.373138 -2.000425 -2.526687 -1.667540 -1.613796 -2.225661 -1.706754
 [4,] -3.663668 -1.836304 -1.203494 -2.472360 -2.084064 -2.632328 -1.737261 -1.681270 -2.318717 -1.778115
 [5,] -3.799740 -1.904505 -1.248193 -2.564186 -2.161468 -2.730095 -1.801784 -1.743714 -2.404836 -1.844155
 [6,] -3.885259 -1.947369 -1.276286 -2.621897 -2.210115 -2.791540 -1.842336 -1.782959 -2.458960 -1.885661
 [7,] -3.919273 -1.964417 -1.287459 -2.644850 -2.229463 -2.815979 -1.858465 -1.798568 -2.480487 -1.902169
 [8,] -3.893529 -1.951514 -1.279002 -2.627478 -2.214819 -2.797482 -1.846258 -1.786754 -2.464194 -1.889675
 [9,] -3.797950 -1.903608 -1.247605 -2.562977 -2.160449 -2.728809 -1.800935 -1.742892 -2.403702 -1.843286
[10,] -3.676956 -1.842964 -1.207859 -2.481327 -2.091623 -2.641876 -1.743562 -1.687368 -2.327126 -1.784564
[11,] -3.513848 -1.761210 -1.154279 -2.371256 -1.998839 -2.524683 -1.666218 -1.612517 -2.223896 -1.705401
[12,] -3.394992 -1.701637 -1.115235 -2.291048 -1.931228 -2.439285 -1.609858 -1.557973 -2.148672 -1.647716

What about the new fractions

The monthly fractions we applied above were not the ones we were trying to use to get the Dirichlet parameters. We can try again with those and see how they compare.

ipslnewfracs_all <- readRDS('tas_ipsl-cm5a-lr_rcp4p5_monthlyFrac.rds')
assert_that(!any(is.na(ipslnewfracs_all[ , ,goodcells])))
[1] TRUE
assert_that(all(is.na(ipslnewfracs_all[ , , -goodcells])))
[1] TRUE
ipslnewfracs <- ipslnewfracs_all[ , , goodcells]
cell8tasnewfrac <- ipslnewfracs[ , , checkcells[8]]

Let’s see how the mean fraction compares to our old mean, and let’s check how much it varies from year to year.

cell8newmeanfrac <- apply(cell8tasnewfrac, 2, mean)
cell8newsdfrac <- apply(cell8tasnewfrac, 2, sd)
cat('old mean fraction:\n')
old mean fraction:
as.vector(cell8tasfrac)
 [1] 0.9154594 0.9297314 0.9635453 1.0038315 1.0411147 1.0645466 1.0738661 1.0668125 1.0406240 1.0074723
[11] 0.9627812 0.9302151
cat('new mean fraction:\n')
new mean fraction:
cell8newmeanfrac
  January  February     March     April       May      June      July    August September   October 
1.0009690 0.9998011 0.9984396 0.9985206 1.0006349 1.0016347 1.0010667 0.9994387 0.9982732 0.9988878 
 November  December 
1.0005484 1.0017852 
cat('\nold monthly sd (Kelvin):\n')

old monthly sd (Kelvin):
apply(cell8tas, 1, sd)
 [1] 4.299809 3.803029 1.903470 2.146344 2.351389 1.501208 1.037879 1.215380 0.990869 2.044929 2.581853
[12] 2.981257
cat('new monthly sd (converted to Kelvin):\n')
new monthly sd (converted to Kelvin):
cell8newsdfrac * apply(cell8tas, 1, mean)
  January  February     March     April       May      June      July    August September   October 
 14.23060  14.38582  14.60422  15.02430  16.14654  16.12148  16.74372  15.59831  15.63319  15.42668 
 November  December 
 14.39879  13.65653 
matrix(cell8newmeanfrac, ncol=1) %*% cell8annual - 273.15
           [,1]      [,2]       [,3]      [,4]      [,5]      [,6]      [,7]      [,8]      [,9]     [,10]
 [1,] -3.388529 -1.566375 -0.9353697 -2.200617 -1.813428 -2.360130 -1.467614 -1.411783 -2.047412 -1.508352
 [2,] -3.703283 -1.883255 -1.2529864 -2.516758 -2.130021 -2.676084 -1.784610 -1.728844 -2.363731 -1.825300
 [3,] -4.070217 -2.252667 -1.6232569 -2.885307 -2.499097 -3.044416 -2.154157 -2.098466 -2.732489 -2.194791
 [4,] -4.048375 -2.230677 -1.6012161 -2.863369 -2.477127 -3.022491 -2.132159 -2.076464 -2.710538 -2.172796
 [5,] -3.478590 -1.657044 -1.0262501 -2.291075 -1.904016 -2.450534 -1.558317 -1.502504 -2.137921 -1.599041
 [6,] -3.209122 -1.385756 -0.7543315 -2.020421 -1.632974 -2.180039 -1.286930 -1.231062 -1.867113 -1.327694
 [7,] -3.362196 -1.539864 -0.9087980 -2.174169 -1.786942 -2.333697 -1.441095 -1.385258 -2.020949 -1.481836
 [8,] -3.800963 -1.981594 -1.3515544 -2.614868 -2.228271 -2.774136 -1.882985 -1.827239 -2.461896 -1.923660
 [9,] -4.115045 -2.297799 -1.6684933 -2.930333 -2.544187 -3.089416 -2.199305 -2.143624 -2.777541 -2.239932
[10,] -3.949420 -2.131055 -1.5013622 -2.763979 -2.377595 -2.923160 -2.032500 -1.976785 -2.611092 -2.073153
[11,] -3.501897 -1.680508 -1.0497688 -2.314485 -1.927459 -2.473930 -1.581790 -1.525982 -2.161344 -1.622510
[12,] -3.168583 -1.344944 -0.7134243 -1.979704 -1.592199 -2.139346 -1.246103 -1.190226 -1.826373 -1.286873

These results are pretty strange. The monthly pattern in the mean is all messed up. Interestingly, the variability in the new monthly temperature values actually seems to be higher than what we were seeing in the old data. Apparently, monthly mean temperatures are pretty consistent from year to year, having a standard deviaiton (for this grid cell) of about 1-4 K, depending on the month.

Results from applying the mean of the new fractions look more similar to the badly Celsius converted results than they do to the correct results, suggesting that there is a bad conversion in here somewhere.

Tentative Conclusions

The shift in the zero point between Celsius and Kelvin invalidates the monthly fractions. So, if we convert units before we apply (or, equally, compute) the monthly fractions, we will get bogus results. Note that this should not be a problem with pr unit conversion because that function applies only a multiplicative scale, which will not invalidate the monthly fractions. Generally, I think we should avoid Celsius conversions in our calculation. It’s easy enough to convert on the fly if we want to display in Celsius, or if another code requires Celsius values, so it’s not worth the risk of getting screwy results.

That said, there is still something off about the latest round of monthly values. They don’t match up with the old values at all, and they don’t fit the expected seasonal pattern for the grid cell we are looking at.

Finally, it looks as if the low year to year variability is real. If anything, the year to year variability in the latest round of calculations seems too high, about 5 times what we are seeing in the raw monthly temperatures. This is a little annoying because the large \(\alpha\) values implied by the low variability seem to take a lot longer to fit in the stan sampler. (Fitting precipitation distributions, on the other hand, goes very quickly.)

Where do we go from here?

There is something very weird in the latest new monthly values. At this point, unless there’s an obvious explanation for it, I’m not sure it’s worth trying to track down what is causing it. We have just about everything we need here to create the input to the Stan calculation directly from the netcdf files of monthly data, so maybe we get back on track quicker if we just write those data off and start over.

I’m not crazy about the slow Stan fits for temperature. Right now it takes about 10 minutes to do 64 grid cells on 4 processors. That’s about 27 hours per model (I think) on a single PIC node. We could parallelize over more nodes and get it done faster, but it’s still not ideal. I think I might be able to speed the calculation along by specifying a better prior, but it will require some experimenting, and at least initially those experiments could be a little slow.

Converting to monthly fractions

Normalize the tas and pr data to produce monthly fractions (instead of averages).

mean2frac <- function(a) {
  ## Find sum of months for each year, grid
  yrgridfacs <- 1/apply(a, c(2,3), sum)
  ## Permute the array to put the months in the last dimension
  a <- aperm(a, c(2,3,1))
  ## Multiply by normalization factor.  Vector recycling ensures that each month for a single
  ## year and grid gets multipled by the same constant.
  a <- a * as.vector(yrgridfacs)
  ## Repermute back into the original dimension
  aperm(a, c(3,1,2))
}
tasfrac <- mean2frac(tasdata)
prfrac <- mean2frac(prdata)
tasfrac_kd <- readfile('../output-L1/tas_ipsl-cm5a-lr_rcp4p5_monthlyFrac.nc', 'tas')
prfrac_kd <- readfile('../output-L1/pr_ipsl-cm5a-lr_rcp4p5_monthlyFrac.nc', 'pr')
assert_that(!any(is.na(tasfrac_kd[ , ,goodcells])))
[1] TRUE
assert_that(all(is.na(tasfrac_kd[ , , -goodcells])))
[1] TRUE
##assert_that(!any(is.na(prfrac_kd[ , ,goodcells])))   # not a good test due to div by zero values.
assert_that(all(is.na(prfrac_kd[ , , -goodcells])))
[1] TRUE
tasfrac_kd <- tasfrac_kd[ , , goodcells]/12
prfrac_kd <- prfrac_kd[ , , goodcells]/12
tasfrac[ , 1:8, checkcells[8]]
            [,1]       [,2]       [,3]       [,4]       [,5]       [,6]       [,7]       [,8]
 [1,] 0.07678259 0.07700509 0.07464347 0.07567463 0.07497811 0.07554147 0.07678876 0.07504766
 [2,] 0.07782297 0.07710918 0.07954201 0.07646089 0.07839293 0.07616861 0.07735599 0.07834575
 [3,] 0.08042411 0.08101537 0.08083153 0.08001534 0.08008205 0.07982965 0.08041775 0.08039960
 [4,] 0.08384733 0.08387059 0.08448937 0.08414756 0.08435382 0.08341050 0.08291081 0.08389344
 [5,] 0.08718601 0.08623994 0.08794522 0.08788505 0.08695908 0.08738509 0.08691950 0.08730139
 [6,] 0.08912935 0.08882647 0.08799738 0.08841343 0.08964972 0.08842580 0.08867693 0.08818544
 [7,] 0.08983115 0.08983337 0.08894412 0.08984196 0.08984708 0.08961346 0.08902487 0.08878839
 [8,] 0.08858088 0.08894526 0.08844516 0.08912152 0.08886836 0.08941684 0.08890764 0.08837993
 [9,] 0.08704327 0.08666799 0.08613643 0.08656253 0.08693699 0.08744613 0.08611945 0.08651010
[10,] 0.08383183 0.08368450 0.08427919 0.08344796 0.08255772 0.08325383 0.08424949 0.08407246
[11,] 0.07890465 0.08076200 0.08017542 0.08107310 0.08050932 0.08044238 0.08038889 0.08105249
[12,] 0.07661584 0.07604024 0.07657070 0.07735603 0.07686482 0.07906625 0.07823993 0.07802337
tasfrac_kd[ , 1:8, checkcells[8]]
            [,1]       [,2]       [,3]       [,4]       [,5]       [,6]       [,7]       [,8]
 [1,] 0.07678260 0.07700508 0.07464347 0.07567463 0.07497812 0.07554147 0.07678875 0.07504765
 [2,] 0.07782298 0.07710918 0.07954201 0.07646089 0.07839293 0.07616861 0.07735599 0.07834575
 [3,] 0.08042412 0.08101537 0.08083153 0.08001534 0.08008205 0.07982965 0.08041775 0.08039959
 [4,] 0.08384733 0.08387059 0.08448937 0.08414756 0.08435381 0.08341050 0.08291081 0.08389344
 [5,] 0.08718602 0.08623994 0.08794521 0.08788505 0.08695908 0.08738509 0.08691951 0.08730138
 [6,] 0.08912936 0.08882647 0.08799738 0.08841343 0.08964973 0.08842581 0.08867693 0.08818544
 [7,] 0.08983115 0.08983337 0.08894412 0.08984195 0.08984708 0.08961347 0.08902486 0.08878839
 [8,] 0.08858088 0.08894526 0.08844515 0.08912152 0.08886836 0.08941684 0.08890764 0.08837993
 [9,] 0.08704328 0.08666799 0.08613642 0.08656253 0.08693699 0.08744613 0.08611944 0.08651009
[10,] 0.08383184 0.08368449 0.08427918 0.08344796 0.08255772 0.08325383 0.08424949 0.08407245
[11,] 0.07890466 0.08076199 0.08017542 0.08107310 0.08050932 0.08044238 0.08038889 0.08105248
[12,] 0.07661584 0.07604023 0.07657069 0.07735604 0.07686482 0.07906626 0.07823993 0.07802336
t(ipslnewfracs[1:8, , checkcells[8]]) /12
                2006       2007       2008       2009       2010       2011       2012       2013
January   0.07678260 0.07782298 0.08042412 0.08384733 0.08718602 0.08912936 0.08983115 0.08858088
February  0.08105248 0.07802336 0.07752721 0.07667639 0.07992344 0.08413903 0.08660071 0.08891886
March     0.08719485 0.08365611 0.07940445 0.07704489 0.07625715 0.07702764 0.07917999 0.08349788
April     0.08928821 0.08939549 0.08628352 0.08402712 0.07948862 0.07770739 0.07682852 0.07689978
May       0.08675984 0.08849782 0.08949405 0.08918690 0.08744078 0.08365825 0.08178261 0.07762498
June      0.08093305 0.08435172 0.08709607 0.08886290 0.08971066 0.08891948 0.08695312 0.08297429
July      0.07699189 0.07755658 0.07987810 0.08264881 0.08637239 0.08817541 0.08928084 0.08877810
August    0.08038037 0.07793007 0.07741694 0.07728166 0.08090703 0.08263167 0.08760008 0.08810310
September 0.08682813 0.08302083 0.08016596 0.07935913 0.07606222 0.07672331 0.07966132 0.08289779
October   0.08912340 0.08932826 0.08743685 0.08431101 0.08166896 0.07769856 0.07624961 0.07714472
November  0.08684355 0.08812558 0.08968371 0.08881089 0.08689810 0.08465164 0.08079976 0.07913265
December  0.07920772 0.08317811 0.08621902 0.08854709 0.08943227 0.08909937 0.08699197 0.08494909

So, to summarize, my calculation of monthly fractions and that stored in the L1 data agree; however, the rds file of the same data is garbled.

Final Conclusions

The question of Celsius converison appears to be a red herring (though we do need to make sure that it is not applied in the monthly downscaling until after we have performed the downscaling calculation).
The fundamental problem appears to be that the monthly fractions were garbled in the translation to rds format. We can easily fix this by reading directly from the netcdf version.

We still need to try to find a good way to speed up the stan fitting for temperature. We might be able to fix this with suitable priors, but it will take some experimenting to figure out something that works.

---
title: "Analysis of monthly temperature and precipitation fractions"
output: html_notebook
---


## Packages used

```{r libs}
library(ncdf4)
library(stringr)
library(assertthat)
library(abind)
```

## File reader

This function reads the data from the netcdf files and reorganizes the data into an array that we 
can work with more easily because it has months, years, and grid cells on separate dimensions.

```{r readfile}
### Function to read the monthly data from a file
### :param filename: Name of a netcdf file
### :param varname: Name of the variable to read from the file
### :return: matrix[month, year, gridcell] of values
readfile <- function(filename, varname=NULL) {
  if(is.null(varname)) {
    ## Extract the variable name from the filename.  It's between the first and second underscores.
    uscores <- str_locate_all(filename,'_')[[1]][,1]
    vstrt <- uscores[1]+1
    vend <- uscores[2]-1
    varname <- substr(filename, vstrt, vend)
  }
  
  nc <- nc_open(filename)
  data <- ncvar_get(nc, varname)
  lat <- ncvar_get(nc, 'lat')
  lon <- ncvar_get(nc, 'lon')
  time <- ncvar_get(nc, 'time')
  nc_close(nc)
  
  nlat <- length(lat)
  nlon <- length(lon)
  ntime <- length(time)
  
  assert_that(all(dim(data) == c(nlon, nlat, ntime)))
  assert_that(ntime %% 12 == 0, msg='Non-integer number of years in data')
  nyear <- ntime / 12
  
  data <- aperm(data, c(3,2,1))
  dim(data) <- c(12, nyear, nlat*nlon)
  data
}
```

## Read and verify the data

```{r readdata}
## Read data for the ipsl model
prfiles <- list.files(pattern='^monthly_pr.*ipsl.*\\.nc4$')
tasfiles <- list.files(pattern='^monthly_tas.*ipsl.*\\.nc4$')

## Read the data for each file and combine along the year dimension
prdata <- do.call(abind, c(lapply(prfiles, readfile), along=2))
tasdata <- do.call(abind, c(lapply(tasfiles, readfile), along=2))
```

```{r checkfilter}
## Find the valid cells
goodcells_pr <- which(apply(prdata, 3, function(v) {!is.na(v[1,1])}))
goodcells_tas <- which(apply(tasdata, 3, function(v) {!is.na(v[1,1])}))

## Check that we got the right cells and didn't miss any, and that cells are
## consistent betweent the two variables.
assert_that(all.equal(goodcells_pr, goodcells_tas))
goodcells <- goodcells_pr
assert_that(!any(is.na(prdata[ , ,goodcells])))
assert_that(all(is.na(prdata[ , , -goodcells])))
assert_that(!any(is.na(tasdata[ , ,goodcells])))
assert_that(all(is.na(tasdata[ , , -goodcells])))

prdata <- prdata[, , goodcells]
tasdata <- tasdata[, , goodcells]
```

## Spot check temperature in a few grid cells

```{r checkcells}
## Find some grid cells to test
set.seed(867-5309)
ngrid <- dim(prdata)[3]
checkcells <- sample(1:ngrid, 8)
checkcells
tasdata[ , 1:10, checkcells] - 273.15
```

These grid cells appear to show the expected annual variation.  Several of them are clearly arctic (like cell
4 which only gets above freezing in NH summer).  Number 3 appears to be in the southern hemisphere.  All of
them show a discernible seasonal pattern.  We can check their locations from the coordinate table stored
in the `an2month` package.
```{r celllocation}
coord <- an2month::frac_ipsl_cm5a_lr$coordinates[goodcells,]
coord[checkcells,]
```

We can load the corresponding monthly fractions from `an2month` for comparison.  I'll focus on cell 8, just
to make the matrix manipulations easier.  (Note these are the _original_ fractions, computed as a mean over
all years.)
```{r tascmp}
cell8tas <- tasdata[ , 1:10, checkcells[8]]
ipsltasfrac <- an2month::frac_ipsl_cm5a_lr$tas[ , goodcells]
cell8tasfrac <- matrix(ipsltasfrac[, checkcells[8]], ncol=1)

## Get annual sums for cell 8
cell8annual <- matrix(apply(cell8tas, 2, mean), nrow=1)

cell8tasfrac %*% cell8annual - 273.15
```

These values look plausible.  Note, however, that if you convert to Celsius _first_ and then try to apply the 
monthly fractions, you get the wrong answer.

```{r tascmpdegC}
cell8tasdegC <- tasdata[ , 1:10, checkcells[8]] - 273.15
cell8annualdegC <- matrix(apply(cell8tasdegC, 2, mean), nrow=1)

cell8tasfrac %*% cell8annualdegC
```

## What about the new fractions

The monthly fractions we applied above were not the ones we were trying to use to get the Dirichlet 
parameters.  We can try again with those and see how they compare.

```{r newfracs}
ipslnewfracs_all <- readRDS('tas_ipsl-cm5a-lr_rcp4p5_monthlyFrac.rds')
assert_that(!any(is.na(ipslnewfracs_all[ , ,goodcells])))
assert_that(all(is.na(ipslnewfracs_all[ , , -goodcells])))
ipslnewfracs <- ipslnewfracs_all[ , , goodcells]
cell8tasnewfrac <- ipslnewfracs[ , , checkcells[8]]
```

Let's see how the mean fraction compares to our old mean, and let's check how much it varies from
year to year.

```{r checknewfracs}
cell8newmeanfrac <- apply(cell8tasnewfrac, 2, mean)
cell8newsdfrac <- apply(cell8tasnewfrac, 2, sd)

cat('old mean fraction:\n')
as.vector(cell8tasfrac)
cat('new mean fraction:\n')
cell8newmeanfrac
cat('\nold monthly sd (Kelvin):\n')
apply(cell8tas, 1, sd)
cat('new monthly sd (converted to Kelvin):\n')
cell8newsdfrac * apply(cell8tas, 1, mean)

matrix(cell8newmeanfrac, ncol=1) %*% cell8annual - 273.15
```

These results are pretty strange.  The monthly pattern in the mean is all messed up.  Interestingly, the
variability in the new monthly temperature values actually seems to be _higher_ than what we were seeing
in the old data.  Apparently, monthly mean temperatures are pretty consistent from year to year, having
a standard deviaiton (for this grid cell) of about 1-4 K, depending on the month.  

Results from applying the mean of the new fractions look more similar to the badly Celsius converted results 
than they do to the correct results, suggesting that there is a bad conversion in here somewhere.


## Tentative Conclusions

The shift in the zero point between Celsius and Kelvin invalidates the monthly fractions.  So, if we 
convert units _before_ we apply (or, equally, compute) the monthly fractions, we will 
get bogus results.  Note that this should _not_ be a problem with `pr` unit conversion because 
that function applies only a multiplicative scale, which will not invalidate the monthly fractions.
Generally, I think we should avoid Celsius conversions in our calculation.  It's easy enough to convert
on the fly if we want to display in Celsius, or if another code requires Celsius values, so it's not
worth the risk of getting screwy results.

That said, there is still something off about the latest round of monthly values.  They don't match up
with the old values at all, and they don't fit the expected seasonal pattern for the grid cell we are
looking at.  

Finally, it looks as if the low year to year variability is real.  If anything, the year to year variability 
in the latest round of calculations seems too high, about 5 times what we are seeing in the raw monthly 
temperatures.  This is a little annoying because the large $\alpha$ values implied by the low variability
seem to take a lot longer to fit in the stan sampler.  (Fitting precipitation distributions, on the other
hand, goes very quickly.)

## Where do we go from here?

There is something very weird in the latest new monthly values.  At this point, unless there's an obvious
explanation for it, I'm not sure it's worth trying to track down what is causing it.  We have just about 
everything we need here to create the input to the Stan calculation directly from the netcdf files of
monthly data, so maybe we get back on track quicker if we just write those data off and start over.

I'm not crazy about the slow Stan fits for temperature. Right now it takes about 10 minutes to do 64
grid cells on 4 processors.  That's about 27 hours per model (I think) on a single PIC node.  We could 
parallelize over more nodes and get it done faster, but it's still not ideal. I think I might be able 
to speed the calculation along by specifying a better prior, but it will require some experimenting, and 
at least initially those experiments could be a little slow.

## Converting to monthly fractions

Normalize the `tas` and `pr` data to produce monthly fractions (instead of averages).

```{r}
mean2frac <- function(a) {
  ## Find sum of months for each year, grid
  yrgridfacs <- 1/apply(a, c(2,3), sum)
  ## Permute the array to put the months in the last dimension
  a <- aperm(a, c(2,3,1))
  ## Multiply by normalization factor.  Vector recycling ensures that each month for a single
  ## year and grid gets multipled by the same constant.
  a <- a * as.vector(yrgridfacs)
  ## Repermute back into the original dimension
  aperm(a, c(3,1,2))
}
```

```{r}
tasfrac <- mean2frac(tasdata)
prfrac <- mean2frac(prdata)
```

```{r}
tasfrac_kd <- readfile('../output-L1/tas_ipsl-cm5a-lr_rcp4p5_monthlyFrac.nc', 'tas')
prfrac_kd <- readfile('../output-L1/pr_ipsl-cm5a-lr_rcp4p5_monthlyFrac.nc', 'pr')
assert_that(!any(is.na(tasfrac_kd[ , ,goodcells])))
assert_that(all(is.na(tasfrac_kd[ , , -goodcells])))
##assert_that(!any(is.na(prfrac_kd[ , ,goodcells])))   # not a good test due to div by zero values.
assert_that(all(is.na(prfrac_kd[ , , -goodcells])))

tasfrac_kd <- tasfrac_kd[ , , goodcells]/12
prfrac_kd <- prfrac_kd[ , , goodcells]/12
```

```{r}
tasfrac[ , 1:8, checkcells[8]]
tasfrac_kd[ , 1:8, checkcells[8]]
t(ipslnewfracs[1:8, , checkcells[8]]) /12
```

So, to summarize, my calculation of monthly fractions and that stored in the L1 data agree; however, the
rds file of the same data is garbled.

## Final Conclusions

The question of Celsius converison appears to be a red herring (though we do need to make sure that it
is not applied in the monthly downscaling until after we have performed the downscaling calculation).  
The fundamental problem appears to be that the monthly fractions were garbled in the translation to 
rds format.  We can easily fix this by reading directly from the netcdf version.

We still need to try to find a good way to speed up the stan fitting for temperature.  We might be able
to fix this with suitable priors, but it will take some experimenting to figure out something that works.
