Chapter 1: Basics of Climate Data Arrays and

Their Statistics and Visualization

# R plot Fig. 1.1: A simple line graph of data
# go to your working directory
setwd("/rCode")
# read the data file from the folder named "data"
#install.packages('knitr')
NOAAtemp = read.table(
  "data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
  header=FALSE) #Read from the data folder
# check the data matrix dimension

dim(NOAAtemp)
## [1] 140   6
#[1] 140   6 
#140 years from 1880 to 2019
#2019 will be excluded since data only up to July 2019
#col1 is year, col2 is anomalies, col3-6 are data errors
# set the plot margins and the positions of labels
par(mar=c(3.5,3.5,2.5,1), mgp=c(2,0.8,0))
plot(NOAAtemp[1:139,1], NOAAtemp[1:139,2],
     type ="o", col="brown", lwd=3,
     main ="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab=1.2,cex.axis=1.2,
     xlab="Year", 
     ylab=expression(
       paste("Temperature Anomaly [", degree,"C]"))
)

#R plot Fig. 1.2: Staircase chart of data
plot(NOAAtemp[1:139,1], NOAAtemp[1:139,2],
     type="s", #staircase curve for data
     col="black", lwd=2,
     main="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab=1.2,cex.axis=1.2,
     xlab="year",
     ylab=expression(paste(
       "Temperature Anomaly [", degree,"C]"))
)

# R plot Fig. 1.3: A color bar chart of data
x <- NOAAtemp[,1]
y <- NOAAtemp[,2]
z <- rep(-99, length(x))
# compute 5-point moving average
for (i in 3:length(x)-2) z[i] = 
  mean(c(y[i-2],y[i-1],y[i],y[i+1],y[i+2]))
n1 <- which(y>=0); x1 <- x[n1]; y1 <- y[n1]
n2 <- which(y<0); x2 <- x[n2]; y2 <- y[n2]
x3 <- x[2:length(x)-2]
y3 <- z[2:length(x)-2]
plot(x1, y1, type="h", #bars for data
     xlim = c(1880,2016), lwd=3, 
     tck = 0.02,  #tck>0 makes ticks inside the plot
     ylim = c(-0.7,0.7), xlab="Year", col="red",
     ylab = expression(paste(
       "Temperature Anomaly [", degree,"C]")),
     main ="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab = 1.2, cex.axis = 1.2)
lines(x2, y2, type="h",
      lwd = 3, tck = -0.02, col = "blue")
lines(x3, y3, lwd = 2)

#
setwd("/rCode")
#R code for computing statistical indices
NOAAtemp = read.table(
  "data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
  header=FALSE)
temp2018=NOAAtemp[1:139,2] #use the temp data up to 2018
head(temp2018) #show the first six values
## [1] -0.370221 -0.319993 -0.320088 -0.396044 -0.458355 -0.470374
#[1] -0.370221 -0.319993 -0.320088 -0.396044 -0.458355 -0.470374
mean(temp2018) #mean
## [1] -0.1858632
#[1] -0.1858632
sd(temp2018) #standard deviation
## [1] 0.324757
#[1] 0.324757
var(temp2018) #variance
## [1] 0.1054671
#[1] 0.1054671
library(e1071) 
#This R library is needed to compute the following parameters
#install.packages("e1071") #if it is not in your computer
skewness(temp2018) 
## [1] 0.7742704
#[1] 0.7742704
kurtosis(temp2018)
## [1] -0.2619131
#[1] -0.2619131
median(temp2018)
## [1] -0.274434
#[1] -0.274434
quantile(temp2018,probs= c(0.05, 0.25, 0.75, 0.95))
##         5%        25%        75%        95% 
## -0.5764861 -0.4119770  0.0155245  0.4132383
#       5%        25%        75%        95% 
#  -0.5764861 -0.4119770  0.0155245  0.4132383 

#
#R plot Fig. 1.4. Histogram nad its fit
par(mar=c(3.5,3.5,2.5,1), mgp=c(2,0.8,0))
h <- hist(NOAAtemp[1:139, 2], 
         main="Histogram of 1880-2018 Temperature Anomalies",
         xlab=expression(paste(
           "Temperature anomalies [", degree, "C]")), 
         xlim=c(-1,1), ylim=c(0,30),
         breaks=10, cex.lab=1.2, cex.axis=1.2) 
xfit <- seq(-1, 1, length=100)
areat <- sum((h$counts)*diff(h$breaks[1:2]))#Normalization area
yfit <- areat*dnorm(xfit, 
                    mean=mean(NOAAtemp[1:139,2]), 
                    sd=sd(NOAAtemp[1:139,2]))
#Plot the normal fit on the histogram
lines(xfit, yfit, col="blue", lwd=3) 

#
# R plot Fig. 1.5: Box plot
boxplot(NOAAtemp[1:139, 2], ylim = c(-0.8, 0.8), 
        ylab=expression(paste(
          "Temperature anomalies [", degree, "C]")),
        width=NULL, cex.lab=1.2, cex.axis=1.2)

#
# R plot Fig. 1.6: Q-Q plot for the standardized 
# global average annual mean temperature anomalies
temp2018 <- NOAAtemp[1:139,2]
tstand <- (temp2018 - mean(temp2018))/sd(temp2018)
set.seed(101)
qn <- rnorm(139) #simulate 139 points by N(0,1)
qns <- sort(qn) # sort the points
qq2 <- qqnorm(qns,col="blue",lwd = 2)

setEPS() #Automatically saves the .eps file
postscript("fig0106.eps", height=7, width=7)
par(mar = c(4.5,5,2.5,1), xaxs = "i", yaxs = "i")
qt = qqnorm(tstand, 
  main = "Q-Q plot for the Standardized Global Average 
  Annual Mean Temperature Anomalies vs N(0,1)", 
   ylab="Quantile of Temperature Anomalies", 
   xlab="Quantile of N(0,1)", 
  xlim=c(-3,3), ylim = c(-3,3),
          cex.lab = 1.3, cex.axis = 1.3)
qqline(tstand, col = "red", lwd=3)
points(qq2$x, qq2$y, pch = 19, 
       col ="purple")
dev.off()
## png 
##   2
setwd("/rCode")
# R plot Fig. 1.7: Data line graph with a linear trend line
par(mar=c(3.5,3.5,2.5,1), mgp=c(2,0.8,0))
plot(NOAAtemp[1:139,1], NOAAtemp[1:139,2],
     type="l", col="brown", lwd=3,
     main=" Global Land-Ocean Average Annual Mean 
Surface Temperature Anomaly: 1880-2018",
     cex.lab=1.2,cex.axis=1.2,
     xlab="Year", 
     ylab=expression(paste(
       "Temperature Anomaly [", degree,"C]"))
)



abline(lm(NOAAtemp[1:139,2] ~ NOAAtemp[1:139,1]),
       lwd=3, col="blue")
lm(NOAAtemp[1:139,2] ~ NOAAtemp[1:139,1])
## 
## Call:
## lm(formula = NOAAtemp[1:139, 2] ~ NOAAtemp[1:139, 1])
## 
## Coefficients:
##        (Intercept)  NOAAtemp[1:139, 1]  
##         -13.872921            0.007023
#       (Intercept)  NOAAtemp[1:139, 1]  
#-13.872921            0.007023 
#Trend 0.7023 degC/100a
text(1930, 0.5, 
     expression(paste("Linear trend: 0.7023",
                      degree,"C/100a")),
     cex = 1.5, col="blue")

#
# Rea read the netCDF data: NOAAGlobalTemp 
#install.packages("ncdf4")
library(ncdf4)
nc = ncdf4::nc_open("data/air.mon.anom.nc")
nc # describes details of the dataset
## File data/air.mon.anom.nc (NC_FORMAT_NETCDF4_CLASSIC):
## 
##      2 variables (excluding dimension variables):
##         double time_bnds[nbnds,time]   (Chunking: [2,1])  (Compression: shuffle,level 2)
##             long_name: Time Boundaries
##         float air[lon,lat,time]   (Chunking: [72,36,1])  (Compression: shuffle,level 2)
##             var_desc: Air Temperature
##             level_desc: Surface
##             statistic: Anomaly
##             parent_stat: Observation
##             valid_range: -40
##              valid_range: 40
##             units: degC
##             missing_value: -9.96920996838687e+36
##             long_name: Surface Air Temperature and SST Monthly Anomaly
##             precision: 2
##             cell_methods: time: anomaly (monthly from values)
##             standard_name: air_temperature_anomaly
##             dataset: NOAA Global Temperature
##             actual_range: -20.1061992645264
##              actual_range: 20.0300006866455
##             date_of_file_acquired: 2019-8-2
## 
##      4 dimensions:
##         time  Size:1674   *** is unlimited *** 
##             units: days since 1800-1-1 00:00:0.0
##             long_name: Time
##             delta_t: 0000-01-00 00:00:00
##             avg_period: 0000-01-00 00:00:00
##             standard_name: time
##             axis: T
##             coordinate_defines: start
##             bounds: time_bnds
##             calendar: gregorian
##             coverage_content_type: coordinate
##             actual_range: 29219
##              actual_range: 80139
##         lat  Size:36 
##             long_name: Latitude
##             units: degrees_north
##             actual_range: -87.5
##              actual_range: 87.5
##             axis: Y
##             standard_name: latitude
##             grids: Uniform grid from -87.5 to 87.5 by 5
##             coordinate_defines: center
##             _CoordinateAxisType: Lat
##         lon  Size:72 
##             long_name: Longitude
##             units: degrees_east
##             actual_range: 2.5
##              actual_range: 357.5
##             axis: X
##             standard_name: longitude
##             grids: Uniform grid from 2.5 to 357.5 by 5
##             coordinate_defines: center
##             _CoordinateAxisType: Lon
##         nbnds  Size:2 (no dimvar)
## 
##     22 global attributes:
##         Conventions: CF-1.0
##         Source: ftp://ftp.ncdc.noaa.gov/pub/data/noaaglobaltemp/operational/
##         dataset_title: NOAA Global Surface Temperature (NOAAGlobalTemp)
##         References: https://www.esrl.noaa.gov/psd/data/gridded/data.noaaglobaltemp.html
##         keywords_vocabulary: Climate and Forecast (CF) Standard Name Table (Version 46, 25 July 2017)
##         keywords: Earth Science > Oceans > Ocean Temperature > Sea Surface Temperature, Earth Science > Atmosphere > Atmospheric Temperature > Surface Temperature > Air Temperature
##         cdm_data_type: Grid
##         dataset_citation_url: https://doi.org/10.25921/9qth-2p70
##         references: Vose, R. S., et al., 2012: NOAAs merged land-ocean surface temperature analysis. Bulletin of the American Meteorological Society, 93, 1677-1685. doi: 10.1175/BAMS-D-11-00241.1. Huang, B., Peter W. Thorne, et. al, 2017: Extended Reconstructed Sea Surface Temperature version 5 (ERSSTv5), Upgrades, validations, and intercomparisons. J. Climate, 30, 8179-8205. doi: 10.1175/JCLI-D-16-0836.1
##         climatology: Climatology is based on 1971-2000 monthly climatology
##         license: These data are available for use without restriction.
##         source: https://www.ncdc.noaa.gov/noaa-merged-land-ocean-global-surface-temperature-analysis-noaaglobaltemp-v5
##         platform: Ships, moored buoys, surface drifting buoys, Argo floats, and weather stations
##         instrument: Conventional thermometers
##         creator_email: Boyin.Huang@noaa.gov, Xungang.Yin@noaa.gov
##         comment: Merged land ocean surface temperature anomalies. Version 5.0.0, blending ERSST V5 and GHCN-M V4
##         source_institution: DOC/NOAA/NESDIS/National Centers for Environmental Information(NCEI)
##         history: Created 08/2019 using new V5  data from NCEI
##         version: V5
##         geospatial_bounds: POLYGON ((2.5 -87.5, 2.5 87.5, 357.5 87.5, 357.5 -87.5, 2.5 -87.5))
##         title: NOAA Merged Land Ocean Global Surface Temperature Analysis (NOAAGlobalTemp)
##         data_modified: 2019-08-02
Lat <- ncvar_get(nc, "lat")
Lat # latitude data
##  [1] -87.5 -82.5 -77.5 -72.5 -67.5 -62.5 -57.5 -52.5 -47.5 -42.5 -37.5 -32.5
## [13] -27.5 -22.5 -17.5 -12.5  -7.5  -2.5   2.5   7.5  12.5  17.5  22.5  27.5
## [25]  32.5  37.5  42.5  47.5  52.5  57.5  62.5  67.5  72.5  77.5  82.5  87.5
#[1] -87.5 -82.5 -77.5 -72.5 -67.5 -62.5
Lon <- ncvar_get(nc, "lon")
Lon # longitude data
##  [1]   2.5   7.5  12.5  17.5  22.5  27.5  32.5  37.5  42.5  47.5  52.5  57.5
## [13]  62.5  67.5  72.5  77.5  82.5  87.5  92.5  97.5 102.5 107.5 112.5 117.5
## [25] 122.5 127.5 132.5 137.5 142.5 147.5 152.5 157.5 162.5 167.5 172.5 177.5
## [37] 182.5 187.5 192.5 197.5 202.5 207.5 212.5 217.5 222.5 227.5 232.5 237.5
## [49] 242.5 247.5 252.5 257.5 262.5 267.5 272.5 277.5 282.5 287.5 292.5 297.5
## [61] 302.5 307.5 312.5 317.5 322.5 327.5 332.5 337.5 342.5 347.5 352.5 357.5
#[1]   2.5   7.5  12.5  17.5  22.5  27.5 
Time<- ncvar_get(nc, "time")
head(Time) # time data in Julian days
## [1] 29219 29250 29279 29310 29340 29371
#[1] 29219 29250 29279 29310 29340 29371
#install.packages("chron")
library(chron) # convert Julian date to calendar date
nc$dim$time$units # .nc base time for conversion
## [1] "days since 1800-1-1 00:00:0.0"
#[1] "days since 1800-1-1 00:00:0.0"
month.day.year(29219,c(month = 1, day = 1, year = 1800))
## $month
## [1] 1
## 
## $day
## [1] 1
## 
## $year
## [1] 1880
#1880-01-01 # the beginning time of the dataset
tail(Time)
## [1] 79988 80019 80047 80078 80108 80139
#[1] 79988 80019 80047 80078 80108 80139
month.day.year(80139,c(month = 1, day = 1, year = 1800))
## $month
## [1] 6
## 
## $day
## [1] 1
## 
## $year
## [1] 2019
#2019-06-01 # the end time of the dataset

# extract anomaly data in (lon, lat, time) coordinates
NOAAgridT <- ncvar_get(nc, "air") 
dim(NOAAgridT) # dimensions of the data array
## [1]   72   36 1674
#[1]  72   36 1674 #5-by-5, 1674 months from Jan 1880-Jun 2019
#
#Plot Fig. 1.8: Dec 2015 global surface temp anomalies map
#install.packages(maps)
library(maps)# requires maps package 
mapmat=NOAAgridT[,,1632]
# Julian date time 1632 corresponds to Dec 2015
mapmat=pmax(pmin(mapmat,6),-6) # put values in [-6, 6]
int=seq(-6, 6, length.out=81)
rgb.palette=colorRampPalette(c('black','blue', 
   'darkgreen', 'green', 'yellow','pink','red','maroon'),
                             interpolate='spline')
par(mar=c(3.5, 4, 2.5, 1), mgp=c(2.3, 0.8, 0))
filled.contour(Lon, Lat, mapmat, 
               color.palette=rgb.palette, levels=int,
  plot.title=title(main="NOAAGlobalTemp Anomalies Dec 2015",
            xlab="Latitude", ylab="Longitude", cex.lab=1.2),
  plot.axes={axis(1, cex.axis=1.2, las=1); 
                          axis(2, cex.axis=1.2, las=2);
                          map('world2', add=TRUE); grid()},
  key.title=title(main=expression(paste("[", degree, "C]"))),
  key.axes={axis(4, cex.axis=1.2)})

#Fig. 1.9 is plotted by Panoply
#
#R plot Fig. 1.10: Hovmoller diagram
library(maps)
mapmat=NOAAgridT[30,12:24,1309:1668]
#Longitude= 240 deg, Lat =[-30 30] deg
#Time=Jan 1989-Dec 2018: 30 years
mapmat=pmax(pmin(mapmat,2),-2) # put values in [-2,2]
par(mar=c(4,5,3,0))
int=seq(-2,2,length.out=81)
rgb.palette=colorRampPalette(c('black','blue', 
 'darkgreen','green', 'yellow','pink','red','maroon'),
                             interpolate='spline')
par(mar=c(3.5,3.5,2.5,1), mgp=c(2.4, 0.8, 0))
x = seq(1989, 2018, len=360)
y = seq(-30, 30, by=5)
filled.contour(x, y, t(mapmat), 
       color.palette=rgb.palette, levels=int,
       plot.title=title(main=
    "Hovmoller diagram of the NOAAGlobalTemp Anomalies",
               xlab="Time",ylab="Latitude", cex.lab=1.2),
      plot.axes={axis(1, cex.axis=1.2); 
                 axis(2, cex.axis=1.2); 
                 map('world2', add=TRUE);grid()},
        key.title=title(main = 
                expression(paste("[", degree, "C]"))),
        key.axes={axis(4, cex.axis=1.2)})

setwd("/rCode")
#
#R read a 4-Dimensional netCDF file
library(ncdf4)
# read GODAS data 1-by-1 deg, 40 levels, Jan-Dec 2015
nc=ncdf4::nc_open("data/godas2015.nc")
nc
## File data/godas2015.nc (NC_FORMAT_CLASSIC):
## 
##      3 variables (excluding dimension variables):
##         int date[time]   
##             units: YYYYMM
##         float timePlot[time]   
##             info: good for plotting time series
##             units: YYYY.(fractional part of year)
##         short pottmp[lon,lat,level,time]   
##             missing_value: 32767
##             dataset: NCEP GODAS
##             var_desc: potential temperature
##             level_desc: Multiple Levels
##             statistic: Monthly Mean
##             parent_stat: Individual Obs
##             _FillValue: 32767
##             sub_center: Environmental Modeling Center
##             center: US National Weather Service - NCEP (WMC)
##             long_name: Potential temperature
##             units: K
##             level_indicator: 160
##             gds_grid_type: 0
##             parameter_table_version: 2
##             parameter_number: 13
##             add_offset: 285
##             scale_factor: 0.00152592500671744
##             unpacked_valid_range: 260
##              unpacked_valid_range: 310
##             actual_range: 269.938995361328
##              actual_range: 306.221008300781
##             valid_range: -16384
##              valid_range: 16384
## 
##      4 dimensions:
##         level  Size:40 
##             positive: down
##             units: m
##             long_name: depth below sea level
##             actual_range: 5
##              actual_range: 4478
##             axis: Z
##         lon  Size:360 
##             units: degrees_east
##             GridType: Cylindrical Equidistant Projection Grid
##             long_name: longitude
##             actual_range: 0.5
##              actual_range: 359.5
##             standard_name: longitude
##             axis: X
##         lat  Size:418 
##             units: degrees_north
##             GridType: Cylindrical Equidistant Projection Grid
##             long_name: latitude
##             actual_range: -74.5
##              actual_range: 64.4990005493164
##             standard_name: latitude
##             axis: Y
##         time  Size:12   *** is unlimited *** 
##             units: days since 1800-01-01 00:00:0.0
##             info: This is the FIRST day of the averaging period.
##             long_name: time
##             actual_range: 78527
##              actual_range: 78861
##             delta_t: 0000-01-00 00:00:00
##             avg_period: 0000-01-00 00:00:00
##             standard_name: time
##             axis: T
## 
##     13 global attributes:
##         creation_date: Mon Dec 23 12:15:32 MST 2013
##         sfcHeatFlux: 
## Note that the net surface heat flux are the total surface heat flux 
## from the NCEP reanalysis 2 plus the relaxation terms.
##         time_comment: The internal time stamp indicates the FIRST day of the averaging period.
##         Conventions: COARDS
##         grib_file: godas.M.2014*
##         html_REFERENCES: http://www.cpc.ncep.noaa.gov/products/GODAS/background.shtml
##         html_BACKGROUND: http://www.cpc.ncep.noaa.gov/products/GODAS/background.shtml
##         html_GODAS: www.cpc.ncep.noaa.gov/products/GODAS
##         comment: NOTE:  THESE ARE THE BIAS CORRECTED GODAS FILES.
##         title: GODAS: Global Ocean Data Assimilation System
##         history: Created 2013/12 by Hoop
##         References: https://www.esrl.noaa.gov/psd/data/gridded/data.godas.html
##         dataset_title: NCEP Global Ocean Data Assimilation System (GODAS)
Lat <- ncvar_get(nc, "lat")
Lon <- ncvar_get(nc, "lon")
Level <- ncvar_get(nc, "level")
Time <- ncvar_get(nc, "time")
head(Time)
## [1] 78527 78558 78586 78617 78647 78678
#[1] 78527 78558 78586 78617 78647 78678
library(chron)
month.day.year(78527,c(month = 1, day = 1, year = 1800))
## $month
## [1] 1
## 
## $day
## [1] 1
## 
## $year
## [1] 2015
# 2015-01-01
# potential temperature pottmp[lon, lat, level, time] 
godasT <- ncvar_get(nc, "pottmp")
dim(godasT)
## [1] 360 418  40  12
#[1] 360 418  40  12, 
#i.e., 360 lon, 418 lat, 40 levels, 12 months=2015
t(godasT[246:250, 209:210, 2, 12]) 
##          [,1]     [,2]     [,3]     [,4]     [,5]
## [1,] 300.0655 299.9831 299.8793 299.7771 299.6641
## [2,] 300.1845 300.1006 299.9998 299.9007 299.8045
#Dec level 2 (15-meter depth) water temperature [K] of
#a few grid boxes over the eastern tropical Pacific
#        [,1]     [,2]     [,3]     [,4]     [,5]
#[1,] 300.0655 299.9831 299.8793 299.7771 299.6641
#[2,] 300.1845 300.1006 299.9998 299.9007 299.8045
#
# R plot Fig. 1.11: The ocean potential temperature
# the 20th layer from surface: 195 meters depth
# compute 2015 annual mean temperature at 20th layer
library(maps)
climmat=matrix(0,nrow=360,ncol=418)
sdmat=matrix(0,nrow=360,ncol=418)
Jmon<-1:12
for (i in 1:360){
  for (j in 1:418){
    climmat[i,j] = mean(godasT[i,j,20,Jmon]); 
    sdmat[i,j]=sd(godasT[i,j,20,Jmon]) 
                  }
                }
int=seq(273,298,length.out=81)
rgb.palette=colorRampPalette(c('black','blue',
    'darkgreen','green', 'white','yellow',
    'pink','red','maroon'), interpolate='spline')
  
setEPS() # save the .eps figure file 
postscript("fig0111.eps", height = 5, width = 10)
par(mar=c(3.5, 3.5, 2.5, 0), mgp=c(2, 0.8, 0))
filled.contour(Lon, Lat, climmat, 
     color.palette=rgb.palette, levels=int,
    plot.title=title(main=
"GODAS 2015 Annual Mean Temperature at 195 [m] Depth Level",
            xlab="Longitude",ylab="Latitude",
            cex.lab=1.3, cex.axis=1.3),
plot.axes={axis(1); axis(2); map('world2', add=TRUE);grid()},
            key.title=title(main="[K]"))
dev.off()
## png 
##   2