Welcome to the exercises of module 2! Please try to solve the following exercises, and if you do not remember how to write the code for certain things, have a look again at the previous scripts. Programming is about learning by doing! So, do not hesitate to look for help in the internet, too. Remember that there is almost always more than just one way to solve a problem in R!

Part A: Weather station measurements

Exercise 01 (5 points)

Import the file of the weather measurements that corresponds to the station assigned to your group from the folder “Weather_by_station”. You can find the list of stations for each group in the file “M02_Overview.pptx”. Process the station measurements according to the workflow in script 01, lines 83 to 115.

library(reshape2)
library(lubridate)
library(dplyr)
library(Kendall)
library(ggplot2)

dir <- "C:/Users/Max/Desktop/IAMO_neu/eLearning/Module_2_Climate_trends/solutions/"
sisian <- read.csv(paste0(dir, "sisian.csv"), sep=";")
paras  <- read.csv(paste0(dir, "parameters.csv"), sep=";")

### substitute hyphens by NA
sisian[sisian == " -"] <- NA
sisian[sisian == "-"]  <- NA

# class(sisian[,4])  ## the data is not formatted as numeric!

### transform data entries to numbers
sisian[, 4:34] = sapply(sisian[, 4:34], as.numeric)  
### transform column names to numbers
colnames(sisian)[4:34] <- gsub("day", "", names(sisian))[4:34]
  
### melt dataframe
sisian <- melt(sisian, id.vars=c("station", "parameter", "year_month"))

### create a column for year and month
sisian$YEAR  <- as.numeric(substr(sisian$year_month, 1, 4))
sisian$MONTH <- as.numeric(substr(sisian$year_month, 6, 7))
  
### join "sisian" with "paras" 
sisian <- merge(sisian, paras, by="parameter")
  
### rename, reorder and delete some columns
names(sisian) <- c("PARA_CODE", "STATION", "YEAR_MONTH", "DAY", "VALUE", "YEAR", "MONTH", "PARA_LONG", "PARA_SHORT")
sisian     <- sisian[,c(2,9,8,6,7,4,5)]
sisian$DAY <- as.numeric(sisian$DAY)

### create a date-column and delete all entries with non-existing dates (e.g. February 30th, etc.)
sisian$DATE = as.Date(paste(sisian$YEAR, sisian$MONTH, sisian$DAY, sep = "-"))
sisian = sisian[!is.na(sisian$DATE), ]

sisian$DOY <- yday(sisian$DATE)

head(sisian)
STATION PARA_SHORT PARA_LONG YEAR MONTH DAY VALUE DATE DOY
Sisian TMAX Maximum_Temperature 1995 1 1 2.1 1995-01-01 1
Sisian TMAX Maximum_Temperature 1995 2 1 6.4 1995-02-01 32
Sisian TMAX Maximum_Temperature 1995 3 1 8.8 1995-03-01 60
Sisian TMAX Maximum_Temperature 1995 4 1 16.3 1995-04-01 91
Sisian TMAX Maximum_Temperature 1995 5 1 19.5 1995-05-01 121
Sisian TMAX Maximum_Temperature 1995 6 1 21.8 1995-06-01 152

Exercise 02 (5 points)

Plot the mean and standard deviation of snow cover measured at your station during each day of the year (1-366), across all years for which data is available. Export your plot as PNG.

sisian_snow <- filter(sisian, PARA_SHORT == "SNOW")
sisian_snow_mean <- sisian_snow %>% group_by(DOY) %>%  summarize(mean  = mean(na.omit(VALUE)), sd = sd(na.omit(VALUE))) %>% as.data.frame()

plot2 <- ggplot(data=sisian_snow_mean, aes(x=DOY, y=mean)) +
    geom_line() + 
    geom_ribbon(aes(ymin = mean-sd, ymax = mean+sd), alpha=0.2) +
    labs(x="Day of the year", y="snow depth in mm") +
    ggtitle("Average Snow depth at Sisian station, mean 1995-2020") +
    theme(text=element_text(size=16))

# png(paste0(dir, "plot2.png"), width=600, height=400)
#   plot2
# dev.off()

plot2

Exercise 03 (15 points)

For each year available, calculate the average of the daily maximum temperature measurements in May. Plot these averages as points connected by a line, and add to the plot the regression line of a linear model with “average daily maximum temperature in May” as dependent and “year” as explanatory variable. Export your plot as PNG. Report the in- or decrease in average daily maximum temperature in May per decade and whether the trend is significant according to the Mann-Kendall test.

sisian_tmax         <- filter(sisian, PARA_SHORT == "TMAX")
sisian_tmax_monthly <- sisian_tmax %>% group_by(YEAR, MONTH) %>%  summarize(mean  = mean(na.omit(VALUE))) %>% as.data.frame()
## `summarise()` has grouped output by 'YEAR'. You can override using the `.groups` argument.

sisian_tmax_May <- filter(sisian_tmax_monthly, MONTH == 5)

## run linear model
model1 <- lm(sisian_tmax_May$mean ~ sisian_tmax_May$YEAR)

plot3 <- ggplot(data=sisian_tmax_May, aes(x=YEAR, y=mean)) +
    geom_line() + 
    geom_point(size=2) +
    theme(axis.text.x=element_text(angle=45, hjust=1),
          text=element_text(size=16)) +
    ggtitle("Average maximum temperature in May, at Sisian station") + 
    ylab("°C") + xlab("") +
    geom_abline(aes(intercept = model1$coefficients[1], slope = model1$coefficients[2]))

# png(paste0(dir, "plot3.png"), width=600, height=400)
#   plot3
# dev.off()

plot3


## Increase per decade:
model1$coefficients[2]*10
## sisian_tmax_May$YEAR 
##            0.2681665
## increase in May maximum temperatures is 0.268 degrees Celsius per decade!

## BUT there is no significant trend!
teststat <- MannKendall(sisian_tmax_May$mean)
teststat[2]$sl[1]<0.05
## [1] FALSE

Exercise 04 (15 points)

For all parameters available at your station, and for all months, calculate whether there has been a significant in- or decrease in monthly sums (precipitation and sunshine duration) or monthly means (all other parameters) for the entire period that is covered by your dataset. Visualize your results in a bubble plot (see last figure in script 01) and export it as PNG.

## define vector with all parameter names 
paras   <- unique(sisian$PARA_SHORT)

## create empty results object
results <- NULL

## iterate over each parameter
for (p in 1:length(paras))  
{ ## create temporary, empty results dataframe to keep track of the results of the Mann-Kendall tests
  results_temp <- data.frame(parameter = paras[p], month = c(1:12), significant = NA) 
  
  ## iterate over each month
  for (m in 1:12)           
  { ## create data selection based on parameter p and month m
    sisian_sel  <- filter(sisian, PARA_SHORT == paras[p], MONTH == m)  
    
    ## check whether there is actually data for the combination of parameter and month 
    ## (otherwise, there will be an error in some iterations)
    if (dim(sisian_sel)[1] > 0 ) { 
      
      ## for each month, calculate the SUM of daily values for all parameters related to precipitation, snow cover and sunshine duration
      if (paras[p] %in% c("PRCP", "SUNSHINE")) {
      sisian_sel_comb <- sisian_sel %>% group_by(YEAR, MONTH) %>% summarize(comb  = sum(na.omit(VALUE))) %>% as.data.frame() 
      
      ## for each month, calculate the MEAN of daily values for all parameters related to humidity, temperature, vapour pressure and wind
      } else { sisian_sel_comb <- sisian_sel %>% group_by(YEAR, MONTH) %>% summarize(comb  = mean(na.omit(VALUE))) %>% as.data.frame() }
  
      ## check whether there are at least 3 years with data
      ## (otherwise, there will be an error in some iterations)
      if (dim(sisian_sel_comb)[1] > 3 ) { 
      
        ## calculate Mann Kendall statistics
        teststat <- MannKendall(sisian_sel_comb$comb)  

        ## write result of Mann Kendall test into "results_temp"
        if (teststat[2]$sl[1] < 0.05) { results_temp$significant[m] <- 1 }
        else if (teststat[2]$sl[1] >= 0.05) { results_temp$significant[m] <- 0 }
      }
    }
  }
## append "results_temp" to "results"
results <- rbind(results, results_temp) 
}

plot4 <- ggplot(data=results, aes(x=month, y=parameter, color=factor(significant))) +
    geom_point(size=5) +
    ggtitle("Trends from 1995 to 2020 at Sisian station") + 
    scale_x_continuous(breaks=c(1:12), labels = c("JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC")) +
    scale_color_manual(labels = c("no significant trend", "significant trend", "NA"), 
                       values=c("firebrick2", "chartreuse3", "lightgrey"), 
                       name="") +
    ylab("") + xlab("") + theme(text=element_text(size=16))

# png(paste0(dir, "plot4.png"), width=800, height=800)
#   plot4
# dev.off()

plot4

Part B: CHIRPS

Exercise 05 (30 points)

Download all available CHIRPS layers from 1981 to 2020 for the month assigned to your group to the folder “download”. You can find the list of months for each group in the file “M02_Overview.pptx”. In each raster, change the value “-9999” to “NA”. Use the functions crop() and mask() to reduce the data to the extent of Armenia. For each year, calculate the sum of the daily precipitation measurements during the month assigned to your group. Save the resulting 40 rasters of monthly sums as TIFFs. Plot the 40 monthly sum rasters as maps and save your plot(s) as PNG.

## please refer to script M02_Script02_CHIRPS.Rmd for different options of downloading CHIRPS data.
## The procedure of cropping, masking and calculating monthly sum rasters is described in that script in chapter 3, Step A.

Exercise 06 (15 points)

Import all 40 monthly precipitation sum rasters as a raster stack. Use the function exact_extract() to calculate marz-level averages of the monthly precipitation sums for all 40 years. Add the result to the attribute table of the marzes shapefile. Use spplot() to create a multipart map of the marz-level monthly precipitation for the years 1990, 2000, 2010 and 2020.

library(raster)
library(exactextractr)
library(sp)
library(rgdal)

monthly_sums <- stack(list.files(paste0(dir, "monthly_sums"), full.names=T))

marzes <- readOGR(paste0(dir, "shapefiles/ARM_marzes_short_simplified_500.shp"), verbose=F)
marzes_new <- spTransform(marzes, proj4string(monthly_sums))

zonalstats <- exact_extract(x=monthly_sums, y=marzes_new, fun="mean", progress = F)

marzes_new@data[,2:41] <- zonalstats
names(marzes_new@data)[2:41] <- paste0("year_", c(1981:2020))

spplot(marzes_new, c("year_2010", "year_2020", "year_1990", "year_2000"), col.regions= rev(terrain.colors(100)), at=c(1:100), main = "January total precipitation")

Exercise 07 (10 points)

Use ggplot() and facet_wrap() to create line plots that show the yearly precipitation sums during the selected month, from 1981 to 2020. Each facet should represent one marz. Add a regression line to each plot.

dat <- melt(marzes_new@data, id.vars = "div_short")
names(dat) <- c("marz", "year", "precip")
dat$year <- as.numeric(substring(dat$year, 6,9))

ggplot(data=dat, aes(x=year, y=precip)) +
    geom_line(size=1) + 
    geom_point(size=2) +
    labs(x="", y="precipitation (in mm)") +
    ggtitle("January precipitation") +
    facet_wrap(~marz, scales="free") +
    geom_smooth(method="lm", se=F)
## `geom_smooth()` using formula 'y ~ x'

Exercise 08 (5 points)

In a table, report the significance of the Mann-Kendall test for the precipitation time series of each marz.

results <- data.frame(marz = marzes_new@data[,1], result = NA)

for (i in 1:NROW(results))
{ teststat <- MannKendall(zonalstats[i,])  
  if (teststat[2]$sl[1] < 0.05) { results$result[i] <- "significant trend" }
  else if (teststat[2]$sl[1] >= 0.05) { results$result[i] <- "no significant trend" }
}

results
marz result
Syunik no significant trend
Vayotz Dzor no significant trend
Ararat no significant trend
Armavir significant trend
Gegharkunik significant trend
Yerevan significant trend
Aragatsotn significant trend
Kotayk significant trend
Shirak significant trend
Tavush significant trend
Lori significant trend