WORKSHOP 2

Install.packages(“devtools”) # if devtools isn’t already installed

devtools::install_github("troyhill/VulnToolkit")
## Skipping install of 'VulnToolkit' from a github remote, the SHA1 (0387170d) has not changed since last install.
##   Use `force = TRUE` to force installation

devtools::install_github(“troyhill/VulnToolkit”)

library(VulnToolkit)
library(maps)

Clear variables and any open plots before starting

graphics.off()
rm(list=ls())

Set your working directory

setwd("D:/Documentos D/MSc Climate Change/GY667 - The Ocean and Climate Change/WS2") 

Task 1: load your data. Hint: read.csv. Look at the syntax from last week.

Some of you have OPW data and some of you have MI data. The format of each is different. from WS1, read.csv, with options skip, header

port_oriel=read.csv(file.path("D:/Documentos D/MSc Climate Change/GY667 - The Ocean and Climate Change/WS2/port_oriel.csv"),skip=7,header=TRUE)
summary(port_oriel)
##                Date            Value          Quality    
##  01/01/2008 00:00:     1   Min.   :-3.36700   ! :  4646  
##  01/01/2008 00:15:     1   1st Qu.:-1.19800   * :169744  
##  01/01/2008 00:30:     1   Median : 0.00600   31:106647  
##  01/01/2008 00:45:     1   Mean   :-0.04325   B :  6489  
##  01/01/2008 01:00:     1   3rd Qu.: 1.12700              
##  01/01/2008 01:15:     1   Max.   : 3.33800              
##  (Other)         :287520

how would you look at the file? how do you know its structure? outside of R?

When reading the file, a data set with 287526 observations of 3 variables appear.

Rename some of these variables:

names(port_oriel)<-c("time","h","flag")

Pull out some of the variables. you don’t need repeats of lat, lon, name

port_oriel.lat <- port_oriel$lat[1]
port_oriel.lon <- port_oriel$lon[1]

Task 2: Plot the location of your tide gauge:

map("world",c("ireland","uk"),fill=TRUE,xlim=c(-12,-4),ylim=c(51,56))
map.axes(cex.axis=1)
title(main="Location of Port Oriel Tide Gauge",xlab="Longitude",ylab="Latitude")
points(-6.221441,53.798042,pch=21,col="gray",bg="red")
text(-6.221441,53.798042,"port_oriel",col="gray")

As the Port Oriel database has no values of long and lat, these information was obtained at the Marine Institue website: https://erddap.marine.ie/erddap/tabledap/IrishNationalTideGaugeNetwork.subset?station_id,longitude,latitude&station_id=%22OPW%20Port%20Oriel%22&.viewDistinctMap=true&.viewDistinctData=1000&.viewRelatedData=0&distinct()&.last=station_id

To insert the long and lat manually, the R code “port_oriel.long” and “port_oriel.lat” was exchanged for the direct coordinates.

Task 3: Convert your data to POSIXlt date format.

rtime<-as.POSIXlt(port_oriel$time,format="%d/%m/%Y%H:%M",tz='UTC')

dates and times are always hard. look at the code you had last week: look at the data, what format are they in? Refer to the Workshop guidelines. head(dublin$time) # or equivalent [1] 2007-03-20T06:55:00Z 2007-03-20T07:00:00Z 2007-03-20T07:05:00Z [4] 2007-03-20T07:10:00Z

what does this format look like? YYYY-mm-ddT

The Port Oriel database has the format d/m/Y (in which d = day as a number, m = month, Y = 4-digit year)

make a new data frame with variables year, month, day, hour, minute, second,

sea level (h), and flag

port_oriel_dataframe <- data.frame("time"=rtime,
                  "year"=rtime$year+1900,
                  "month"=rtime$mon+1,
                  "day"=rtime$mday,
                  "hour"=rtime$hour+1,
                  "min"=rtime$min,
                  "h"=port_oriel$h,
                  "flag"=port_oriel$flag)

Make an hourly data frame

Before aggregating by hour, chop off un even bits at start and end

head(port_oriel_dataframe) # tells us to get rid of first variable, =4
##                  time year month day hour min     h flag
## 1 2007-04-17 09:15:00 2007     4  17   10  15 1.635    *
## 2 2007-04-17 09:30:00 2007     4  17   10  30 1.720    *
## 3 2007-04-17 09:45:00 2007     4  17   10  45 1.937    *
## 4 2007-04-17 10:00:00 2007     4  17   11   0 1.998    *
## 5 2007-04-17 10:15:00 2007     4  17   11  15 2.125    *
## 6 2007-04-17 10:30:00 2007     4  17   11  30 2.153    *
tail(port_oriel_dataframe,n=11) # tells us to get rid of length(dub[,1]) - 9, =287525
##                       time year month day hour min      h flag
## 287516 2019-01-31 21:30:00 2019     1  31   22  30  1.492    *
## 287517 2019-01-31 21:45:00 2019     1  31   22  45  1.393    *
## 287518 2019-01-31 22:00:00 2019     1  31   23   0  1.284    *
## 287519 2019-01-31 22:15:00 2019     1  31   23  15  1.142    *
## 287520 2019-01-31 22:30:00 2019     1  31   23  30  0.958    *
## 287521 2019-01-31 22:45:00 2019     1  31   23  45  0.771    *
## 287522 2019-01-31 23:00:00 2019     1  31   24   0  0.556    *
## 287523 2019-01-31 23:15:00 2019     1  31   24  15  0.363    *
## 287524 2019-01-31 23:30:00 2019     1  31   24  30  0.172    *
## 287525 2019-01-31 23:45:00 2019     1  31   24  45  0.172    *
## 287526 2019-02-01 00:00:00 2019     2   1    1   0 -0.165    *
port_oriel_sub<-port_oriel_dataframe[4:287525,]
port_oriel.hour <- aggregate(h~hour+day+month+year,port_oriel_sub,mean)

Create a new empty column for a decimal year

port_oriel.hour <- cbind(port_oriel.hour, NA)
names(port_oriel.hour) <- c("hour","day","month", "year", "h", "time")

Add a decimal year variable

port_oriel.hour$time <- as.POSIXlt(sprintf("%s/%s/%s %s", 
                                    port_oriel.hour$year, port_oriel.hour$month, 
                                    port_oriel.hour$day, port_oriel.hour$hour),
                            format="%Y/%m/%d %H",tz='UTC')
summary(port_oriel.hour)
##       hour            day            month             year     
##  Min.   : 1.00   Min.   : 1.00   Min.   : 1.000   Min.   :2007  
##  1st Qu.: 7.00   1st Qu.: 8.00   1st Qu.: 4.000   1st Qu.:2011  
##  Median :13.00   Median :16.00   Median : 7.000   Median :2014  
##  Mean   :12.51   Mean   :15.79   Mean   : 6.537   Mean   :2014  
##  3rd Qu.:18.00   3rd Qu.:23.00   3rd Qu.: 9.000   3rd Qu.:2017  
##  Max.   :24.00   Max.   :31.00   Max.   :12.000   Max.   :2019  
##        h                 time                    
##  Min.   :-3.29725   Min.   :2007-04-17 11:00:00  
##  1st Qu.:-1.18681   1st Qu.:2011-07-24 17:45:00  
##  Median : 0.00225   Median :2014-05-15 11:30:00  
##  Mean   :-0.04323   Mean   :2014-02-15 23:40:12  
##  3rd Qu.: 1.11500   3rd Qu.:2017-01-12 11:15:00  
##  Max.   : 3.23200   Max.   :2019-02-01 00:00:00

Before aggregating by month, chop off un even bits at start and end

head(port_oriel.hour) # tells us to get rid of first variable, n=1308
##   hour day month year        h                time
## 1   11  17     4 2007  2.11650 2007-04-17 11:00:00
## 2   12  17     4 2007  2.10000 2007-04-17 12:00:00
## 3   13  17     4 2007  1.52700 2007-04-17 13:00:00
## 4   14  17     4 2007  0.44775 2007-04-17 14:00:00
## 5   15  17     4 2007 -0.86150 2007-04-17 15:00:00
## 6   16  17     4 2007 -2.02625 2007-04-17 16:00:00
tail(port_oriel.hour,n=11) # tells us to get rid of length(dub[,1]) - 9, 287525
##       hour day month year        h                time
## 71946   14  31     1 2019 -1.06825 2019-01-31 14:00:00
## 71947   15  31     1 2019 -1.18100 2019-01-31 15:00:00
## 71948   16  31     1 2019 -0.95650 2019-01-31 16:00:00
## 71949   17  31     1 2019 -0.47850 2019-01-31 17:00:00
## 71950   18  31     1 2019  0.14800 2019-01-31 18:00:00
## 71951   19  31     1 2019  0.74475 2019-01-31 19:00:00
## 71952   20  31     1 2019  1.28000 2019-01-31 20:00:00
## 71953   21  31     1 2019  1.57800 2019-01-31 21:00:00
## 71954   22  31     1 2019  1.52525 2019-01-31 22:00:00
## 71955   23  31     1 2019  1.03875 2019-01-31 23:00:00
## 71956   24  31     1 2019  0.31575 2019-02-01 00:00:00
port_oriel.month<-port_oriel.hour[1308:287525,]

Starting at line 1308 (1 May 2007) and finishing at line 287525 (31 Jan 2019)

Much mean sea level analyses use monthly values

Task 4: create monthly averages.

Repeat the previous two steps (subsetting i.e. chop the data so you average full months-you will need to use which here, Aggregating, add time back in) to create monthly means.

Create a new empty column for time

port_oriel.month <- cbind(port_oriel.month, NA)
names(port_oriel.month) <- c("hour", "day", "month", "year", "h", "time")

port_oriel.month$time <- as.POSIXlt(sprintf("%s/%s/%s %s", port_oriel.month$year, port_oriel.month$month, port_oriel.month$day, port_oriel.month$h), format="%Y/%m/%d %H",tz='UTC')
port_oriel.month <- aggregate(h~month+year,port_oriel.month,mean)
port_oriel.month <- cbind(port_oriel.month, NA)
names(port_oriel.month) <- c("month", "year", "h", "time")

Add a POSIXlt variable back in

port_oriel.month$time <- as.POSIXlt(sprintf("%s/%s/15", port_oriel.month$year, port_oriel.month$month),tz="UTC")

Task 5: Plot the resultant data, add legends etc.

plot(port_oriel.month)

plot(port_oriel.month$time,port_oriel.month$h,
     type = "b",
     col = "red",
     lwd = 1,
     pch=8,
     xlab = "Year",
     ylab = "Sea level [m OD Malin]",
     main= "Port Oriel Monthly")

Task 6. Save your data. Hint: write.csv

write.csv(port_oriel.month,file=“port_oriel.month.csv”)