Tide Buoy and Seismometer Data from 2011 Japan Earthquake

This script reads in tide buoy and seismometer data from the 2011 Japan earthquake, then filters the tide buoy data to remove the tidal signal so the tsunami signal and delay in arrival time of the tsunami between stations is more visible.

Load Packages

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:cowplot':
## 
##     stamp

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'signal'

## The following objects are masked from 'package:pracma':
## 
##     conv, ifft, interp1, pchip, polyval, roots

## The following objects are masked from 'package:stats':
## 
##     filter, poly

The month column in the dataset is in the format ‘3’ instead of ‘03’ so it needs to be converted.

suppressWarnings({
    ### Convert 'MM' column to numeric
    apia$MM <- as.numeric(apia$MM)
    ### Convert month column '3' to '03' format
    apia$MM <- sprintf("%02d", apia$MM)
    ### Drop values where height is 9999
    apia <- apia[apia$HEIGHT != 9999, , drop = FALSE]
    ### Create combined date column as POSIXct format
    apia$Date <- as.POSIXct(paste(apia$X.YY, apia$MM, apia$DD, apia$hh, apia$mm, apia$ss, sep = " "), format='%Y %m %d %H %M %S')

### Same for itrup
    iturup$MM <- as.numeric(iturup$MM)
    iturup$MM <- sprintf("%02d", iturup$MM)
    iturup <- iturup[iturup$HEIGHT != 9999, ]
    iturup$Date <- as.POSIXct(paste(iturup$X.YY, iturup$MM, iturup$DD, iturup$hh, iturup$mm, iturup$ss, sep = " "), format='%Y %m %d %H %M %S')

### And for guadalcanal
    guadalcanal$MM <- as.numeric(guadalcanal$MM)
    guadalcanal$MM <- sprintf("%02d", guadalcanal$MM)
    guadalcanal <- guadalcanal[guadalcanal$HEIGHT != 9999, ]
    guadalcanal$Date <- as.POSIXct(paste(guadalcanal$X.YY, guadalcanal$MM, guadalcanal$DD, guadalcanal$hh, guadalcanal$mm, guadalcanal$ss, sep = " "), format='%Y %m %d %H %M %S')

### Normalize height against the average
    apia_height_avg <- mean(apia$HEIGHT)
    apia$height_norm <- apia$HEIGHT - apia_height_avg

    iturup_height_avg <- mean(iturup$HEIGHT)
    iturup$height_norm <- iturup$HEIGHT - iturup_height_avg

    guadalcanal_height_avg <- mean(guadalcanal$HEIGHT)
    guadalcanal$height_norm <- guadalcanal$HEIGHT - guadalcanal_height_avg
})

The data has 3 different sampling frequencies, denoted by values of 1, 2, or 3 in the ‘T’ column of the data. A value of 1 is a 15 minute sampling frequency, 2 is a 1 minute frequency, and 3 is a 15 second frequency. To look at the distribution of the different sampling rates in the data, Apia is split into 3 sets by the value in the ‘T’ column, then plotted.

suppressWarnings({
### Separate data by sampling frequency
    apia1 <- subset(apia, T == 1)
    apia2 <- subset(apia, T == 2)
    apia3 <- subset(apia, T == 3)

### Reset the row names for each DataFrame
    rownames(apia1) <- NULL
    rownames(apia2) <- NULL
    rownames(apia3) <- NULL

### Extract time columns for each DataFrame
    time1 <- apia1$Date
    time2 <- apia2$Date
    time3 <- apia3$Date

    apia1$Date <- as.POSIXct(paste(apia1$X.YY, apia1$MM, apia1$DD, apia1$hh, apia1$mm, apia1$ss, sep = " "), format='%Y %m %d %H %M %S')
    apia2$Date <- as.POSIXct(paste(apia2$X.YY, apia2$MM, apia2$DD, apia2$hh, apia2$mm, apia2$ss, sep = " "), format='%Y %m %d %H %M %S')
    apia3$Date <- as.POSIXct(paste(apia3$X.YY, apia3$MM, apia3$DD, apia3$hh, apia3$mm, apia3$ss, sep = " "), format='%Y %m %d %H %M %S')

### Create three separate plots
    plot1 <- ggplot(apia1, aes(x = Date, y = height_norm)) +
    geom_point(size = 0.7) +
    labs(title = "Apia1 vs Date", x = "Date", y = "Height Norm")+
    scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))

    plot2 <- ggplot(apia2, aes(x = Date, y = height_norm)) +
    geom_point(size = 0.7) +
    labs(title = "Apia2 vs Date", x = "Date", y = "Height Norm")+
    scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))

    plot3 <- ggplot(apia3, aes(x = Date, y = height_norm)) +
    geom_point(size = 0.7) +
    labs(title = "Apia3 vs Date", x = "Date", y = "Height Norm")+
    scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))

    # Arrange the plots in a grid layout
    grid.arrange(plot1, plot2, plot3, ncol = 1)
})

This plot separates the data by its sampling frequency, and we can see that most of the tsunami signal we want to visualize is in the 1 minute sampling frequency. The tidal signal is the longer term, higher amplitude cyclical signal that resembles a sine wave, and the tsunami signal is the smaller, high-frequency variations that are superimposed on top of it.

Now I want to plot the raw data for each station, and add scatter points that are colored by the sampling frequency.

suppressWarnings({
### Scatter plot for apia
    plot1 <- ggplot(data = apia, aes(x = Date, y = height_norm, color = T)) +
    geom_line() +
    #geom_point(size = 1) +
    labs(x = " ", y = " ") +
    ggtitle("Apia") +
    geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
    theme_minimal()+
    ylim(-0.6, 0.6)+
    scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(apia$Date)))


### Scatter plot for guadalcanal
    plot2 <- ggplot(data = guadalcanal, aes(x = Date, y = height_norm, color = T)) +
    geom_line() +
    #geom_point(size = 1) +
    labs(x = " ", y = "Normalized Height") +
    ggtitle("Guadalcanal") +
    geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
    theme_minimal()+
    ylim(-0.6, 0.6)+
    scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(guadalcanal$Date)))


### Scatter plot for iturup
    plot3 <- ggplot(data = iturup, aes(x = Date, y = height_norm, color = T)) +
    geom_line() +
    #geom_point(size = 1) +
    labs(x = " Date ", y = " ") +
    ggtitle("Iturup") +
    geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
    theme_minimal()+
    ylim(-0.6, 0.6)+
    scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(iturup$Date)))


### Create subplots
    grid.arrange(plot3, plot2, plot1, ncol = 1, nrow = 3, 
                top = textGrob("Subplots", gp = gpar(fontsize = 14)))
})

In the above plot with the raw buoy data, the red vertical line denotes the time of the earthquake. A colorbar value of 3 is a 15 seecond sampling frequendy, a value of 2 is a 1 minute sampling frequency, and a value of 1 is a 15 minute sampling frequency. Most of the data of the tsunami signal is at a 1 minute sampling frequency, which is the medium blue color.

The Fourier Transform allows me to get the frequency content of the data, which is waht I use to decide what parts of the signal I want to filter out. In order to compute the Fourier transform and apply a filter to the data to remove the tidal signal, the data needs to be resampled to a consistent frequency of 15 seconds.

suppressWarnings({
#Now resample time series data to 15s sampling frequency
    new_freq = 15 #15 seconds

    ### Create zoo object
    apia_data <- zoo(apia$height_norm, apia$Date)

    ### Check for duplicate timestamps
    duplicated_timestamps <- duplicated(apia$Date)

    ### Remove duplicates
    apia_unique <- apia[!duplicated_timestamps, ]
    ### Set Date column as the index
    apia_zoo <- zoo(apia_unique$height_norm, order.by = apia_unique$Date)

    ### Resample the time series data
    apia_resamp <- na.approx(merge(apia_zoo, zoo(, seq(start(apia_zoo), end(apia_zoo), by = new_freq))), xout = seq(start(apia_zoo), end(apia_zoo), by = new_freq))
    ### Convert apia_resamp to numeric 
    apia_resamp <- as.numeric(apia_resamp)
})

This next chunk of code computes the Fourier transform of the signal from each buoy. By looking at the amplitude vs frequency plots, I can see the tidal signal at a lower frequency and higher amplitude, and the tsunami signal appears as the smaller variations that occur above a frequency of 0.003 Hz. This means I want to design a filter that will remove parts of the signal that are at a frequency of lower than 0.003 Hz, and allow higher frequency signals to pass, hence the term ‘high-pass’ filter. Specifically, I am using a Butterworth high-pass filter.

suppressWarnings({
### Compute Fourier transform
    fft_apia <- fft(apia_resamp)
    fftshift_apia <- fftshift(fft_apia)

    ### Compute amplitude spectrum
    amp_apia <- Mod(fftshift_apia)

    ### Number of data points
    N <- length(amp_apia)

    ### Sampling frequency
    sample_freq <- 4  # 4 samples per minute

    ### Calculate the time step
    dt <- 1 / sample_freq

    ### Compute the frequency values corresponding to the FFT result
    freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)

    ### Plot the frequency amplitude spectrum on a semilogx scale
    plot(freq, amp_apia, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Apia')
    grid(lwd = 1)
    
#Now apply butterworth filter
    ### Define the filter parameters
    poles <- 4  # Filter order
    fc <- 0.003 # Corner frequency in Hz to filter out tides
    fs <- 1/15  # Sampling frequency (1 sample every 15 seconds)

    ### Calculate the normalized corner frequency
    fnyquist <- 0.5 * fs
    normalized_corner_freq <- fc / fnyquist

    ### Design the Butterworth highpass filter
    b <- butter(poles, normalized_corner_freq, type = "high")

    ### Filter the data using a two-pass Butterworth highpass filter
    apia_filt <- filter(b, filter(b, apia_resamp, sides = 1), sides = 1)
    
    ### Now filter the data from the other two buoys

    guadalcanal_data <- zoo(guadalcanal$height_norm, guadalcanal$Date)

    duplicated_timestamps <- duplicated(guadalcanal$Date)

    guadalcanal_unique <- guadalcanal[!duplicated_timestamps, ]

    guadalcanal_zoo <- zoo(guadalcanal_unique$height_norm, order.by = guadalcanal_unique$Date)

    guadalcanal_resamp <- na.approx(merge(guadalcanal_zoo, zoo(, seq(start(guadalcanal_zoo), end(guadalcanal_zoo), by = new_freq))), xout = seq(start(guadalcanal_zoo), end(guadalcanal_zoo), by = new_freq))

    guadalcanal_resamp <- as.numeric(guadalcanal_resamp)

    fft_guadalcanal <- fft(guadalcanal_resamp)
    fftshift_guadalcanal <- fftshift(fft_guadalcanal)

    amp_guadalcanal <- Mod(fftshift_guadalcanal)

    N <- length(amp_guadalcanal)

    freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)

    plot(freq, amp_guadalcanal, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Guadalcanal')
    grid(lwd = 1)

    guadalcanal_filt <- filter(b, filter(b, guadalcanal_resamp, sides = 1), sides = 1)

### and Iturup

    iturup_data <- zoo(iturup$height_norm, iturup$Date)

    duplicated_timestamps <- duplicated(iturup$Date)

    iturup_unique <- iturup[!duplicated_timestamps, ]

    iturup_zoo <- zoo(iturup_unique$height_norm, order.by = iturup_unique$Date)

    iturup_resamp <- na.approx(merge(iturup_zoo, zoo(, seq(start(iturup_zoo), end(iturup_zoo), by = new_freq))), xout = seq(start(iturup_zoo), end(iturup_zoo), by = new_freq))

    iturup_resamp <- as.numeric(iturup_resamp)

    fft_iturup <- fft(iturup_resamp)
    fftshift_iturup <- fftshift(fft_iturup)

    amp_iturup <- Mod(fftshift_iturup)

    N <- length(amp_iturup)

    freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)

    plot(freq, amp_iturup, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Iturup')
    grid(lwd = 1)

    iturup_filt <- filter(b, filter(b, iturup_resamp, sides = 1), sides = 1)
})

Here are the amplitude vs frequency plots.

Now plot the filtered signals

Now I want to plot the filtered signals against the original data to see if I filtered out the tidal signal.

suppressWarnings({
### Set up plotting layout
    par(mfrow = c(3, 1), mar = c(4, 4, 2, 1))
    ### Timestamp for earthquake on March 11, 2011 5:46pm UTC
    eq_timestamp <- as.POSIXct("2011-03-11 05:46:00", tz = "UTC")
    ### Plot signals for iturup
    plot(iturup_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Iturup")
    lines(iturup_filt, col = "red")
    legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
    grid(lwd = 1)

    ### Plot signals for guadalcanal
    plot(guadalcanal_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Guadalcanal")
    lines(guadalcanal_filt, col = "red")
    legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
    grid(lwd = 1)

    ### Plot signals for apia
    plot(apia_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Apia")
    lines(apia_filt, col = "red")
    legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
    grid(lwd = 1)
})

The tidal signal has been filtered out and the tsunami arrival time delay can be seen between the stations. Iturup is located closest to the earthquake, and Guadalcanal and Apia are both in the south Pacific ocean.

Now plot seismic stations and buoys on a map

Now I want to visualize the seismic data from to seismic stations in northern and southern Japan.

suppressWarnings({
### Add the seismic data from Erimo (close to eq epicenter)
    japaneq <- read.csv("C:/Users/kelsa/OneDrive/Desktop/Documents/GEOG490/japaneq.csv")

### Data has no time values so we need to generate time values for plotting

    sample_count <- 42000
    sampling_rate_hz <- 20
    start_time <- ymd_hms("2011-03-11T05:42:19.029500Z")

### Generate time values in seconds
    time_seconds <- seq(0, (sample_count - 1) / sampling_rate_hz, by = 1 / sampling_rate_hz)

### Add generated time values as a column
    japaneq$Time <- start_time + seconds(time_seconds)


### Plot acceleration data
    plot4 <- ggplot(japaneq, aes(x = Time, y = Z, color = "Z")) +
    geom_line() +
    labs(title = "Erimo Seismic Data",
        x = " ", y = " ") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-20000000, 20000000)) + 
    scale_color_manual(values = c("turquoise")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    plot5 <- ggplot(japaneq, aes(x = Time, y = N_S, color = "N/S")) +
    geom_line() +
    labs(title = "North/South ",
        x = " ", y = "Acceleration (m/s/s*10^7)") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-20000000, 20000000)) + 
    scale_color_manual(values = c("orange")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    plot6 <- ggplot(japaneq, aes(x = Time, y = E_W, color = "E/W")) +
    geom_line() +
    labs(title = "East/West ",
        x = "Time", y = " ") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-20000000, 20000000)) + 
    scale_color_manual(values = c("green")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    ### Create subplots
    grid.arrange(plot4, plot5, plot6, ncol = 1, nrow = 3, 
                top = textGrob("Subplots", gp = gpar(fontsize = 14)))

})

Add the seismic data from Matsuhiro (SE Japan)

suppressWarnings({
    matsushiro <- read.csv("C:/Users/kelsa/OneDrive/Desktop/Documents/GEOG490/matsushiro.csv")

    sample_count <- 42000
    sampling_rate_hz <- 20
    start_time <- ymd_hms("2011-03-11T05:42:19.029500Z")

### Generate time values in seconds
    time_seconds <- seq(0, (sample_count - 1) / sampling_rate_hz, by = 1 / sampling_rate_hz)

### Add generated time values as a column
    matsushiro$Time <- start_time + seconds(time_seconds)

### Plot acceleration data
    plot7 <- ggplot(matsushiro, aes(x = Time, y = Z, color = "Z")) +
    geom_line() +
    labs(title = "Matsuhiro Seismic Data",
        x = " ", y = " ") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-40000000, 40000000)) + 
    scale_color_manual(values = c("turquoise")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    plot8 <- ggplot(matsushiro, aes(x = Time, y = N_S, color = "N/S")) +
    geom_line() +
    labs(title = "North/South ",
        x = " ", y = "Acceleration (m/s/s*10^7)") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-40000000, 40000000)) + 
    scale_color_manual(values = c("orange")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    plot9 <- ggplot(matsushiro, aes(x = Time, y = E_W, color = "E/W")) +
    geom_line() +
    labs(title = "East/West ",
        x = "Time", y = " ") +
    scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE), 
                        breaks = scales::extended_breaks(n = 8),
                        limits = c(-40000000, 40000000)) + 
    scale_color_manual(values = c("green")) +
    theme_bw() +
    theme(panel.border = element_rect(color = "black", fill = NA, size = 1))

    # Create subplots
    grid.arrange(plot7, plot8, plot9, ncol = 1, nrow = 3, 
                top = textGrob("Subplots", gp = gpar(fontsize = 14)))
})

Now add buoy, eq, and seismic stations to a map

Now I want to visualize the earthquake epicenter, seismic stations, and buoys on a map.

    ### buoy coordinates
    buoy_points <- data.frame(name = c("Guadalcanal", "Apia", "Iturup"),
    lon = c(164.99,176.26, 152.58),  
    lat = c(5.37, 9.51, 42.62)      
    )

    ### EQ coordinates
    eq_point <- data.frame(name = "EQ",
    lon = 142.8600,  
    lat = 38.1033    
    )

    ### Erimo seismic station coordinates
    erimo_point <- data.frame(name = "Erimo", 
    lat = 42.02,
    lon = 143.16 
    )

    ### Matsushiro seismic station
    matsushiro_point <- data.frame(name = "Matsushiro",
    lat = 36.55,
    lon = 138.20
    )

    ### Create a map centered around Japan
    m <- leaflet() %>%
    setView(lng = 140, lat = 35, zoom = 3) %>%
    addTiles()

    ### Add buoys to map
    m <- m %>%
    addCircleMarkers(data = buoy_points, lng = ~lon, lat = ~lat, color = "red", radius = 5,popup = ~as.character(name))

    ### Add eq to map with different symbology
    m <- addCircleMarkers(m, data = eq_point, lng = ~lon, lat = ~lat, color = "green", radius = 8, popup = ~as.character(name))

    ### Add seismic stations to map
    m <- m %>%
    addCircleMarkers(data = erimo_point, lng = ~lon, lat = ~lat, color = "blue", radius = 5, popup = ~as.character(name))
    m <- m %>%
    addCircleMarkers(data = matsushiro_point, lng = ~lon, lat = ~lat, color = "darkblue", radius = 5, popup = ~as.character(name))

    # Display the map
    m

Summary

The delay in tsunami arrival time is clear across the three stations. Apia has a spike in the buoy data around the time of the earthquake, but do to Apia’ distance from the epicenter I believe this to be unrelated to the earthquake and tsunami because the tsunami waves could not travel quickly enough to reach the buoy so soon after the earthquake. This may be a storm signal or something relevant to Apia’s location among islands in Micronesia, because Guadalcanal is located on the other side of the islands of Majel and Kiribati, and does not have the same signal present. The seismic data shows the earthquake arrival as well as a smaller aftershock towards the of the recording.