This script reads in tide buoy and seismometer data from the 2011 Japan earthquake, then filters the tide buoy data to remove the tidal signal so the tsunami signal and delay in arrival time of the tsunami between stations is more visible.
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
##
## stamp
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'signal'
## The following objects are masked from 'package:pracma':
##
## conv, ifft, interp1, pchip, polyval, roots
## The following objects are masked from 'package:stats':
##
## filter, poly
The month column in the dataset is in the format ‘3’ instead of ‘03’ so it needs to be converted.
suppressWarnings({
### Convert 'MM' column to numeric
apia$MM <- as.numeric(apia$MM)
### Convert month column '3' to '03' format
apia$MM <- sprintf("%02d", apia$MM)
### Drop values where height is 9999
apia <- apia[apia$HEIGHT != 9999, , drop = FALSE]
### Create combined date column as POSIXct format
apia$Date <- as.POSIXct(paste(apia$X.YY, apia$MM, apia$DD, apia$hh, apia$mm, apia$ss, sep = " "), format='%Y %m %d %H %M %S')
### Same for itrup
iturup$MM <- as.numeric(iturup$MM)
iturup$MM <- sprintf("%02d", iturup$MM)
iturup <- iturup[iturup$HEIGHT != 9999, ]
iturup$Date <- as.POSIXct(paste(iturup$X.YY, iturup$MM, iturup$DD, iturup$hh, iturup$mm, iturup$ss, sep = " "), format='%Y %m %d %H %M %S')
### And for guadalcanal
guadalcanal$MM <- as.numeric(guadalcanal$MM)
guadalcanal$MM <- sprintf("%02d", guadalcanal$MM)
guadalcanal <- guadalcanal[guadalcanal$HEIGHT != 9999, ]
guadalcanal$Date <- as.POSIXct(paste(guadalcanal$X.YY, guadalcanal$MM, guadalcanal$DD, guadalcanal$hh, guadalcanal$mm, guadalcanal$ss, sep = " "), format='%Y %m %d %H %M %S')
### Normalize height against the average
apia_height_avg <- mean(apia$HEIGHT)
apia$height_norm <- apia$HEIGHT - apia_height_avg
iturup_height_avg <- mean(iturup$HEIGHT)
iturup$height_norm <- iturup$HEIGHT - iturup_height_avg
guadalcanal_height_avg <- mean(guadalcanal$HEIGHT)
guadalcanal$height_norm <- guadalcanal$HEIGHT - guadalcanal_height_avg
})
The data has 3 different sampling frequencies, denoted by values of 1, 2, or 3 in the ‘T’ column of the data. A value of 1 is a 15 minute sampling frequency, 2 is a 1 minute frequency, and 3 is a 15 second frequency. To look at the distribution of the different sampling rates in the data, Apia is split into 3 sets by the value in the ‘T’ column, then plotted.
suppressWarnings({
### Separate data by sampling frequency
apia1 <- subset(apia, T == 1)
apia2 <- subset(apia, T == 2)
apia3 <- subset(apia, T == 3)
### Reset the row names for each DataFrame
rownames(apia1) <- NULL
rownames(apia2) <- NULL
rownames(apia3) <- NULL
### Extract time columns for each DataFrame
time1 <- apia1$Date
time2 <- apia2$Date
time3 <- apia3$Date
apia1$Date <- as.POSIXct(paste(apia1$X.YY, apia1$MM, apia1$DD, apia1$hh, apia1$mm, apia1$ss, sep = " "), format='%Y %m %d %H %M %S')
apia2$Date <- as.POSIXct(paste(apia2$X.YY, apia2$MM, apia2$DD, apia2$hh, apia2$mm, apia2$ss, sep = " "), format='%Y %m %d %H %M %S')
apia3$Date <- as.POSIXct(paste(apia3$X.YY, apia3$MM, apia3$DD, apia3$hh, apia3$mm, apia3$ss, sep = " "), format='%Y %m %d %H %M %S')
### Create three separate plots
plot1 <- ggplot(apia1, aes(x = Date, y = height_norm)) +
geom_point(size = 0.7) +
labs(title = "Apia1 vs Date", x = "Date", y = "Height Norm")+
scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))
plot2 <- ggplot(apia2, aes(x = Date, y = height_norm)) +
geom_point(size = 0.7) +
labs(title = "Apia2 vs Date", x = "Date", y = "Height Norm")+
scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))
plot3 <- ggplot(apia3, aes(x = Date, y = height_norm)) +
geom_point(size = 0.7) +
labs(title = "Apia3 vs Date", x = "Date", y = "Height Norm")+
scale_x_datetime(limits = c(min(apia1$Date), max(apia1$Date)))
# Arrange the plots in a grid layout
grid.arrange(plot1, plot2, plot3, ncol = 1)
})
This plot separates the data by its sampling frequency, and we can see that most of the tsunami signal we want to visualize is in the 1 minute sampling frequency. The tidal signal is the longer term, higher amplitude cyclical signal that resembles a sine wave, and the tsunami signal is the smaller, high-frequency variations that are superimposed on top of it.
Now I want to plot the raw data for each station, and add scatter points that are colored by the sampling frequency.
suppressWarnings({
### Scatter plot for apia
plot1 <- ggplot(data = apia, aes(x = Date, y = height_norm, color = T)) +
geom_line() +
#geom_point(size = 1) +
labs(x = " ", y = " ") +
ggtitle("Apia") +
geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
theme_minimal()+
ylim(-0.6, 0.6)+
scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(apia$Date)))
### Scatter plot for guadalcanal
plot2 <- ggplot(data = guadalcanal, aes(x = Date, y = height_norm, color = T)) +
geom_line() +
#geom_point(size = 1) +
labs(x = " ", y = "Normalized Height") +
ggtitle("Guadalcanal") +
geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
theme_minimal()+
ylim(-0.6, 0.6)+
scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(guadalcanal$Date)))
### Scatter plot for iturup
plot3 <- ggplot(data = iturup, aes(x = Date, y = height_norm, color = T)) +
geom_line() +
#geom_point(size = 1) +
labs(x = " Date ", y = " ") +
ggtitle("Iturup") +
geom_vline(xintercept = as.numeric(as.POSIXct("2011-03-11 05:46:24")), color = "red")+
theme_minimal()+
ylim(-0.6, 0.6)+
scale_x_datetime(limits = c(as.POSIXct("2011-03-11 3:00:00"), max(iturup$Date)))
### Create subplots
grid.arrange(plot3, plot2, plot1, ncol = 1, nrow = 3,
top = textGrob("Subplots", gp = gpar(fontsize = 14)))
})
In the above plot with the raw buoy data, the red vertical line denotes the time of the earthquake. A colorbar value of 3 is a 15 seecond sampling frequendy, a value of 2 is a 1 minute sampling frequency, and a value of 1 is a 15 minute sampling frequency. Most of the data of the tsunami signal is at a 1 minute sampling frequency, which is the medium blue color.
The Fourier Transform allows me to get the frequency content of the data, which is waht I use to decide what parts of the signal I want to filter out. In order to compute the Fourier transform and apply a filter to the data to remove the tidal signal, the data needs to be resampled to a consistent frequency of 15 seconds.
suppressWarnings({
#Now resample time series data to 15s sampling frequency
new_freq = 15 #15 seconds
### Create zoo object
apia_data <- zoo(apia$height_norm, apia$Date)
### Check for duplicate timestamps
duplicated_timestamps <- duplicated(apia$Date)
### Remove duplicates
apia_unique <- apia[!duplicated_timestamps, ]
### Set Date column as the index
apia_zoo <- zoo(apia_unique$height_norm, order.by = apia_unique$Date)
### Resample the time series data
apia_resamp <- na.approx(merge(apia_zoo, zoo(, seq(start(apia_zoo), end(apia_zoo), by = new_freq))), xout = seq(start(apia_zoo), end(apia_zoo), by = new_freq))
### Convert apia_resamp to numeric
apia_resamp <- as.numeric(apia_resamp)
})
This next chunk of code computes the Fourier transform of the signal from each buoy. By looking at the amplitude vs frequency plots, I can see the tidal signal at a lower frequency and higher amplitude, and the tsunami signal appears as the smaller variations that occur above a frequency of 0.003 Hz. This means I want to design a filter that will remove parts of the signal that are at a frequency of lower than 0.003 Hz, and allow higher frequency signals to pass, hence the term ‘high-pass’ filter. Specifically, I am using a Butterworth high-pass filter.
suppressWarnings({
### Compute Fourier transform
fft_apia <- fft(apia_resamp)
fftshift_apia <- fftshift(fft_apia)
### Compute amplitude spectrum
amp_apia <- Mod(fftshift_apia)
### Number of data points
N <- length(amp_apia)
### Sampling frequency
sample_freq <- 4 # 4 samples per minute
### Calculate the time step
dt <- 1 / sample_freq
### Compute the frequency values corresponding to the FFT result
freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)
### Plot the frequency amplitude spectrum on a semilogx scale
plot(freq, amp_apia, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Apia')
grid(lwd = 1)
#Now apply butterworth filter
### Define the filter parameters
poles <- 4 # Filter order
fc <- 0.003 # Corner frequency in Hz to filter out tides
fs <- 1/15 # Sampling frequency (1 sample every 15 seconds)
### Calculate the normalized corner frequency
fnyquist <- 0.5 * fs
normalized_corner_freq <- fc / fnyquist
### Design the Butterworth highpass filter
b <- butter(poles, normalized_corner_freq, type = "high")
### Filter the data using a two-pass Butterworth highpass filter
apia_filt <- filter(b, filter(b, apia_resamp, sides = 1), sides = 1)
### Now filter the data from the other two buoys
guadalcanal_data <- zoo(guadalcanal$height_norm, guadalcanal$Date)
duplicated_timestamps <- duplicated(guadalcanal$Date)
guadalcanal_unique <- guadalcanal[!duplicated_timestamps, ]
guadalcanal_zoo <- zoo(guadalcanal_unique$height_norm, order.by = guadalcanal_unique$Date)
guadalcanal_resamp <- na.approx(merge(guadalcanal_zoo, zoo(, seq(start(guadalcanal_zoo), end(guadalcanal_zoo), by = new_freq))), xout = seq(start(guadalcanal_zoo), end(guadalcanal_zoo), by = new_freq))
guadalcanal_resamp <- as.numeric(guadalcanal_resamp)
fft_guadalcanal <- fft(guadalcanal_resamp)
fftshift_guadalcanal <- fftshift(fft_guadalcanal)
amp_guadalcanal <- Mod(fftshift_guadalcanal)
N <- length(amp_guadalcanal)
freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)
plot(freq, amp_guadalcanal, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Guadalcanal')
grid(lwd = 1)
guadalcanal_filt <- filter(b, filter(b, guadalcanal_resamp, sides = 1), sides = 1)
### and Iturup
iturup_data <- zoo(iturup$height_norm, iturup$Date)
duplicated_timestamps <- duplicated(iturup$Date)
iturup_unique <- iturup[!duplicated_timestamps, ]
iturup_zoo <- zoo(iturup_unique$height_norm, order.by = iturup_unique$Date)
iturup_resamp <- na.approx(merge(iturup_zoo, zoo(, seq(start(iturup_zoo), end(iturup_zoo), by = new_freq))), xout = seq(start(iturup_zoo), end(iturup_zoo), by = new_freq))
iturup_resamp <- as.numeric(iturup_resamp)
fft_iturup <- fft(iturup_resamp)
fftshift_iturup <- fftshift(fft_iturup)
amp_iturup <- Mod(fftshift_iturup)
N <- length(amp_iturup)
freq <- seq(-sample_freq / 2, sample_freq / 2, length.out = N)
plot(freq, amp_iturup, type = "l", log = 'x', xlab = "Frequency", ylab = "Amplitude", main = 'Iturup')
grid(lwd = 1)
iturup_filt <- filter(b, filter(b, iturup_resamp, sides = 1), sides = 1)
})
Here are the amplitude vs frequency plots.
Now I want to plot the filtered signals against the original data to see if I filtered out the tidal signal.
suppressWarnings({
### Set up plotting layout
par(mfrow = c(3, 1), mar = c(4, 4, 2, 1))
### Timestamp for earthquake on March 11, 2011 5:46pm UTC
eq_timestamp <- as.POSIXct("2011-03-11 05:46:00", tz = "UTC")
### Plot signals for iturup
plot(iturup_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Iturup")
lines(iturup_filt, col = "red")
legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
grid(lwd = 1)
### Plot signals for guadalcanal
plot(guadalcanal_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Guadalcanal")
lines(guadalcanal_filt, col = "red")
legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
grid(lwd = 1)
### Plot signals for apia
plot(apia_resamp, type = "l", col = "blue", xlab = "Time", ylab = "Amplitude", main = "Apia")
lines(apia_filt, col = "red")
legend("topright", legend = c("Original", "Filtered"), col = c("blue", "red"), lty = 1)
grid(lwd = 1)
})
The tidal signal has been filtered out and the tsunami arrival time
delay can be seen between the stations. Iturup is located closest to the
earthquake, and Guadalcanal and Apia are both in the south Pacific
ocean.
Now I want to visualize the seismic data from to seismic stations in northern and southern Japan.
suppressWarnings({
### Add the seismic data from Erimo (close to eq epicenter)
japaneq <- read.csv("C:/Users/kelsa/OneDrive/Desktop/Documents/GEOG490/japaneq.csv")
### Data has no time values so we need to generate time values for plotting
sample_count <- 42000
sampling_rate_hz <- 20
start_time <- ymd_hms("2011-03-11T05:42:19.029500Z")
### Generate time values in seconds
time_seconds <- seq(0, (sample_count - 1) / sampling_rate_hz, by = 1 / sampling_rate_hz)
### Add generated time values as a column
japaneq$Time <- start_time + seconds(time_seconds)
### Plot acceleration data
plot4 <- ggplot(japaneq, aes(x = Time, y = Z, color = "Z")) +
geom_line() +
labs(title = "Erimo Seismic Data",
x = " ", y = " ") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-20000000, 20000000)) +
scale_color_manual(values = c("turquoise")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
plot5 <- ggplot(japaneq, aes(x = Time, y = N_S, color = "N/S")) +
geom_line() +
labs(title = "North/South ",
x = " ", y = "Acceleration (m/s/s*10^7)") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-20000000, 20000000)) +
scale_color_manual(values = c("orange")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
plot6 <- ggplot(japaneq, aes(x = Time, y = E_W, color = "E/W")) +
geom_line() +
labs(title = "East/West ",
x = "Time", y = " ") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-20000000, 20000000)) +
scale_color_manual(values = c("green")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
### Create subplots
grid.arrange(plot4, plot5, plot6, ncol = 1, nrow = 3,
top = textGrob("Subplots", gp = gpar(fontsize = 14)))
})
suppressWarnings({
matsushiro <- read.csv("C:/Users/kelsa/OneDrive/Desktop/Documents/GEOG490/matsushiro.csv")
sample_count <- 42000
sampling_rate_hz <- 20
start_time <- ymd_hms("2011-03-11T05:42:19.029500Z")
### Generate time values in seconds
time_seconds <- seq(0, (sample_count - 1) / sampling_rate_hz, by = 1 / sampling_rate_hz)
### Add generated time values as a column
matsushiro$Time <- start_time + seconds(time_seconds)
### Plot acceleration data
plot7 <- ggplot(matsushiro, aes(x = Time, y = Z, color = "Z")) +
geom_line() +
labs(title = "Matsuhiro Seismic Data",
x = " ", y = " ") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-40000000, 40000000)) +
scale_color_manual(values = c("turquoise")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
plot8 <- ggplot(matsushiro, aes(x = Time, y = N_S, color = "N/S")) +
geom_line() +
labs(title = "North/South ",
x = " ", y = "Acceleration (m/s/s*10^7)") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-40000000, 40000000)) +
scale_color_manual(values = c("orange")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
plot9 <- ggplot(matsushiro, aes(x = Time, y = E_W, color = "E/W")) +
geom_line() +
labs(title = "East/West ",
x = "Time", y = " ") +
scale_y_continuous(labels = function(x) format(x / 1e7, scientific = FALSE),
breaks = scales::extended_breaks(n = 8),
limits = c(-40000000, 40000000)) +
scale_color_manual(values = c("green")) +
theme_bw() +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1))
# Create subplots
grid.arrange(plot7, plot8, plot9, ncol = 1, nrow = 3,
top = textGrob("Subplots", gp = gpar(fontsize = 14)))
})
Now I want to visualize the earthquake epicenter, seismic stations, and buoys on a map.
### buoy coordinates
buoy_points <- data.frame(name = c("Guadalcanal", "Apia", "Iturup"),
lon = c(164.99,176.26, 152.58),
lat = c(5.37, 9.51, 42.62)
)
### EQ coordinates
eq_point <- data.frame(name = "EQ",
lon = 142.8600,
lat = 38.1033
)
### Erimo seismic station coordinates
erimo_point <- data.frame(name = "Erimo",
lat = 42.02,
lon = 143.16
)
### Matsushiro seismic station
matsushiro_point <- data.frame(name = "Matsushiro",
lat = 36.55,
lon = 138.20
)
### Create a map centered around Japan
m <- leaflet() %>%
setView(lng = 140, lat = 35, zoom = 3) %>%
addTiles()
### Add buoys to map
m <- m %>%
addCircleMarkers(data = buoy_points, lng = ~lon, lat = ~lat, color = "red", radius = 5,popup = ~as.character(name))
### Add eq to map with different symbology
m <- addCircleMarkers(m, data = eq_point, lng = ~lon, lat = ~lat, color = "green", radius = 8, popup = ~as.character(name))
### Add seismic stations to map
m <- m %>%
addCircleMarkers(data = erimo_point, lng = ~lon, lat = ~lat, color = "blue", radius = 5, popup = ~as.character(name))
m <- m %>%
addCircleMarkers(data = matsushiro_point, lng = ~lon, lat = ~lat, color = "darkblue", radius = 5, popup = ~as.character(name))
# Display the map
m
The delay in tsunami arrival time is clear across the three stations. Apia has a spike in the buoy data around the time of the earthquake, but do to Apia’ distance from the epicenter I believe this to be unrelated to the earthquake and tsunami because the tsunami waves could not travel quickly enough to reach the buoy so soon after the earthquake. This may be a storm signal or something relevant to Apia’s location among islands in Micronesia, because Guadalcanal is located on the other side of the islands of Majel and Kiribati, and does not have the same signal present. The seismic data shows the earthquake arrival as well as a smaller aftershock towards the of the recording.