Here i load all the needed library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.92 loaded
library(scatterplot3d)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.6 ✓ purrr 0.3.4
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(patchwork)
df <- read.csv("data.csv")
head(df)
## time latitude longitude depth mag magType nst gap dmin
## 1 2021-10-18T22:21:32.032Z 10.1039 41.5128 10.00 4.5 mb NA 121 1.930
## 2 2021-10-15T17:42:25.708Z 9.9933 41.6294 13.03 4.5 mb NA 90 1.940
## 3 2021-07-28T19:31:38.767Z 12.7208 48.2020 10.00 4.6 mb NA 144 5.369
## 4 2021-06-18T11:29:05.834Z 12.4004 47.3260 10.00 4.5 mb NA 94 4.467
## 5 2021-05-15T06:13:34.513Z 9.1826 39.6144 10.00 4.0 mb NA 157 0.966
## 6 2021-04-23T04:54:47.872Z 9.4032 42.0914 10.00 4.2 mb NA 223 2.240
## rms net id updated place
## 1 0.74 us us6000fwqs 2021-10-26T02:05:53.040Z 68 km NW of Dire Dawa, Ethiopia
## 2 0.69 us us6000fuzi 2021-10-24T16:24:00.706Z 51 km NNW of Dire Dawa, Ethiopia
## 3 0.60 us us6000f1xn 2021-10-03T05:24:26.040Z 172 km N of Las Khorey, Somalia
## 4 0.41 us us7000ee3f 2021-08-27T18:57:16.040Z 166 km NW of Las Khorey, Somalia
## 5 0.19 us us7000e4rg 2021-09-12T13:26:16.038Z 45 km NW of Metah?ra, Ethiopia
## 6 0.76 us us7000dyjw 2021-07-07T18:33:47.040Z 10 km NNW of Harar, Ethiopia
## type horizontalError depthError magError magNst status locationSource
## 1 earthquake 9.9 2.0 0.192 8 reviewed us
## 2 earthquake 8.9 4.8 0.082 47 reviewed us
## 3 earthquake 10.0 1.9 0.314 3 reviewed us
## 4 earthquake 8.6 1.9 0.075 52 reviewed us
## 5 earthquake 8.5 2.0 0.259 5 reviewed us
## 6 earthquake 8.2 1.9 0.198 7 reviewed us
## magSource
## 1 us
## 2 us
## 3 us
## 4 us
## 5 us
## 6 us
str(df)
## 'data.frame': 981 obs. of 22 variables:
## $ time : chr "2021-10-18T22:21:32.032Z" "2021-10-15T17:42:25.708Z" "2021-07-28T19:31:38.767Z" "2021-06-18T11:29:05.834Z" ...
## $ latitude : num 10.1 9.99 12.72 12.4 9.18 ...
## $ longitude : num 41.5 41.6 48.2 47.3 39.6 ...
## $ depth : num 10 13 10 10 10 ...
## $ mag : num 4.5 4.5 4.6 4.5 4 4.2 4.6 4.3 4.4 4.6 ...
## $ magType : chr "mb" "mb" "mb" "mb" ...
## $ nst : int NA NA NA NA NA NA NA NA NA NA ...
## $ gap : num 121 90 144 94 157 223 43 148 168 40 ...
## $ dmin : num 1.93 1.94 5.369 4.467 0.966 ...
## $ rms : num 0.74 0.69 0.6 0.41 0.19 0.76 0.68 0.26 0.48 1.08 ...
## $ net : chr "us" "us" "us" "us" ...
## $ id : chr "us6000fwqs" "us6000fuzi" "us6000f1xn" "us7000ee3f" ...
## $ updated : chr "2021-10-26T02:05:53.040Z" "2021-10-24T16:24:00.706Z" "2021-10-03T05:24:26.040Z" "2021-08-27T18:57:16.040Z" ...
## $ place : chr "68 km NW of Dire Dawa, Ethiopia" "51 km NNW of Dire Dawa, Ethiopia" "172 km N of Las Khorey, Somalia" "166 km NW of Las Khorey, Somalia" ...
## $ type : chr "earthquake" "earthquake" "earthquake" "earthquake" ...
## $ horizontalError: num 9.9 8.9 10 8.6 8.5 8.2 9.2 7.6 9.3 10.2 ...
## $ depthError : num 2 4.8 1.9 1.9 2 1.9 1.9 1.9 2 1.9 ...
## $ magError : num 0.192 0.082 0.314 0.075 0.259 0.198 0.059 0.142 0.108 0.075 ...
## $ magNst : int 8 47 3 52 5 7 86 14 25 54 ...
## $ status : chr "reviewed" "reviewed" "reviewed" "reviewed" ...
## $ locationSource : chr "us" "us" "us" "us" ...
## $ magSource : chr "us" "us" "us" "us" ...
summary(df)
## time latitude longitude depth
## Length:981 Min. : 3.678 Min. :33.20 Min. : 0.1
## Class :character 1st Qu.:11.764 1st Qu.:40.57 1st Qu.:10.0
## Mode :character Median :11.986 Median :43.12 Median :10.0
## Mean :11.894 Mean :42.78 Mean :11.7
## 3rd Qu.:12.514 3rd Qu.:44.03 3rd Qu.:10.0
## Max. :14.761 Max. :48.34 Max. :62.0
##
## mag magType nst gap
## Min. :3.000 Length:981 Min. : 4.0 Min. : 25.40
## 1st Qu.:4.300 Class :character 1st Qu.: 16.0 1st Qu.: 79.88
## Median :4.500 Mode :character Median : 27.0 Median :113.50
## Mean :4.591 Mean : 40.8 Mean :124.45
## 3rd Qu.:4.800 3rd Qu.: 50.0 3rd Qu.:155.55
## Max. :6.500 Max. :390.0 Max. :327.20
## NA's :408 NA's :353
## dmin rms net id
## Min. : 0.032 Min. :0.100 Length:981 Length:981
## 1st Qu.: 1.837 1st Qu.:0.870 Class :character Class :character
## Median : 3.364 Median :1.010 Mode :character Mode :character
## Mean : 3.373 Mean :1.013
## 3rd Qu.: 4.399 3rd Qu.:1.200
## Max. :11.561 Max. :1.920
## NA's :903 NA's :131
## updated place type horizontalError
## Length:981 Length:981 Length:981 Min. : 3.600
## Class :character Class :character Class :character 1st Qu.: 7.925
## Mode :character Mode :character Mode :character Median : 9.150
## Mean : 9.493
## 3rd Qu.:10.950
## Max. :19.700
## NA's :907
## depthError magError magNst status
## Min. : 0.000 Min. :0.0550 Min. : 1.00 Length:981
## 1st Qu.: 0.000 1st Qu.:0.0810 1st Qu.: 2.00 Class :character
## Median : 1.900 Median :0.1170 Median : 6.00 Mode :character
## Mean : 5.081 Mean :0.1342 Mean : 12.53
## 3rd Qu.: 5.100 3rd Qu.:0.1540 3rd Qu.: 16.00
## Max. :65.500 Max. :0.3690 Max. :104.00
## NA's :828 NA's :904 NA's :283
## locationSource magSource
## Length:981 Length:981
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
glimpse(df)
## Rows: 981
## Columns: 22
## $ time <chr> "2021-10-18T22:21:32.032Z", "2021-10-15T17:42:25.708Z"…
## $ latitude <dbl> 10.1039, 9.9933, 12.7208, 12.4004, 9.1826, 9.4032, 12.…
## $ longitude <dbl> 41.5128, 41.6294, 48.2020, 47.3260, 39.6144, 42.0914, …
## $ depth <dbl> 10.00, 13.03, 10.00, 10.00, 10.00, 10.00, 10.00, 10.00…
## $ mag <dbl> 4.5, 4.5, 4.6, 4.5, 4.0, 4.2, 4.6, 4.3, 4.4, 4.6, 4.5,…
## $ magType <chr> "mb", "mb", "mb", "mb", "mb", "mb", "mb", "mb", "mb", …
## $ nst <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ gap <dbl> 121, 90, 144, 94, 157, 223, 43, 148, 168, 40, 141, 118…
## $ dmin <dbl> 1.930, 1.940, 5.369, 4.467, 0.966, 2.240, 5.348, 5.375…
## $ rms <dbl> 0.74, 0.69, 0.60, 0.41, 0.19, 0.76, 0.68, 0.26, 0.48, …
## $ net <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …
## $ id <chr> "us6000fwqs", "us6000fuzi", "us6000f1xn", "us7000ee3f"…
## $ updated <chr> "2021-10-26T02:05:53.040Z", "2021-10-24T16:24:00.706Z"…
## $ place <chr> "68 km NW of Dire Dawa, Ethiopia", "51 km NNW of Dire …
## $ type <chr> "earthquake", "earthquake", "earthquake", "earthquake"…
## $ horizontalError <dbl> 9.9, 8.9, 10.0, 8.6, 8.5, 8.2, 9.2, 7.6, 9.3, 10.2, 6.…
## $ depthError <dbl> 2.0, 4.8, 1.9, 1.9, 2.0, 1.9, 1.9, 1.9, 2.0, 1.9, 1.9,…
## $ magError <dbl> 0.192, 0.082, 0.314, 0.075, 0.259, 0.198, 0.059, 0.142…
## $ magNst <int> 8, 47, 3, 52, 5, 7, 86, 14, 25, 54, 12, 16, 25, 23, 41…
## $ status <chr> "reviewed", "reviewed", "reviewed", "reviewed", "revie…
## $ locationSource <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …
## $ magSource <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …
The significance of the features in the data set are the following (reference for the technical terms: earthquake.usgs.gov):
I perform few processing of the data: I replace NAs with 0. I extract the date from the time for easier plotting values against time.
#process the data - replace NAs with 0
df[is.na(df)] <- 0
#process the data - extract date from the time field
df$date <- as.Date(substring(df$time,1,10),"%Y-%m-%d")
We investigate the correlation between numerical (and without missing data) dimmensions in the data using corrplot library functions. We are using the default correlation option for the R cor function, Pearson’s correlation.
# Correlation plot
earthquakeCor <- df[,c("latitude","longitude","depth","mag", "nst","gap",
"dmin","rms","horizontalError","depthError","magError","magNst")]
correlations <- cor(earthquakeCor)
p <- corrplot(correlations, method="circle")
We can observe that there is a strong inverse correlation between rms and gap and there is a smaller inverse correlation between gap and mag (magnitude). Positive correlation exists between horrizontalError and dmin and smaller positive correlation exists between rms and mag (magnitude) and notably between depth and longitude. Also there are positive correlations between mag and magNst and between magError and horizontalError.
The positive correlation between horrizontalError and dmin seems easy to explain: one is the error in estimating the distance from the observation point and the position of the earthquake whilst dmin is the distance to the nearest measurement/retection station.
One can try to explain as well the positive correlation between mag and magNst. One possible explanation could be that the bigger one earthquake’s magnitude, the larger the number of stations that will detect the event and therefore magNst will be larger. Let’s verify this assumption.
df %>% group_by(magNst) %>%
summarize(avg = mean(mag), min=min(mag), max=max(mag), events = length(mag)) %>%
ungroup() -> dfm
head(dfm)
## # A tibble: 6 × 5
## magNst avg min max events
## <dbl> <dbl> <dbl> <dbl> <int>
## 1 0 4.93 3 6.5 283
## 2 1 4.21 3.5 5.2 110
## 3 2 4.31 3.8 5 75
## 4 3 4.31 3.9 4.8 48
## 5 4 4.31 3.8 4.9 56
## 6 5 4.36 3.7 5.6 37
glimpse(dfm)
## Rows: 69
## Columns: 5
## $ magNst <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ avg <dbl> 4.926148, 4.206364, 4.306667, 4.312500, 4.310714, 4.364865, 4.4…
## $ min <dbl> 3.0, 3.5, 3.8, 3.9, 3.8, 3.7, 4.0, 4.1, 4.1, 4.1, 4.2, 4.2, 4.2…
## $ max <dbl> 6.5, 5.2, 5.0, 4.8, 4.9, 5.6, 4.9, 4.9, 4.8, 6.1, 4.7, 4.8, 4.7…
## $ events <int> 283, 110, 75, 48, 56, 37, 31, 24, 21, 22, 10, 21, 16, 15, 13, 1…
tail(dfm)
## # A tibble: 6 × 5
## magNst avg min max events
## <dbl> <dbl> <dbl> <dbl> <int>
## 1 74 4.8 4.8 4.8 1
## 2 86 4.6 4.6 4.6 1
## 3 90 5 5 5 1
## 4 91 4.6 4.6 4.6 1
## 5 102 4.7 4.7 4.7 1
## 6 104 4.9 4.9 4.9 1
One can see that actually for values of magNst from 1 to 5 the assumption seems correct. For larger values of magNst, mag oscilates above 4 degrees Richter and sustain as well the assumption. In the same time, the vast majority of the events does not have a magNst set correctly, 403 of these having it currenlty set to 0 (being actually not set initially).
I show here timeseries of earthquakes magnitude.
ggplot(df, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="dotted",color="blue") + theme_bw() +
labs(title="Time series for Ethiopian Earthquakes (1906-2021)", x="Date", y="Magnitude")
We observe that from 1906 t0 1960 years were only few major earthquakes (more than 5 degree of magnitude). In the next plot we show only the events with degree of magnitude larger or equal to 5.
dfmag <- df %>% filter(mag >= 5)
ggplot(dfmag, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="solid",color="blue") + theme_bw() +
labs(title="Time series for large (magnitude > 5) Ethiopians Earthquakes (1906-2021)",
x="Date", y="Magnitude")
While the Magnitude (mag) express the energy of the earthquake, the depth is the depth where the earthquake occurs. Magnitude is important, because the larger the energy of the earthquake, the larger are the potential effects of such an event. The deepth is also important because this can also dramatically influence the propagation of the earthquake and the final effect of the event.
#boxplot of magnitude
boxplot(df$mag, main="Boxplot for magnitude", ylab="Magnitude")
We can easily observe that the median is around 4.5 while 1st and 3rd quartiles are above 4 and 5 respectively. The outliers above 4th quartile are the significant events.
#boxplot of depth
boxplot(df$depth, main="Boxplot for depth", ylab="Depth")
We can also observe in the above graph the distribution of depth, with the average around 10 km.
The magnitude information is provided by several sources.
The following graphs shows the boxplots for magnitude, grouped by magnitude source.
#boxplot of magnitude, grouped by Magnitude source
colors = c("red", "blue", "yellow", "green", "magenta",
"lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magSource, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8),
ylab="Magnitude", main="Magnitude, grouped by magnitude source")
Magnitude type is describing the type of magnitude measurement. Here is an explanation of the magnitude types:
#boxplot of magnitude, grouped by magnitude type
colors = c("red", "blue", "yellow", "green", "magenta",
"lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magType, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8),
ylab = "Magnitude", main="Magnitude grouped by magnitude type")
We can observe that the majority of the high magnitude events are using Mw and Mw derivates (mwb, mwc, mwr, mww).
Let’s see the relationship between magnitude and position.
attach(df)
scatterplot3d(longitude,latitude,mag, pch=16, highlight.3d=TRUE,
type="h",
main="Earthquakes in Ethiopia (1906-2021)\nMagnitude vs. position",
xlab="Longitude",
ylab="Latitude",
zlab="Magnitude [Richter]")
We can see that the majority of large earthquakes are located between latitude 4 and 6 and longitude 40 and 45. This correspond to a zone around region in Ethiopia.
In the next graph we show only the major earthquakes in Romania (with magnitude larger or equal to 5), specifying the depth and year as well. The source of the measurement is specified by color of the plot (and in this case is either Bucharest or US) and the position of the earthquake is specified as a legend near each graph point.
#create column indicating Earthquake year
dfmag$year <- as.integer(strftime(dfmag$date, "%Y"))
# create column indicating point color
dfmag$pcolor <- "darkgreen" #all other than Bucharest or US
dfmag$pcolor[dfmag$locationSource=="buc"] <- "red"
dfmag$pcolor[dfmag$locationSource=="us"] <- "blue"
with(dfmag, {
s3d <- scatterplot3d(year, depth, mag, # x y and z axis
color=pcolor, pch=19, # circle color indicates location Source
type="h", lty.hplot=2, # lines to the horizontal plane
scale.y=.95, # scale y axis (reduce by 25%)
main="Major earthquakes (magnitude > 5) in Ethiopia (1906-2021)",
xlab="Year",
ylab="Depth [Km]",
zlab="Magnitude [Richter]")
s3d.coords <- s3d$xyz.convert(year, depth, mag)
text(s3d.coords$x, s3d.coords$y, # x and y coordinates
labels=place, # text to plot
pos=4, cex=.5) # shrink text 50% and place on right side of points)
# add the legend
legend("topleft", inset=.05, # location and inset
bty="n", cex=.75, # suppress legend box, shrink text 50%
title="Location source",
c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})
In the next graph we show as well the largest earthquakes but we change the legend for each point to show the magnitude type. For the largest earthquakes, the mw (energy-motivated magnitude scale) is used; as well, most of the major events have the location source US
with(dfmag, {
s3d <- scatterplot3d(year, depth, mag, # x y and z axis
color=pcolor, pch=19, # circle color indicates location Source
type="h", lty.hplot=2, # lines to the horizontal plane
scale.y=.75, # scale y axis (reduce by 25%)
main="Major earthquakes (magnitude > 5) in Ethiopia (1906-2021)",
xlab="Year",
ylab="Depth [Km]",
zlab="Magnitude [Richter]")
s3d.coords <- s3d$xyz.convert(year, depth, mag)
text(s3d.coords$x, s3d.coords$y, # x and y coordinates
labels=magType, # text to plot
pos=4, cex=.5) # shrink text 50% and place on right side of points)
# add the legend
legend("topleft", inset=.05, # location and inset
bty="n", cex=.75, # suppress legend box, shrink text 50%
title="Location source",
c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})
Let’s see all earthquakes events displayed on a plotly choropleth map. Each event is shown as a dot. The dots color corresponds to the earthquake magnitude.
g <- list(
scope = 'ethiopia',
projection = list(type = 'Mercator'),
showland = TRUE,
landcolor = toRGB("gray85"), subunitcolor = toRGB("green"), countrycolor = toRGB("black"),
countrywidth = 0.5, subunitwidth = 0.5,
showlakes = TRUE, lakecolor = toRGB("white"),
showsubunits = TRUE, showcountries = TRUE,
resolution = 50,
lonaxis = list(showgrid = TRUE, gridwidth = 0.5, range = c(20,30), dtick = 1),
lataxis = list(showgrid = TRUE, gridwidth = 0.5, range = c(40,48), dtick = 1)
)
plot_geo(df, lat = ~latitude, lon = ~longitude) %>%
add_markers(
text = ~paste(mag),
color = ~mag, symbol = I("circle"), size = I(9), hoverinfo = "text"
) %>%
colorbar(title = "Magnitude") %>%
layout(
title = 'Earthquakes in Ethiopia 1906-2021', geo = g
)