R Markdown

Here i load all the needed library

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.92 loaded
library(scatterplot3d) 
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.6     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag()     masks stats::lag()
library(ggplot2)
library(patchwork)
df <- read.csv("data.csv")
head(df)
##                       time latitude longitude depth mag magType nst gap  dmin
## 1 2021-10-18T22:21:32.032Z  10.1039   41.5128 10.00 4.5      mb  NA 121 1.930
## 2 2021-10-15T17:42:25.708Z   9.9933   41.6294 13.03 4.5      mb  NA  90 1.940
## 3 2021-07-28T19:31:38.767Z  12.7208   48.2020 10.00 4.6      mb  NA 144 5.369
## 4 2021-06-18T11:29:05.834Z  12.4004   47.3260 10.00 4.5      mb  NA  94 4.467
## 5 2021-05-15T06:13:34.513Z   9.1826   39.6144 10.00 4.0      mb  NA 157 0.966
## 6 2021-04-23T04:54:47.872Z   9.4032   42.0914 10.00 4.2      mb  NA 223 2.240
##    rms net         id                  updated                            place
## 1 0.74  us us6000fwqs 2021-10-26T02:05:53.040Z  68 km NW of Dire Dawa, Ethiopia
## 2 0.69  us us6000fuzi 2021-10-24T16:24:00.706Z 51 km NNW of Dire Dawa, Ethiopia
## 3 0.60  us us6000f1xn 2021-10-03T05:24:26.040Z  172 km N of Las Khorey, Somalia
## 4 0.41  us us7000ee3f 2021-08-27T18:57:16.040Z 166 km NW of Las Khorey, Somalia
## 5 0.19  us us7000e4rg 2021-09-12T13:26:16.038Z   45 km NW of Metah?ra, Ethiopia
## 6 0.76  us us7000dyjw 2021-07-07T18:33:47.040Z     10 km NNW of Harar, Ethiopia
##         type horizontalError depthError magError magNst   status locationSource
## 1 earthquake             9.9        2.0    0.192      8 reviewed             us
## 2 earthquake             8.9        4.8    0.082     47 reviewed             us
## 3 earthquake            10.0        1.9    0.314      3 reviewed             us
## 4 earthquake             8.6        1.9    0.075     52 reviewed             us
## 5 earthquake             8.5        2.0    0.259      5 reviewed             us
## 6 earthquake             8.2        1.9    0.198      7 reviewed             us
##   magSource
## 1        us
## 2        us
## 3        us
## 4        us
## 5        us
## 6        us
str(df)
## 'data.frame':    981 obs. of  22 variables:
##  $ time           : chr  "2021-10-18T22:21:32.032Z" "2021-10-15T17:42:25.708Z" "2021-07-28T19:31:38.767Z" "2021-06-18T11:29:05.834Z" ...
##  $ latitude       : num  10.1 9.99 12.72 12.4 9.18 ...
##  $ longitude      : num  41.5 41.6 48.2 47.3 39.6 ...
##  $ depth          : num  10 13 10 10 10 ...
##  $ mag            : num  4.5 4.5 4.6 4.5 4 4.2 4.6 4.3 4.4 4.6 ...
##  $ magType        : chr  "mb" "mb" "mb" "mb" ...
##  $ nst            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ gap            : num  121 90 144 94 157 223 43 148 168 40 ...
##  $ dmin           : num  1.93 1.94 5.369 4.467 0.966 ...
##  $ rms            : num  0.74 0.69 0.6 0.41 0.19 0.76 0.68 0.26 0.48 1.08 ...
##  $ net            : chr  "us" "us" "us" "us" ...
##  $ id             : chr  "us6000fwqs" "us6000fuzi" "us6000f1xn" "us7000ee3f" ...
##  $ updated        : chr  "2021-10-26T02:05:53.040Z" "2021-10-24T16:24:00.706Z" "2021-10-03T05:24:26.040Z" "2021-08-27T18:57:16.040Z" ...
##  $ place          : chr  "68 km NW of Dire Dawa, Ethiopia" "51 km NNW of Dire Dawa, Ethiopia" "172 km N of Las Khorey, Somalia" "166 km NW of Las Khorey, Somalia" ...
##  $ type           : chr  "earthquake" "earthquake" "earthquake" "earthquake" ...
##  $ horizontalError: num  9.9 8.9 10 8.6 8.5 8.2 9.2 7.6 9.3 10.2 ...
##  $ depthError     : num  2 4.8 1.9 1.9 2 1.9 1.9 1.9 2 1.9 ...
##  $ magError       : num  0.192 0.082 0.314 0.075 0.259 0.198 0.059 0.142 0.108 0.075 ...
##  $ magNst         : int  8 47 3 52 5 7 86 14 25 54 ...
##  $ status         : chr  "reviewed" "reviewed" "reviewed" "reviewed" ...
##  $ locationSource : chr  "us" "us" "us" "us" ...
##  $ magSource      : chr  "us" "us" "us" "us" ...
summary(df)
##      time              latitude        longitude         depth     
##  Length:981         Min.   : 3.678   Min.   :33.20   Min.   : 0.1  
##  Class :character   1st Qu.:11.764   1st Qu.:40.57   1st Qu.:10.0  
##  Mode  :character   Median :11.986   Median :43.12   Median :10.0  
##                     Mean   :11.894   Mean   :42.78   Mean   :11.7  
##                     3rd Qu.:12.514   3rd Qu.:44.03   3rd Qu.:10.0  
##                     Max.   :14.761   Max.   :48.34   Max.   :62.0  
##                                                                    
##       mag          magType               nst             gap        
##  Min.   :3.000   Length:981         Min.   :  4.0   Min.   : 25.40  
##  1st Qu.:4.300   Class :character   1st Qu.: 16.0   1st Qu.: 79.88  
##  Median :4.500   Mode  :character   Median : 27.0   Median :113.50  
##  Mean   :4.591                      Mean   : 40.8   Mean   :124.45  
##  3rd Qu.:4.800                      3rd Qu.: 50.0   3rd Qu.:155.55  
##  Max.   :6.500                      Max.   :390.0   Max.   :327.20  
##                                     NA's   :408     NA's   :353     
##       dmin             rms            net                 id           
##  Min.   : 0.032   Min.   :0.100   Length:981         Length:981        
##  1st Qu.: 1.837   1st Qu.:0.870   Class :character   Class :character  
##  Median : 3.364   Median :1.010   Mode  :character   Mode  :character  
##  Mean   : 3.373   Mean   :1.013                                        
##  3rd Qu.: 4.399   3rd Qu.:1.200                                        
##  Max.   :11.561   Max.   :1.920                                        
##  NA's   :903      NA's   :131                                          
##    updated             place               type           horizontalError 
##  Length:981         Length:981         Length:981         Min.   : 3.600  
##  Class :character   Class :character   Class :character   1st Qu.: 7.925  
##  Mode  :character   Mode  :character   Mode  :character   Median : 9.150  
##                                                           Mean   : 9.493  
##                                                           3rd Qu.:10.950  
##                                                           Max.   :19.700  
##                                                           NA's   :907     
##    depthError        magError          magNst          status         
##  Min.   : 0.000   Min.   :0.0550   Min.   :  1.00   Length:981        
##  1st Qu.: 0.000   1st Qu.:0.0810   1st Qu.:  2.00   Class :character  
##  Median : 1.900   Median :0.1170   Median :  6.00   Mode  :character  
##  Mean   : 5.081   Mean   :0.1342   Mean   : 12.53                     
##  3rd Qu.: 5.100   3rd Qu.:0.1540   3rd Qu.: 16.00                     
##  Max.   :65.500   Max.   :0.3690   Max.   :104.00                     
##  NA's   :828      NA's   :904      NA's   :283                        
##  locationSource      magSource        
##  Length:981         Length:981        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
glimpse(df)
## Rows: 981
## Columns: 22
## $ time            <chr> "2021-10-18T22:21:32.032Z", "2021-10-15T17:42:25.708Z"…
## $ latitude        <dbl> 10.1039, 9.9933, 12.7208, 12.4004, 9.1826, 9.4032, 12.…
## $ longitude       <dbl> 41.5128, 41.6294, 48.2020, 47.3260, 39.6144, 42.0914, …
## $ depth           <dbl> 10.00, 13.03, 10.00, 10.00, 10.00, 10.00, 10.00, 10.00…
## $ mag             <dbl> 4.5, 4.5, 4.6, 4.5, 4.0, 4.2, 4.6, 4.3, 4.4, 4.6, 4.5,…
## $ magType         <chr> "mb", "mb", "mb", "mb", "mb", "mb", "mb", "mb", "mb", …
## $ nst             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ gap             <dbl> 121, 90, 144, 94, 157, 223, 43, 148, 168, 40, 141, 118…
## $ dmin            <dbl> 1.930, 1.940, 5.369, 4.467, 0.966, 2.240, 5.348, 5.375…
## $ rms             <dbl> 0.74, 0.69, 0.60, 0.41, 0.19, 0.76, 0.68, 0.26, 0.48, …
## $ net             <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …
## $ id              <chr> "us6000fwqs", "us6000fuzi", "us6000f1xn", "us7000ee3f"…
## $ updated         <chr> "2021-10-26T02:05:53.040Z", "2021-10-24T16:24:00.706Z"…
## $ place           <chr> "68 km NW of Dire Dawa, Ethiopia", "51 km NNW of Dire …
## $ type            <chr> "earthquake", "earthquake", "earthquake", "earthquake"…
## $ horizontalError <dbl> 9.9, 8.9, 10.0, 8.6, 8.5, 8.2, 9.2, 7.6, 9.3, 10.2, 6.…
## $ depthError      <dbl> 2.0, 4.8, 1.9, 1.9, 2.0, 1.9, 1.9, 1.9, 2.0, 1.9, 1.9,…
## $ magError        <dbl> 0.192, 0.082, 0.314, 0.075, 0.259, 0.198, 0.059, 0.142…
## $ magNst          <int> 8, 47, 3, 52, 5, 7, 86, 14, 25, 54, 12, 16, 25, 23, 41…
## $ status          <chr> "reviewed", "reviewed", "reviewed", "reviewed", "revie…
## $ locationSource  <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …
## $ magSource       <chr> "us", "us", "us", "us", "us", "us", "us", "us", "us", …

The significance of the features in the data set are the following (reference for the technical terms: earthquake.usgs.gov):

I perform few processing of the data: I replace NAs with 0. I extract the date from the time for easier plotting values against time.

#process the data - replace NAs with 0
df[is.na(df)] <- 0
#process the data - extract date from the time field
df$date <- as.Date(substring(df$time,1,10),"%Y-%m-%d")

Correlation plot

We investigate the correlation between numerical (and without missing data) dimmensions in the data using corrplot library functions. We are using the default correlation option for the R cor function, Pearson’s correlation.

# Correlation plot
earthquakeCor <- df[,c("latitude","longitude","depth","mag", "nst","gap",
               "dmin","rms","horizontalError","depthError","magError","magNst")]
correlations <- cor(earthquakeCor)
p <- corrplot(correlations, method="circle")

We can observe that there is a strong inverse correlation between rms and gap and there is a smaller inverse correlation between gap and mag (magnitude). Positive correlation exists between horrizontalError and dmin and smaller positive correlation exists between rms and mag (magnitude) and notably between depth and longitude. Also there are positive correlations between mag and magNst and between magError and horizontalError.

The positive correlation between horrizontalError and dmin seems easy to explain: one is the error in estimating the distance from the observation point and the position of the earthquake whilst dmin is the distance to the nearest measurement/retection station.

One can try to explain as well the positive correlation between mag and magNst. One possible explanation could be that the bigger one earthquake’s magnitude, the larger the number of stations that will detect the event and therefore magNst will be larger. Let’s verify this assumption.

df %>% group_by(magNst) %>% 
        summarize(avg = mean(mag), min=min(mag), max=max(mag), events = length(mag)) %>% 
        ungroup() -> dfm
head(dfm)
## # A tibble: 6 × 5
##   magNst   avg   min   max events
##    <dbl> <dbl> <dbl> <dbl>  <int>
## 1      0  4.93   3     6.5    283
## 2      1  4.21   3.5   5.2    110
## 3      2  4.31   3.8   5       75
## 4      3  4.31   3.9   4.8     48
## 5      4  4.31   3.8   4.9     56
## 6      5  4.36   3.7   5.6     37
glimpse(dfm)
## Rows: 69
## Columns: 5
## $ magNst <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ avg    <dbl> 4.926148, 4.206364, 4.306667, 4.312500, 4.310714, 4.364865, 4.4…
## $ min    <dbl> 3.0, 3.5, 3.8, 3.9, 3.8, 3.7, 4.0, 4.1, 4.1, 4.1, 4.2, 4.2, 4.2…
## $ max    <dbl> 6.5, 5.2, 5.0, 4.8, 4.9, 5.6, 4.9, 4.9, 4.8, 6.1, 4.7, 4.8, 4.7…
## $ events <int> 283, 110, 75, 48, 56, 37, 31, 24, 21, 22, 10, 21, 16, 15, 13, 1…
tail(dfm)
## # A tibble: 6 × 5
##   magNst   avg   min   max events
##    <dbl> <dbl> <dbl> <dbl>  <int>
## 1     74   4.8   4.8   4.8      1
## 2     86   4.6   4.6   4.6      1
## 3     90   5     5     5        1
## 4     91   4.6   4.6   4.6      1
## 5    102   4.7   4.7   4.7      1
## 6    104   4.9   4.9   4.9      1

One can see that actually for values of magNst from 1 to 5 the assumption seems correct. For larger values of magNst, mag oscilates above 4 degrees Richter and sustain as well the assumption. In the same time, the vast majority of the events does not have a magNst set correctly, 403 of these having it currenlty set to 0 (being actually not set initially).

Time series of earthquakes magnitude

I show here timeseries of earthquakes magnitude.

ggplot(df, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="dotted",color="blue") + theme_bw() + 
labs(title="Time series for Ethiopian Earthquakes (1906-2021)", x="Date", y="Magnitude")

We observe that from 1906 t0 1960 years were only few major earthquakes (more than 5 degree of magnitude). In the next plot we show only the events with degree of magnitude larger or equal to 5.

dfmag <- df %>% filter(mag >= 5) 

ggplot(dfmag, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="solid",color="blue") + theme_bw() + 
labs(title="Time series for large (magnitude > 5) Ethiopians Earthquakes (1906-2021)", 
x="Date", y="Magnitude")

Boxplots for magnitude and depth

While the Magnitude (mag) express the energy of the earthquake, the depth is the depth where the earthquake occurs. Magnitude is important, because the larger the energy of the earthquake, the larger are the potential effects of such an event. The deepth is also important because this can also dramatically influence the propagation of the earthquake and the final effect of the event.

#boxplot of magnitude
boxplot(df$mag, main="Boxplot for magnitude", ylab="Magnitude")

We can easily observe that the median is around 4.5 while 1st and 3rd quartiles are above 4 and 5 respectively. The outliers above 4th quartile are the significant events.

#boxplot of depth
boxplot(df$depth, main="Boxplot for depth", ylab="Depth")

We can also observe in the above graph the distribution of depth, with the average around 10 km.

Boxplots of magnitude, grouped by magnitude source

The magnitude information is provided by several sources.

The following graphs shows the boxplots for magnitude, grouped by magnitude source.

#boxplot of magnitude, grouped by Magnitude source
colors = c("red", "blue", "yellow", "green", "magenta",
           "lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magSource, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8), 
        ylab="Magnitude", main="Magnitude, grouped by magnitude source")

Magnitude, grouped by magnitude type

Magnitude type is describing the type of magnitude measurement. Here is an explanation of the magnitude types:

#boxplot of magnitude, grouped by magnitude type
colors = c("red", "blue", "yellow", "green", "magenta",
           "lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magType, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8),
       ylab = "Magnitude", main="Magnitude grouped by magnitude type")

We can observe that the majority of the high magnitude events are using Mw and Mw derivates (mwb, mwc, mwr, mww).

Magnitude versus position

Let’s see the relationship between magnitude and position.

attach(df)
scatterplot3d(longitude,latitude,mag, pch=16, highlight.3d=TRUE,
              type="h",
              main="Earthquakes in Ethiopia (1906-2021)\nMagnitude vs. position",
              xlab="Longitude",
              ylab="Latitude",
              zlab="Magnitude [Richter]")

We can see that the majority of large earthquakes are located between latitude 4 and 6 and longitude 40 and 45. This correspond to a zone around region in Ethiopia.

Major earthquakes in Ethiopia (magnitude > 5)

In the next graph we show only the major earthquakes in Romania (with magnitude larger or equal to 5), specifying the depth and year as well. The source of the measurement is specified by color of the plot (and in this case is either Bucharest or US) and the position of the earthquake is specified as a legend near each graph point.

#create column indicating Earthquake year
dfmag$year <- as.integer(strftime(dfmag$date, "%Y"))
# create column indicating point color
dfmag$pcolor <- "darkgreen" #all other than Bucharest or US
dfmag$pcolor[dfmag$locationSource=="buc"] <- "red"
dfmag$pcolor[dfmag$locationSource=="us"] <- "blue"

with(dfmag, {
  s3d <- scatterplot3d(year, depth, mag,        # x y and z axis
                       color=pcolor, pch=19,        # circle color indicates location Source
                       type="h", lty.hplot=2,       # lines to the horizontal plane
                       scale.y=.95,                 # scale y axis (reduce by 25%)
                       main="Major earthquakes (magnitude > 5) in Ethiopia (1906-2021)",
                       xlab="Year",
                       ylab="Depth [Km]",
                       zlab="Magnitude [Richter]")
  s3d.coords <- s3d$xyz.convert(year, depth, mag)
  text(s3d.coords$x, s3d.coords$y,     # x and y coordinates
       labels=place,       # text to plot
       pos=4, cex=.5)                  # shrink text 50% and place on right side of points)
  # add the legend
  legend("topleft", inset=.05,      # location and inset
         bty="n", cex=.75,              # suppress legend box, shrink text 50%
         title="Location source",
         c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})

Magnitude type, location source, depth and years

In the next graph we show as well the largest earthquakes but we change the legend for each point to show the magnitude type. For the largest earthquakes, the mw (energy-motivated magnitude scale) is used; as well, most of the major events have the location source US

with(dfmag, {
  s3d <- scatterplot3d(year, depth, mag,        # x y and z axis
                       color=pcolor, pch=19,        # circle color indicates location Source
                       type="h", lty.hplot=2,       # lines to the horizontal plane
                       scale.y=.75,                 # scale y axis (reduce by 25%)
                       main="Major earthquakes (magnitude > 5) in Ethiopia (1906-2021)",
                       xlab="Year",
                       ylab="Depth [Km]",
                       zlab="Magnitude [Richter]")
  s3d.coords <- s3d$xyz.convert(year, depth, mag)
  text(s3d.coords$x, s3d.coords$y,     # x and y coordinates
       labels=magType,       # text to plot
       pos=4, cex=.5)                  # shrink text 50% and place on right side of points)
  # add the legend
  legend("topleft", inset=.05,      # location and inset
         bty="n", cex=.75,              # suppress legend box, shrink text 50%
         title="Location source",
         c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})

Plotly choropleth map with Ethiopian earthquakes

Let’s see all earthquakes events displayed on a plotly choropleth map. Each event is shown as a dot. The dots color corresponds to the earthquake magnitude.

g <- list(
  scope = 'ethiopia',
  projection = list(type = 'Mercator'),
  showland = TRUE,
  landcolor = toRGB("gray85"), subunitcolor = toRGB("green"), countrycolor = toRGB("black"),
  countrywidth = 0.5, subunitwidth = 0.5,
  showlakes = TRUE, lakecolor = toRGB("white"),
  showsubunits = TRUE, showcountries = TRUE,
  resolution = 50,
  lonaxis = list(showgrid = TRUE, gridwidth = 0.5, range = c(20,30), dtick = 1),
  lataxis = list(showgrid = TRUE, gridwidth = 0.5, range = c(40,48), dtick = 1)
)

plot_geo(df, lat = ~latitude, lon = ~longitude) %>%
  add_markers(
    text = ~paste(mag),
    color = ~mag, symbol = I("circle"), size = I(9), hoverinfo = "text"
  ) %>%
  colorbar(title = "Magnitude") %>%
  layout(
    title = 'Earthquakes in Ethiopia 1906-2021', geo = g
  )

The End