library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gghighlight)
library(leaflet)
library(IRdisplay)
options(scipen = 999)
options(warn = -1)
library(lubridate)
library(viridis)
## Loading required package: viridisLite
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
library(caTools)
library(kernlab)
##
## Attaching package: 'kernlab'
##
## The following object is masked from 'package:purrr':
##
## cross
##
## The following object is masked from 'package:ggplot2':
##
## alpha
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(ROSE)
## Loaded ROSE 0.0-4
## Reading in files
list.files(path = "../input")
## character(0)
earthquake <- read_csv("all_month.csv")
## Rows: 8161 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): magType, net, id, place, type, status, locationSource, magSource
## dbl (12): latitude, longitude, depth, mag, nst, gap, dmin, rms, horizontalE...
## dttm (2): time, updated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(earthquake)
## Rows: 8,161
## Columns: 22
## $ time <dttm> 2019-03-11 04:30:18, 2019-03-11 04:24:26, 2019-03-11 …
## $ latitude <dbl> 33.52100, 61.06940, 38.77333, 65.01810, 37.29130, 35.1…
## $ longitude <dbl> -116.7945, -151.8329, -122.7435, -148.7292, -117.5088,…
## $ depth <dbl> 2.82, 105.60, 1.16, 8.60, 6.90, 5.00, 6.20, 48.70, 2.5…
## $ mag <dbl> 0.44, 1.10, 0.73, 1.60, 0.70, 1.80, 1.70, 1.90, 0.56, …
## $ magType <chr> "ml", "ml", "md", "ml", "ml", "mb_lg", "ml", "ml", "md…
## $ nst <dbl> 18, NA, 5, NA, 8, NA, 30, NA, 10, NA, NA, NA, NA, NA, …
## $ gap <dbl> 53.00, NA, 213.00, NA, 176.36, 64.00, 93.14, NA, 77.00…
## $ dmin <dbl> 0.009898, NA, 0.011220, NA, 0.375000, 0.219000, 0.1370…
## $ rms <dbl> 0.1400, 0.2500, 0.0100, 0.5100, 0.2800, 0.5100, 0.3300…
## $ net <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
## $ id <chr> "ci38263479", "ak01937u587x", "nc73150441", "ak01937u2…
## $ updated <dttm> 2019-03-11 04:33:55, 2019-03-11 04:27:53, 2019-03-11 …
## $ place <chr> "11km NE of Aguanga, CA", "51km NW of Nikiski, Alaska"…
## $ type <chr> "earthquake", "earthquake", "earthquake", "earthquake"…
## $ horizontalError <dbl> 0.25, NA, 1.55, NA, NA, 1.10, NA, NA, 0.42, 5.80, NA, …
## $ depthError <dbl> 1.02, 1.50, 0.70, 0.20, 24.50, 1.70, 0.70, 1.50, 1.31,…
## $ magError <dbl> 0.129, NA, 0.230, NA, NA, 0.255, NA, NA, NA, 0.080, NA…
## $ magNst <dbl> 11, NA, 2, NA, NA, 4, NA, NA, 1, 15, NA, NA, 11, 60, 1…
## $ status <chr> "automatic", "automatic", "automatic", "automatic", "a…
## $ locationSource <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
## $ magSource <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
head(earthquake)
## # A tibble: 6 × 22
## time latitude longitude depth mag magType nst gap
## <dttm> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 2019-03-11 04:30:18 33.5 -117. 2.82 0.44 ml 18 53
## 2 2019-03-11 04:24:26 61.1 -152. 106. 1.1 ml NA NA
## 3 2019-03-11 04:22:42 38.8 -123. 1.16 0.73 md 5 213
## 4 2019-03-11 04:13:42 65.0 -149. 8.6 1.6 ml NA NA
## 5 2019-03-11 04:10:13 37.3 -118. 6.9 0.7 ml 8 176.
## 6 2019-03-11 04:04:12 35.1 -97.6 5 1.8 mb_lg NA 64
## # ℹ 14 more variables: dmin <dbl>, rms <dbl>, net <chr>, id <chr>,
## # updated <dttm>, place <chr>, type <chr>, horizontalError <dbl>,
## # depthError <dbl>, magError <dbl>, magNst <dbl>, status <chr>,
## # locationSource <chr>, magSource <chr>
options(repr.plot.width=12, repr.plot.height=6)
ggplot(earthquake, aes(time, mag)) +
geom_line(color = 'green')+ theme_bw()+
ggtitle("Earthquake Magnitude")+
xlab('Date')+
ylab('Magnitude')+
theme(plot.title = element_text(hjust = 0.5))

#Location
earthquake$location <- sub('.*,\\s*','', earthquake$place)
#Time of the day in 'Hour'
earthquake$hour <- ymd_hms(earthquake$time)
earthquake$hour <- hour(earthquake$hour)
#Visualizing the number of quakes that have happened at a particular time
earthquake %>%
filter(!is.na(mag))%>%
group_by(hour)%>%
summarise(count = length(id),max_magnitude = max(mag))%>%
ggplot(aes(hour,count, color = max_magnitude))+geom_line()+
scale_color_viridis(direction = -1)+
scale_x_continuous(breaks=seq(0,23,1))+
xlab("Time of the day")+
ylab("Number of earthquakes")+
ggtitle("Number of quakes as per time of the day")+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))

earthquake %>%
group_by(location) %>%
filter(!(is.na(mag)))%>%
summarise(Number_of_quakes = length(location),
Average_Magnitude = mean(mag))%>%
mutate(Percent = round(prop.table(Number_of_quakes)*100,2))%>%
arrange(desc(Number_of_quakes))%>% top_n(25, Number_of_quakes)
## # A tibble: 25 × 4
## location Number_of_quakes Average_Magnitude Percent
## <chr> <int> <dbl> <dbl>
## 1 CA 2643 1.02 32.4
## 2 Alaska 2551 1.72 31.3
## 3 Nevada 480 0.776 5.88
## 4 Utah 480 1.39 5.88
## 5 Puerto Rico 269 2.37 3.3
## 6 Hawaii 241 1.83 2.95
## 7 Montana 174 1.27 2.13
## 8 Washington 114 1.06 1.4
## 9 Indonesia 102 4.60 1.25
## 10 California 86 0.980 1.05
## # ℹ 15 more rows
ggplot(earthquake, aes(mag,depth,color = hour))+
geom_jitter(alpha = 0.5)+
gghighlight(max(depth>200)| max(mag>4))+
scale_x_continuous(breaks=seq(0,9,1))+
scale_color_viridis(direction = -1)+
theme_bw()+
xlab('Magnitude')+
ylab('Depth')+
ggtitle('Magnitude Vs. Depth in Km')+
theme(plot.title = element_text(hjust = 0.5))

earthquake %>%
group_by(type)%>%
summarise(count = length(type))%>%
mutate(Percent = prop.table(count*100))%>%
ggplot(aes(type,Percent, fill = type))+
geom_col()+theme_bw()+
xlab('Type of Seismic Activity')+
ylab('Percent')+
ggtitle('Percent of Seismic Activity')+
theme(plot.title = element_text(hjust = 0.5))

ggplot(earthquake, aes('',depth, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 100))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Depth of Seismic Activity')

ggplot(earthquake, aes('',mag, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 8))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Magnitude of Seismic Activity')

ggplot(earthquake, aes('',hour, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 24))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Time of Seismic Activity')

#Choosing the relevant columns
quake <- earthquake[,c(2,3,4,5,24,15)]
#Categorising the type
quake$type <- ifelse(quake$type == 'earthquake', 'Earthquake', 'Otherquake')
#quake[,c(1:5)] <- scale(quake[,c(1:5)])
#Converting our target column 'type' into a factor
quake$type <- factor(quake$type)
#Checking and removing any row having NA
colSums(is.na(quake))
## latitude longitude depth mag hour type
## 0 0 0 4 0 0
quake <- quake[!is.na(quake$mag),]
head(quake)
## # A tibble: 6 × 6
## latitude longitude depth mag hour type
## <dbl> <dbl> <dbl> <dbl> <int> <fct>
## 1 33.5 -117. 2.82 0.44 4 Earthquake
## 2 61.1 -152. 106. 1.1 4 Earthquake
## 3 38.8 -123. 1.16 0.73 4 Earthquake
## 4 65.0 -149. 8.6 1.6 4 Earthquake
## 5 37.3 -118. 6.9 0.7 4 Earthquake
## 6 35.1 -97.6 5 1.8 4 Earthquake
table(quake$type)
##
## Earthquake Otherquake
## 8005 152
# splitting the data between train and test
set.seed(1234)
indices = sample.split(quake$type, SplitRatio = 0.7)
train = quake[indices,]
test = quake[!(indices),]
head(train)
## # A tibble: 6 × 6
## latitude longitude depth mag hour type
## <dbl> <dbl> <dbl> <dbl> <int> <fct>
## 1 33.5 -117. 2.82 0.44 4 Earthquake
## 2 61.1 -152. 106. 1.1 4 Earthquake
## 3 38.8 -123. 1.16 0.73 4 Earthquake
## 4 65.0 -149. 8.6 1.6 4 Earthquake
## 5 35.1 -97.6 5 1.8 4 Earthquake
## 6 37.3 -117. 6.2 1.7 3 Earthquake
table(train$type)
##
## Earthquake Otherquake
## 5604 106
table(test$type)
##
## Earthquake Otherquake
## 2401 46
set.seed(1234)
#Using RBF Kernel
Model_RBF <- ksvm(type~ ., data = train, scale = FALSE, kernel = "rbfdot")
Eval_RBF <- predict(Model_RBF, test[,-6])
#confusion matrix - RBF Kernel
confusionMatrix(Eval_RBF,test$type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Earthquake Otherquake
## Earthquake 2401 46
## Otherquake 0 0
##
## Accuracy : 0.9812
## 95% CI : (0.975, 0.9862)
## No Information Rate : 0.9812
## P-Value [Acc > NIR] : 0.5391
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 0.00000000003247
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.9812
## Neg Pred Value : NaN
## Prevalence : 0.9812
## Detection Rate : 0.9812
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Earthquake
##
set.seed(1234)
over <- ovun.sample(type~., data = train, method="over", N= 11640)$data
set.seed(1234)
#Using RBF Kernel
Model_RBF <- ksvm(type~ ., data = over, scale = FALSE, kernel = "rbfdot")
Eval_RBF <- predict(Model_RBF, test[,-6])
#confusion matrix - RBF Kernel
confusionMatrix(Eval_RBF,test$type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Earthquake Otherquake
## Earthquake 2285 4
## Otherquake 116 42
##
## Accuracy : 0.951
## 95% CI : (0.9416, 0.9592)
## No Information Rate : 0.9812
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.3941
##
## Mcnemar's Test P-Value : <0.0000000000000002
##
## Sensitivity : 0.9517
## Specificity : 0.9130
## Pos Pred Value : 0.9983
## Neg Pred Value : 0.2658
## Prevalence : 0.9812
## Detection Rate : 0.9338
## Detection Prevalence : 0.9354
## Balanced Accuracy : 0.9324
##
## 'Positive' Class : Earthquake
##
#bins=seq(1, 8.0, by=1.0)
#palette = colorBin( palette="YlOrBr", domain=earthquake$mag, na.color="transparent", bins=bins)
#d=leaflet(earthquake) %>%
#addTiles() %>%
#setView( lng = 166.45, lat = 21, zoom = 1.25) %>%
# addProviderTiles(providers$Esri.WorldImagery) %>%
#addCircleMarkers(~longitude, ~latitude,
# fillColor = ~palette(mag), fillOpacity = 0.7, color="white", radius=3, stroke=FALSE,
# popup = paste("Place:", earthquake$place, "<br>",
# "Magnitude:", earthquake$mag, "<br>",
# # "Time:", earthquake$time, "<br>")) %>%
# addLegend( pal=palette, values=~mag, opacity=0.9, title = "Magnitude", position = "bottomright" )
#htmlwidgets::saveWidget(d, "d.html")
#display_html('<iframe src="d.html" width=100% height=450></iframe>')
#pal <- colorFactor(palette = "Accent",domain = earthquake$type)
#e=leaflet(earthquake) %>%
# addTiles() %>%
#setView( lng = -119.417931, lat = 50.778259, zoom = 3) %>%
# addProviderTiles(providers$CartoDB.Positron) %>%
#addCircleMarkers(~longitude, ~latitude,
# fillColor = ~pal(type), fillOpacity = 0.5, color="white", radius=3, stroke=FALSE,
# popup = paste("Place:", earthquake$place, "<br>",
# "Magnitude:", earthquake$mag, "<br>",
# "Time:", earthquake$time, "<br>",
#"Type:", earthquake$type, "<br>")) %>%
# addLegend( pal=pal, values=~type, opacity=0.9, title = "Type", position = "bottomright" )
#htmlwidgets::saveWidget(e, "e.html")
#display_html('<iframe src="e.html" width=100% height=450></iframe>')