library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gghighlight)
library(leaflet)
library(IRdisplay)
options(scipen = 999)
options(warn = -1)
library(lubridate)
library(viridis)
## Loading required package: viridisLite
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(caTools)
library(kernlab)
## 
## Attaching package: 'kernlab'
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(ROSE)
## Loaded ROSE 0.0-4
## Reading in files

list.files(path = "../input")
## character(0)
earthquake <- read_csv("all_month.csv")
## Rows: 8161 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): magType, net, id, place, type, status, locationSource, magSource
## dbl  (12): latitude, longitude, depth, mag, nst, gap, dmin, rms, horizontalE...
## dttm  (2): time, updated
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(earthquake)
## Rows: 8,161
## Columns: 22
## $ time            <dttm> 2019-03-11 04:30:18, 2019-03-11 04:24:26, 2019-03-11 …
## $ latitude        <dbl> 33.52100, 61.06940, 38.77333, 65.01810, 37.29130, 35.1…
## $ longitude       <dbl> -116.7945, -151.8329, -122.7435, -148.7292, -117.5088,…
## $ depth           <dbl> 2.82, 105.60, 1.16, 8.60, 6.90, 5.00, 6.20, 48.70, 2.5…
## $ mag             <dbl> 0.44, 1.10, 0.73, 1.60, 0.70, 1.80, 1.70, 1.90, 0.56, …
## $ magType         <chr> "ml", "ml", "md", "ml", "ml", "mb_lg", "ml", "ml", "md…
## $ nst             <dbl> 18, NA, 5, NA, 8, NA, 30, NA, 10, NA, NA, NA, NA, NA, …
## $ gap             <dbl> 53.00, NA, 213.00, NA, 176.36, 64.00, 93.14, NA, 77.00…
## $ dmin            <dbl> 0.009898, NA, 0.011220, NA, 0.375000, 0.219000, 0.1370…
## $ rms             <dbl> 0.1400, 0.2500, 0.0100, 0.5100, 0.2800, 0.5100, 0.3300…
## $ net             <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
## $ id              <chr> "ci38263479", "ak01937u587x", "nc73150441", "ak01937u2…
## $ updated         <dttm> 2019-03-11 04:33:55, 2019-03-11 04:27:53, 2019-03-11 …
## $ place           <chr> "11km NE of Aguanga, CA", "51km NW of Nikiski, Alaska"…
## $ type            <chr> "earthquake", "earthquake", "earthquake", "earthquake"…
## $ horizontalError <dbl> 0.25, NA, 1.55, NA, NA, 1.10, NA, NA, 0.42, 5.80, NA, …
## $ depthError      <dbl> 1.02, 1.50, 0.70, 0.20, 24.50, 1.70, 0.70, 1.50, 1.31,…
## $ magError        <dbl> 0.129, NA, 0.230, NA, NA, 0.255, NA, NA, NA, 0.080, NA…
## $ magNst          <dbl> 11, NA, 2, NA, NA, 4, NA, NA, 1, 15, NA, NA, 11, 60, 1…
## $ status          <chr> "automatic", "automatic", "automatic", "automatic", "a…
## $ locationSource  <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
## $ magSource       <chr> "ci", "ak", "nc", "ak", "nn", "us", "nn", "ak", "nc", …
head(earthquake)
## # A tibble: 6 × 22
##   time                latitude longitude  depth   mag magType   nst   gap
##   <dttm>                 <dbl>     <dbl>  <dbl> <dbl> <chr>   <dbl> <dbl>
## 1 2019-03-11 04:30:18     33.5    -117.    2.82  0.44 ml         18   53 
## 2 2019-03-11 04:24:26     61.1    -152.  106.    1.1  ml         NA   NA 
## 3 2019-03-11 04:22:42     38.8    -123.    1.16  0.73 md          5  213 
## 4 2019-03-11 04:13:42     65.0    -149.    8.6   1.6  ml         NA   NA 
## 5 2019-03-11 04:10:13     37.3    -118.    6.9   0.7  ml          8  176.
## 6 2019-03-11 04:04:12     35.1     -97.6   5     1.8  mb_lg      NA   64 
## # ℹ 14 more variables: dmin <dbl>, rms <dbl>, net <chr>, id <chr>,
## #   updated <dttm>, place <chr>, type <chr>, horizontalError <dbl>,
## #   depthError <dbl>, magError <dbl>, magNst <dbl>, status <chr>,
## #   locationSource <chr>, magSource <chr>
options(repr.plot.width=12, repr.plot.height=6)

ggplot(earthquake, aes(time, mag)) + 
geom_line(color = 'green')+ theme_bw()+ 
ggtitle("Earthquake Magnitude")+
xlab('Date')+
ylab('Magnitude')+
theme(plot.title = element_text(hjust = 0.5))

#Location
earthquake$location <- sub('.*,\\s*','', earthquake$place)

#Time of the day in 'Hour'
earthquake$hour <- ymd_hms(earthquake$time)
earthquake$hour <- hour(earthquake$hour)

#Visualizing the number of quakes that have happened at a particular time
earthquake %>% 
filter(!is.na(mag))%>%
group_by(hour)%>%
summarise(count = length(id),max_magnitude = max(mag))%>%
ggplot(aes(hour,count, color = max_magnitude))+geom_line()+
scale_color_viridis(direction = -1)+
scale_x_continuous(breaks=seq(0,23,1))+
xlab("Time of the day")+
ylab("Number of earthquakes")+
ggtitle("Number of quakes as per time of the day")+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))

earthquake %>% 
group_by(location) %>% 
filter(!(is.na(mag)))%>%
summarise(Number_of_quakes = length(location), 
          Average_Magnitude = mean(mag))%>%
mutate(Percent = round(prop.table(Number_of_quakes)*100,2))%>%
arrange(desc(Number_of_quakes))%>% top_n(25, Number_of_quakes)
## # A tibble: 25 × 4
##    location    Number_of_quakes Average_Magnitude Percent
##    <chr>                  <int>             <dbl>   <dbl>
##  1 CA                      2643             1.02    32.4 
##  2 Alaska                  2551             1.72    31.3 
##  3 Nevada                   480             0.776    5.88
##  4 Utah                     480             1.39     5.88
##  5 Puerto Rico              269             2.37     3.3 
##  6 Hawaii                   241             1.83     2.95
##  7 Montana                  174             1.27     2.13
##  8 Washington               114             1.06     1.4 
##  9 Indonesia                102             4.60     1.25
## 10 California                86             0.980    1.05
## # ℹ 15 more rows
ggplot(earthquake, aes(mag,depth,color = hour))+
geom_jitter(alpha = 0.5)+
gghighlight(max(depth>200)| max(mag>4))+
scale_x_continuous(breaks=seq(0,9,1))+
scale_color_viridis(direction = -1)+
theme_bw()+
xlab('Magnitude')+
ylab('Depth')+
ggtitle('Magnitude Vs. Depth in Km')+
theme(plot.title = element_text(hjust = 0.5))

earthquake %>%
group_by(type)%>%
summarise(count = length(type))%>%
mutate(Percent = prop.table(count*100))%>%
ggplot(aes(type,Percent, fill = type))+
geom_col()+theme_bw()+
xlab('Type of Seismic Activity')+
ylab('Percent')+
ggtitle('Percent of Seismic Activity')+
theme(plot.title = element_text(hjust = 0.5))

ggplot(earthquake, aes('',depth, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 100))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Depth of Seismic Activity')

ggplot(earthquake, aes('',mag, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 8))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Magnitude of Seismic Activity')

ggplot(earthquake, aes('',hour, color = type))+geom_boxplot()+
scale_y_continuous(limits = c(0, 24))+theme_bw()+
xlab('Type of Seismic Activity')+
ggtitle('Time of Seismic Activity')

#Choosing the relevant columns
quake <- earthquake[,c(2,3,4,5,24,15)]

#Categorising the type
quake$type <- ifelse(quake$type == 'earthquake', 'Earthquake', 'Otherquake')

#quake[,c(1:5)] <- scale(quake[,c(1:5)])

#Converting our target column 'type' into a factor
quake$type <- factor(quake$type)

#Checking and removing any row having NA
colSums(is.na(quake))
##  latitude longitude     depth       mag      hour      type 
##         0         0         0         4         0         0
quake <- quake[!is.na(quake$mag),]
head(quake)
## # A tibble: 6 × 6
##   latitude longitude  depth   mag  hour type      
##      <dbl>     <dbl>  <dbl> <dbl> <int> <fct>     
## 1     33.5    -117.    2.82  0.44     4 Earthquake
## 2     61.1    -152.  106.    1.1      4 Earthquake
## 3     38.8    -123.    1.16  0.73     4 Earthquake
## 4     65.0    -149.    8.6   1.6      4 Earthquake
## 5     37.3    -118.    6.9   0.7      4 Earthquake
## 6     35.1     -97.6   5     1.8      4 Earthquake
table(quake$type)
## 
## Earthquake Otherquake 
##       8005        152
# splitting the data between train and test
set.seed(1234)

indices = sample.split(quake$type, SplitRatio = 0.7)

train = quake[indices,]

test = quake[!(indices),]

head(train)
## # A tibble: 6 × 6
##   latitude longitude  depth   mag  hour type      
##      <dbl>     <dbl>  <dbl> <dbl> <int> <fct>     
## 1     33.5    -117.    2.82  0.44     4 Earthquake
## 2     61.1    -152.  106.    1.1      4 Earthquake
## 3     38.8    -123.    1.16  0.73     4 Earthquake
## 4     65.0    -149.    8.6   1.6      4 Earthquake
## 5     35.1     -97.6   5     1.8      4 Earthquake
## 6     37.3    -117.    6.2   1.7      3 Earthquake
table(train$type)
## 
## Earthquake Otherquake 
##       5604        106
table(test$type)
## 
## Earthquake Otherquake 
##       2401         46
set.seed(1234)
#Using RBF Kernel
Model_RBF <- ksvm(type~ ., data = train, scale = FALSE, kernel = "rbfdot")
Eval_RBF <- predict(Model_RBF, test[,-6])

  #confusion matrix - RBF Kernel
  confusionMatrix(Eval_RBF,test$type)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Earthquake Otherquake
##   Earthquake       2401         46
##   Otherquake          0          0
##                                           
##                Accuracy : 0.9812          
##                  95% CI : (0.975, 0.9862) 
##     No Information Rate : 0.9812          
##     P-Value [Acc > NIR] : 0.5391          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : 0.00000000003247
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.9812          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.9812          
##          Detection Rate : 0.9812          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : Earthquake      
## 
set.seed(1234)
over <- ovun.sample(type~., data = train, method="over", N= 11640)$data
set.seed(1234)
#Using RBF Kernel
Model_RBF <- ksvm(type~ ., data = over, scale = FALSE, kernel = "rbfdot")
Eval_RBF <- predict(Model_RBF, test[,-6])

  #confusion matrix - RBF Kernel
  confusionMatrix(Eval_RBF,test$type)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Earthquake Otherquake
##   Earthquake       2285          4
##   Otherquake        116         42
##                                              
##                Accuracy : 0.951              
##                  95% CI : (0.9416, 0.9592)   
##     No Information Rate : 0.9812             
##     P-Value [Acc > NIR] : 1                  
##                                              
##                   Kappa : 0.3941             
##                                              
##  Mcnemar's Test P-Value : <0.0000000000000002
##                                              
##             Sensitivity : 0.9517             
##             Specificity : 0.9130             
##          Pos Pred Value : 0.9983             
##          Neg Pred Value : 0.2658             
##              Prevalence : 0.9812             
##          Detection Rate : 0.9338             
##    Detection Prevalence : 0.9354             
##       Balanced Accuracy : 0.9324             
##                                              
##        'Positive' Class : Earthquake         
## 
#bins=seq(1, 8.0, by=1.0)
#palette = colorBin( palette="YlOrBr", domain=earthquake$mag, na.color="transparent", bins=bins)


#d=leaflet(earthquake) %>% 
  #addTiles()  %>% 
  #setView( lng = 166.45, lat = 21, zoom = 1.25) %>%
 # addProviderTiles(providers$Esri.WorldImagery) %>%
  #addCircleMarkers(~longitude, ~latitude, 
   # fillColor = ~palette(mag), fillOpacity = 0.7, color="white", radius=3, stroke=FALSE,
   #  popup = paste("Place:", earthquake$place, "<br>",
        #    "Magnitude:", earthquake$mag, "<br>",
  #         # "Time:", earthquake$time, "<br>")) %>%
 # addLegend( pal=palette, values=~mag, opacity=0.9, title = "Magnitude", position = "bottomright" )


#htmlwidgets::saveWidget(d, "d.html")
#display_html('<iframe src="d.html" width=100% height=450></iframe>')
#pal <- colorFactor(palette = "Accent",domain = earthquake$type)

#e=leaflet(earthquake) %>% 
 # addTiles()  %>% 
  #setView( lng = -119.417931, lat = 50.778259, zoom = 3) %>%
 # addProviderTiles(providers$CartoDB.Positron) %>%
  #addCircleMarkers(~longitude, ~latitude, 
  #  fillColor = ~pal(type), fillOpacity = 0.5, color="white", radius=3, stroke=FALSE,
   #  popup = paste("Place:", earthquake$place, "<br>",
 #           "Magnitude:", earthquake$mag, "<br>",
          #  "Time:", earthquake$time, "<br>",
            #"Type:", earthquake$type, "<br>")) %>%
#  addLegend( pal=pal, values=~type, opacity=0.9, title = "Type", position = "bottomright" )


#htmlwidgets::saveWidget(e, "e.html")
#display_html('<iframe src="e.html" width=100% height=450></iframe>')