# Inspect the structure of `land`: dimensions, column names, types and the
# first few values of each column.
str(land)
## 'data.frame': 1212 obs. of 14 variables:
## $ X_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Landslide : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Aspect : int 3 1 3 1 5 5 1 3 3 1 ...
## $ Curvature : int 3 5 4 3 4 5 3 5 2 4 ...
## $ Earthquake : int 2 2 3 3 2 2 2 3 3 3 ...
## $ Elevation : int 2 3 2 3 1 2 2 4 3 2 ...
## $ Flow : int 2 1 2 5 4 3 4 2 2 4 ...
## $ Lithology : int 1 1 4 1 1 1 1 2 6 1 ...
## $ NDVI : int 4 4 3 2 2 2 3 3 4 2 ...
## $ NDWI : int 2 2 2 4 4 4 4 3 2 4 ...
## $ Plan : int 2 5 4 3 3 5 3 5 2 4 ...
## $ Precipitation: int 3 5 5 5 3 3 5 5 5 5 ...
## $ Profile : int 3 2 2 3 1 2 3 2 4 2 ...
## $ Slope : int 2 2 2 3 4 2 2 4 4 4 ...
# Per-column descriptive statistics (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se).
# NOTE(review): `describe()` is presumably psych::describe (psych appears in
# the package masking messages later in this file) -- confirm.
describe(land)
## vars n mean sd median trimmed mad min max range
## X_id 1 1212 606.50 350.02 606.5 606.50 449.23 1 1212 1211
## Landslide 2 1212 0.50 0.50 0.5 0.50 0.74 0 1 1
## Aspect 3 1212 2.96 1.15 3.0 2.95 1.48 1 5 4
## Curvature 4 1212 2.98 1.10 3.0 2.99 1.48 1 5 4
## Earthquake 5 1212 2.10 0.67 2.0 2.13 0.00 1 3 2
## Elevation 6 1212 2.44 1.24 2.0 2.35 1.48 1 5 4
## Flow 7 1212 2.34 1.11 2.0 2.21 1.48 1 5 4
## Lithology 8 1212 1.95 1.42 1.0 1.68 0.00 1 6 5
## NDVI 9 1212 3.04 1.24 3.0 3.05 1.48 1 5 4
## NDWI 10 1212 2.77 1.30 3.0 2.72 1.48 1 5 4
## Plan 11 1212 3.06 1.06 3.0 3.06 1.48 1 5 4
## Precipitation 12 1212 3.81 1.35 4.0 4.01 1.48 1 5 4
## Profile 13 1212 3.26 1.04 3.0 3.27 1.48 1 5 4
## Slope 14 1212 2.81 1.19 3.0 2.79 1.48 1 5 4
## skew kurtosis se
## X_id 0.00 -1.20 10.05
## Landslide 0.00 -2.00 0.01
## Aspect 0.03 -0.63 0.03
## Curvature -0.01 -0.66 0.03
## Earthquake -0.12 -0.79 0.02
## Elevation 0.41 -0.95 0.04
## Flow 0.85 0.11 0.03
## Lithology 1.35 0.83 0.04
## NDVI -0.18 -1.00 0.04
## NDWI 0.27 -1.06 0.04
## Plan -0.08 -0.54 0.03
## Precipitation -0.84 -0.57 0.04
## Profile -0.19 -0.50 0.03
## Slope 0.05 -0.96 0.03
# Min, quartiles, median, mean and max for every column.
summary(land)
## X_id Landslide Aspect Curvature Earthquake
## Min. : 1.0 Min. :0.0 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 303.8 1st Qu.:0.0 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median : 606.5 Median :0.5 Median :3.000 Median :3.000 Median :2.000
## Mean : 606.5 Mean :0.5 Mean :2.962 Mean :2.978 Mean :2.102
## 3rd Qu.: 909.2 3rd Qu.:1.0 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :1212.0 Max. :1.0 Max. :5.000 Max. :5.000 Max. :3.000
## Elevation Flow Lithology NDVI
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :1.000 Median :3.000
## Mean :2.436 Mean :2.338 Mean :1.949 Mean :3.043
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :6.000 Max. :5.000
## NDWI Plan Precipitation Profile
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :4.000 Median :3.000
## Mean :2.774 Mean :3.059 Mean :3.814 Mean :3.262
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Slope
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :2.812
## 3rd Qu.:4.000
## Max. :5.000
# Check for missing values: count the NAs in each column.
colSums(is.na(land))
## X_id Landslide Aspect Curvature Earthquake
## 0 0 0 0 0
## Elevation Flow Lithology NDVI NDWI
## 0 0 0 0 0
## Plan Precipitation Profile Slope
## 0 0 0 0
# Check for duplicate observations.
# X_id is a unique row identifier (it runs 1..1212 in the summaries above), so
# it has to be left out of the comparison: with it included no two rows can
# ever be equal and the check is vacuous. The count recorded below was
# produced with X_id included.
sum(duplicated(land[, setdiff(names(land), "X_id"), drop = FALSE]))
## [1] 0
# Correlation heatmap of all 14 columns (lower triangle only, with the
# coefficients drawn as labels).
ggcorrplot(
  cor(land[, 1:14]),
  type = "lower",
  lab = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  title = "Correlation"
)
#Modeling
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.2.3
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:Metrics':
##
## precision, recall
## The following object is masked from 'package:purrr':
##
## lift
# Baseline model: ordinary least squares with Earthquake as the response and
# every other column of `land` as a predictor (`~ .`).
# NOTE(review): `~ .` also pulls in X_id, which looks like a row identifier
# (1..1212) rather than a feature -- consider excluding it.
earth_lm <- lm(Earthquake ~., data = land)
summary(earth_lm)
##
## Call:
## lm(formula = Earthquake ~ ., data = land)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78235 -0.31282 0.02293 0.33869 0.82872
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.630e-01 1.901e-01 1.910 0.056427 .
## X_id -1.900e-05 6.524e-05 -0.291 0.770987
## Landslide -3.804e-02 4.900e-02 -0.776 0.437784
## Aspect 7.121e-03 9.486e-03 0.751 0.452980
## Curvature -1.434e-02 2.874e-02 -0.499 0.617856
## Elevation 7.071e-03 1.066e-02 0.663 0.507260
## Flow -1.802e-02 1.122e-02 -1.606 0.108497
## Lithology 3.129e-02 8.397e-03 3.727 0.000203 ***
## NDVI -1.119e-02 2.452e-02 -0.456 0.648328
## NDWI 1.859e-02 2.435e-02 0.764 0.445271
## Plan 3.417e-02 2.020e-02 1.692 0.090927 .
## Precipitation 4.130e-01 8.738e-03 47.268 < 2e-16 ***
## Profile -3.165e-03 2.024e-02 -0.156 0.875761
## Slope 2.436e-02 9.945e-03 2.450 0.014432 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.371 on 1198 degrees of freedom
## Multiple R-squared: 0.6965, Adjusted R-squared: 0.6932
## F-statistic: 211.5 on 13 and 1198 DF, p-value: < 2.2e-16
# Stepwise selection starting from the full model, with terms allowed to be
# both dropped and re-added; the AIC trace of every step is printed below.
earth_lm_best <- step(earth_lm, direction = "both")
## Start: AIC=-2389.59
## Earthquake ~ X_id + Landslide + Aspect + Curvature + Elevation +
## Flow + Lithology + NDVI + NDWI + Plan + Precipitation + Profile +
## Slope
##
## Df Sum of Sq RSS AIC
## - Profile 1 0.003 164.90 -2391.6
## - X_id 1 0.012 164.91 -2391.5
## - NDVI 1 0.029 164.92 -2391.4
## - Curvature 1 0.034 164.93 -2391.3
## - Elevation 1 0.061 164.96 -2391.2
## - Aspect 1 0.078 164.97 -2391.0
## - NDWI 1 0.080 164.98 -2391.0
## - Landslide 1 0.083 164.98 -2391.0
## <none> 164.90 -2389.6
## - Flow 1 0.355 165.25 -2389.0
## - Plan 1 0.394 165.29 -2388.7
## - Slope 1 0.826 165.72 -2385.5
## - Lithology 1 1.911 166.81 -2377.6
## - Precipitation 1 307.532 472.43 -1115.9
##
## Step: AIC=-2391.57
## Earthquake ~ X_id + Landslide + Aspect + Curvature + Elevation +
## Flow + Lithology + NDVI + NDWI + Plan + Precipitation + Slope
##
## Df Sum of Sq RSS AIC
## - X_id 1 0.012 164.91 -2393.5
## - NDVI 1 0.029 164.93 -2393.4
## - Curvature 1 0.053 164.95 -2393.2
## - Elevation 1 0.063 164.96 -2393.1
## - Aspect 1 0.079 164.98 -2393.0
## - NDWI 1 0.081 164.98 -2393.0
## - Landslide 1 0.084 164.98 -2393.0
## <none> 164.90 -2391.6
## - Flow 1 0.357 165.26 -2390.9
## - Plan 1 0.470 165.37 -2390.1
## + Profile 1 0.003 164.90 -2389.6
## - Slope 1 0.838 165.74 -2387.4
## - Lithology 1 1.912 166.81 -2379.6
## - Precipitation 1 307.529 472.43 -1117.9
##
## Step: AIC=-2393.48
## Earthquake ~ Landslide + Aspect + Curvature + Elevation + Flow +
## Lithology + NDVI + NDWI + Plan + Precipitation + Slope
##
## Df Sum of Sq RSS AIC
## - NDVI 1 0.03 164.94 -2395.3
## - Curvature 1 0.05 164.96 -2395.1
## - Elevation 1 0.06 164.97 -2395.1
## - Aspect 1 0.08 164.99 -2394.9
## - NDWI 1 0.08 164.99 -2394.9
## <none> 164.91 -2393.5
## - Flow 1 0.36 165.27 -2392.9
## - Plan 1 0.48 165.39 -2392.0
## + X_id 1 0.01 164.90 -2391.6
## + Profile 1 0.00 164.91 -2391.5
## - Landslide 1 0.58 165.49 -2391.2
## - Slope 1 0.84 165.75 -2389.3
## - Lithology 1 1.90 166.82 -2381.6
## - Precipitation 1 338.10 503.01 -1043.9
##
## Step: AIC=-2395.27
## Earthquake ~ Landslide + Aspect + Curvature + Elevation + Flow +
## Lithology + NDWI + Plan + Precipitation + Slope
##
## Df Sum of Sq RSS AIC
## - Curvature 1 0.06 165.00 -2396.9
## - Elevation 1 0.06 165.00 -2396.8
## - Aspect 1 0.08 165.02 -2396.7
## <none> 164.94 -2395.3
## - Flow 1 0.37 165.31 -2394.5
## - Plan 1 0.48 165.42 -2393.8
## + NDVI 1 0.03 164.91 -2393.5
## + X_id 1 0.01 164.93 -2393.4
## + Profile 1 0.00 164.94 -2393.3
## - Landslide 1 0.55 165.49 -2393.2
## - Slope 1 0.87 165.81 -2390.9
## - NDWI 1 1.16 166.10 -2388.7
## - Lithology 1 1.91 166.85 -2383.3
## - Precipitation 1 338.14 503.08 -1045.7
##
## Step: AIC=-2396.86
## Earthquake ~ Landslide + Aspect + Elevation + Flow + Lithology +
## NDWI + Plan + Precipitation + Slope
##
## Df Sum of Sq RSS AIC
## - Elevation 1 0.05 165.05 -2398.5
## - Aspect 1 0.08 165.07 -2398.3
## <none> 165.00 -2396.9
## - Flow 1 0.37 165.36 -2396.2
## + Curvature 1 0.06 164.94 -2395.3
## - Landslide 1 0.51 165.51 -2395.1
## + NDVI 1 0.03 164.96 -2395.1
## + Profile 1 0.02 164.97 -2395.0
## + X_id 1 0.01 164.98 -2395.0
## - Plan 1 0.73 165.73 -2393.5
## - Slope 1 0.82 165.82 -2392.8
## - NDWI 1 1.19 166.18 -2390.2
## - Lithology 1 1.91 166.90 -2384.9
## - Precipitation 1 338.71 503.71 -1046.2
##
## Step: AIC=-2398.47
## Earthquake ~ Landslide + Aspect + Flow + Lithology + NDWI + Plan +
## Precipitation + Slope
##
## Df Sum of Sq RSS AIC
## - Aspect 1 0.07 165.12 -2399.9
## <none> 165.05 -2398.5
## - Flow 1 0.39 165.44 -2397.6
## + Elevation 1 0.05 165.00 -2396.9
## + Curvature 1 0.04 165.00 -2396.8
## + NDVI 1 0.04 165.01 -2396.8
## + Profile 1 0.01 165.03 -2396.6
## + X_id 1 0.01 165.04 -2396.5
## - Landslide 1 0.58 165.62 -2396.2
## - Plan 1 0.79 165.83 -2394.7
## - Slope 1 1.09 166.14 -2392.5
## - NDWI 1 1.34 166.39 -2390.7
## - Lithology 1 2.47 167.52 -2382.5
## - Precipitation 1 339.24 504.29 -1046.8
##
## Step: AIC=-2399.95
## Earthquake ~ Landslide + Flow + Lithology + NDWI + Plan + Precipitation +
## Slope
##
## Df Sum of Sq RSS AIC
## <none> 165.12 -2399.9
## - Flow 1 0.41 165.53 -2399.0
## + Aspect 1 0.07 165.05 -2398.5
## + Elevation 1 0.05 165.07 -2398.3
## + Curvature 1 0.04 165.08 -2398.3
## + NDVI 1 0.04 165.08 -2398.2
## + Profile 1 0.01 165.11 -2398.0
## + X_id 1 0.01 165.11 -2398.0
## - Landslide 1 0.57 165.69 -2397.8
## - Plan 1 0.77 165.89 -2396.3
## - Slope 1 1.13 166.25 -2393.7
## - NDWI 1 1.50 166.62 -2391.0
## - Lithology 1 2.46 167.58 -2384.0
## - Precipitation 1 339.26 504.38 -1048.6
# Coefficients and fit statistics of the model selected by step().
summary(earth_lm_best)
##
## Call:
## lm(formula = Earthquake ~ Landslide + Flow + Lithology + NDWI +
## Plan + Precipitation + Slope, data = land)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77328 -0.31151 0.02506 0.33376 0.82892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.292381 0.060641 4.821 1.61e-06 ***
## Landslide -0.048383 0.023696 -2.042 0.041388 *
## Flow -0.019196 0.011124 -1.726 0.084665 .
## Lithology 0.032942 0.007776 4.236 2.44e-05 ***
## NDWI 0.031545 0.009541 3.306 0.000974 ***
## Plan 0.024348 0.010247 2.376 0.017651 *
## Precipitation 0.413928 0.008322 49.737 < 2e-16 ***
## Slope 0.026419 0.009203 2.871 0.004168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3703 on 1204 degrees of freedom
## Multiple R-squared: 0.6961, Adjusted R-squared: 0.6943
## F-statistic: 394 on 7 and 1204 DF, p-value: < 2.2e-16
# Draw the plots produced by plot() for the selected lm on a 2 x 2 grid.
# par() returns the previous settings, so keep them and restore the layout
# afterwards; otherwise every later plot in the session inherits the grid.
old_par <- par(mfrow = c(2, 2))
plot(earth_lm_best)
par(old_par)
# Score the selected linear model on the same data it was fitted to
# (in-sample; no hold-out set is used here).
earth_pred <- predict(earth_lm_best, land)
mse(land$Earthquake, earth_pred)
## [1] 0.136237
rmse(land$Earthquake, earth_pred)
## [1] 0.369103
# R2() comes from caret and takes (pred, obs) -- the reverse of the
# (actual, predicted) order of the Metrics helpers above. With the default
# formula = "corr" the result is a squared correlation and therefore
# symmetric, so the recorded value is unchanged; the order matters as soon as
# formula = "traditional" is requested.
R2(earth_pred, land$Earthquake)
## [1] 0.6960885
The linear model has a moderate R² of about 0.70, measured on the training data.
# Random Forest Model
# Fix the RNG seed so that the fit, and every figure reported for it, is
# reproducible. The numbers recorded below came from an unseeded run and will
# differ slightly from a seeded re-run.
# NOTE(review): `~ .` also feeds X_id (a row identifier) to the forest, and
# Earthquake only takes the values 1..3 (see the warning below) -- confirm
# that regression on these predictors is what is intended.
set.seed(123)
rfmd <- randomForest(Earthquake ~ ., data = land, ntree = 100)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
# Print the fitted forest: call, type, number of trees, variables tried at
# each split, mean of squared residuals and % variance explained.
rfmd
##
## Call:
## randomForest(formula = Earthquake ~ ., data = land, ntree = 100)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 4
##
## Mean of squared residuals: 0.0651664
## % Var explained: 85.46
# Predict on the training rows themselves. These are in-sample predictions, so
# the scores below are more optimistic than the fit statistic printed with the
# model above (85.46 % variance explained).
earth_pred2 <- predict(rfmd, land)
mse(land$Earthquake, earth_pred2)
## [1] 0.01830075
rmse(land$Earthquake, earth_pred2)
## [1] 0.1352803
# R2() comes from caret and takes (pred, obs) -- the reverse of the
# (actual, predicted) order of the Metrics helpers above. The default
# formula = "corr" is symmetric, so the recorded value is unchanged.
R2(earth_pred2, land$Earthquake)
## [1] 0.9630017
# Put observed and predicted Earthquake values side by side and show the
# first six rows for a quick visual comparison.
data_land_rfpred <- data.frame(
  actual = land$Earthquake,
  pred = earth_pred2
)
head(data_land_rfpred, n = 6L)
## actual pred
## 1 2 2.036000
## 2 2 2.207667
## 3 3 2.865333
## 4 3 2.885000
## 5 2 2.078556
## 6 2 2.042167
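The random forest's R² (0.96) is much higher than the linear model's (0.70). Note, however, that both scores are computed on the training data; the fit statistic printed with the forest itself is lower, at 85.46% of variance explained.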