str(land)
## 'data.frame':    1212 obs. of  14 variables:
##  $ X_id         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Landslide    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Aspect       : int  3 1 3 1 5 5 1 3 3 1 ...
##  $ Curvature    : int  3 5 4 3 4 5 3 5 2 4 ...
##  $ Earthquake   : int  2 2 3 3 2 2 2 3 3 3 ...
##  $ Elevation    : int  2 3 2 3 1 2 2 4 3 2 ...
##  $ Flow         : int  2 1 2 5 4 3 4 2 2 4 ...
##  $ Lithology    : int  1 1 4 1 1 1 1 2 6 1 ...
##  $ NDVI         : int  4 4 3 2 2 2 3 3 4 2 ...
##  $ NDWI         : int  2 2 2 4 4 4 4 3 2 4 ...
##  $ Plan         : int  2 5 4 3 3 5 3 5 2 4 ...
##  $ Precipitation: int  3 5 5 5 3 3 5 5 5 5 ...
##  $ Profile      : int  3 2 2 3 1 2 3 2 4 2 ...
##  $ Slope        : int  2 2 2 3 4 2 2 4 4 4 ...
describe(land)
##               vars    n   mean     sd median trimmed    mad min  max range
## X_id             1 1212 606.50 350.02  606.5  606.50 449.23   1 1212  1211
## Landslide        2 1212   0.50   0.50    0.5    0.50   0.74   0    1     1
## Aspect           3 1212   2.96   1.15    3.0    2.95   1.48   1    5     4
## Curvature        4 1212   2.98   1.10    3.0    2.99   1.48   1    5     4
## Earthquake       5 1212   2.10   0.67    2.0    2.13   0.00   1    3     2
## Elevation        6 1212   2.44   1.24    2.0    2.35   1.48   1    5     4
## Flow             7 1212   2.34   1.11    2.0    2.21   1.48   1    5     4
## Lithology        8 1212   1.95   1.42    1.0    1.68   0.00   1    6     5
## NDVI             9 1212   3.04   1.24    3.0    3.05   1.48   1    5     4
## NDWI            10 1212   2.77   1.30    3.0    2.72   1.48   1    5     4
## Plan            11 1212   3.06   1.06    3.0    3.06   1.48   1    5     4
## Precipitation   12 1212   3.81   1.35    4.0    4.01   1.48   1    5     4
## Profile         13 1212   3.26   1.04    3.0    3.27   1.48   1    5     4
## Slope           14 1212   2.81   1.19    3.0    2.79   1.48   1    5     4
##                skew kurtosis    se
## X_id           0.00    -1.20 10.05
## Landslide      0.00    -2.00  0.01
## Aspect         0.03    -0.63  0.03
## Curvature     -0.01    -0.66  0.03
## Earthquake    -0.12    -0.79  0.02
## Elevation      0.41    -0.95  0.04
## Flow           0.85     0.11  0.03
## Lithology      1.35     0.83  0.04
## NDVI          -0.18    -1.00  0.04
## NDWI           0.27    -1.06  0.04
## Plan          -0.08    -0.54  0.03
## Precipitation -0.84    -0.57  0.04
## Profile       -0.19    -0.50  0.03
## Slope          0.05    -0.96  0.03
summary(land)
##       X_id          Landslide       Aspect        Curvature       Earthquake   
##  Min.   :   1.0   Min.   :0.0   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 303.8   1st Qu.:0.0   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median : 606.5   Median :0.5   Median :3.000   Median :3.000   Median :2.000  
##  Mean   : 606.5   Mean   :0.5   Mean   :2.962   Mean   :2.978   Mean   :2.102  
##  3rd Qu.: 909.2   3rd Qu.:1.0   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :1212.0   Max.   :1.0   Max.   :5.000   Max.   :5.000   Max.   :3.000  
##    Elevation          Flow         Lithology          NDVI      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :2.000   Median :2.000   Median :1.000   Median :3.000  
##  Mean   :2.436   Mean   :2.338   Mean   :1.949   Mean   :3.043  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :6.000   Max.   :5.000  
##       NDWI            Plan       Precipitation      Profile     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :4.000   Median :3.000  
##  Mean   :2.774   Mean   :3.059   Mean   :3.814   Mean   :3.262  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##      Slope      
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :2.812  
##  3rd Qu.:4.000  
##  Max.   :5.000
#check missing valeus
colSums(is.na(land))
##          X_id     Landslide        Aspect     Curvature    Earthquake 
##             0             0             0             0             0 
##     Elevation          Flow     Lithology          NDVI          NDWI 
##             0             0             0             0             0 
##          Plan Precipitation       Profile         Slope 
##             0             0             0             0
#check for duplicate
sum(duplicated(land))
## [1] 0
#correlation
ggcorrplot(cor(land[,1:14]), type = "lower", lab = T, title = "Correlation")

#Modeling

library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.2.3
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:Metrics':
## 
##     precision, recall
## The following object is masked from 'package:purrr':
## 
##     lift
earth_lm <- lm(Earthquake ~., data = land)
summary(earth_lm)
## 
## Call:
## lm(formula = Earthquake ~ ., data = land)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78235 -0.31282  0.02293  0.33869  0.82872 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.630e-01  1.901e-01   1.910 0.056427 .  
## X_id          -1.900e-05  6.524e-05  -0.291 0.770987    
## Landslide     -3.804e-02  4.900e-02  -0.776 0.437784    
## Aspect         7.121e-03  9.486e-03   0.751 0.452980    
## Curvature     -1.434e-02  2.874e-02  -0.499 0.617856    
## Elevation      7.071e-03  1.066e-02   0.663 0.507260    
## Flow          -1.802e-02  1.122e-02  -1.606 0.108497    
## Lithology      3.129e-02  8.397e-03   3.727 0.000203 ***
## NDVI          -1.119e-02  2.452e-02  -0.456 0.648328    
## NDWI           1.859e-02  2.435e-02   0.764 0.445271    
## Plan           3.417e-02  2.020e-02   1.692 0.090927 .  
## Precipitation  4.130e-01  8.738e-03  47.268  < 2e-16 ***
## Profile       -3.165e-03  2.024e-02  -0.156 0.875761    
## Slope          2.436e-02  9.945e-03   2.450 0.014432 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.371 on 1198 degrees of freedom
## Multiple R-squared:  0.6965, Adjusted R-squared:  0.6932 
## F-statistic: 211.5 on 13 and 1198 DF,  p-value: < 2.2e-16
earth_lm_best <- step(earth_lm, direction = "both")
## Start:  AIC=-2389.59
## Earthquake ~ X_id + Landslide + Aspect + Curvature + Elevation + 
##     Flow + Lithology + NDVI + NDWI + Plan + Precipitation + Profile + 
##     Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - Profile        1     0.003 164.90 -2391.6
## - X_id           1     0.012 164.91 -2391.5
## - NDVI           1     0.029 164.92 -2391.4
## - Curvature      1     0.034 164.93 -2391.3
## - Elevation      1     0.061 164.96 -2391.2
## - Aspect         1     0.078 164.97 -2391.0
## - NDWI           1     0.080 164.98 -2391.0
## - Landslide      1     0.083 164.98 -2391.0
## <none>                       164.90 -2389.6
## - Flow           1     0.355 165.25 -2389.0
## - Plan           1     0.394 165.29 -2388.7
## - Slope          1     0.826 165.72 -2385.5
## - Lithology      1     1.911 166.81 -2377.6
## - Precipitation  1   307.532 472.43 -1115.9
## 
## Step:  AIC=-2391.57
## Earthquake ~ X_id + Landslide + Aspect + Curvature + Elevation + 
##     Flow + Lithology + NDVI + NDWI + Plan + Precipitation + Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - X_id           1     0.012 164.91 -2393.5
## - NDVI           1     0.029 164.93 -2393.4
## - Curvature      1     0.053 164.95 -2393.2
## - Elevation      1     0.063 164.96 -2393.1
## - Aspect         1     0.079 164.98 -2393.0
## - NDWI           1     0.081 164.98 -2393.0
## - Landslide      1     0.084 164.98 -2393.0
## <none>                       164.90 -2391.6
## - Flow           1     0.357 165.26 -2390.9
## - Plan           1     0.470 165.37 -2390.1
## + Profile        1     0.003 164.90 -2389.6
## - Slope          1     0.838 165.74 -2387.4
## - Lithology      1     1.912 166.81 -2379.6
## - Precipitation  1   307.529 472.43 -1117.9
## 
## Step:  AIC=-2393.48
## Earthquake ~ Landslide + Aspect + Curvature + Elevation + Flow + 
##     Lithology + NDVI + NDWI + Plan + Precipitation + Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - NDVI           1      0.03 164.94 -2395.3
## - Curvature      1      0.05 164.96 -2395.1
## - Elevation      1      0.06 164.97 -2395.1
## - Aspect         1      0.08 164.99 -2394.9
## - NDWI           1      0.08 164.99 -2394.9
## <none>                       164.91 -2393.5
## - Flow           1      0.36 165.27 -2392.9
## - Plan           1      0.48 165.39 -2392.0
## + X_id           1      0.01 164.90 -2391.6
## + Profile        1      0.00 164.91 -2391.5
## - Landslide      1      0.58 165.49 -2391.2
## - Slope          1      0.84 165.75 -2389.3
## - Lithology      1      1.90 166.82 -2381.6
## - Precipitation  1    338.10 503.01 -1043.9
## 
## Step:  AIC=-2395.27
## Earthquake ~ Landslide + Aspect + Curvature + Elevation + Flow + 
##     Lithology + NDWI + Plan + Precipitation + Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - Curvature      1      0.06 165.00 -2396.9
## - Elevation      1      0.06 165.00 -2396.8
## - Aspect         1      0.08 165.02 -2396.7
## <none>                       164.94 -2395.3
## - Flow           1      0.37 165.31 -2394.5
## - Plan           1      0.48 165.42 -2393.8
## + NDVI           1      0.03 164.91 -2393.5
## + X_id           1      0.01 164.93 -2393.4
## + Profile        1      0.00 164.94 -2393.3
## - Landslide      1      0.55 165.49 -2393.2
## - Slope          1      0.87 165.81 -2390.9
## - NDWI           1      1.16 166.10 -2388.7
## - Lithology      1      1.91 166.85 -2383.3
## - Precipitation  1    338.14 503.08 -1045.7
## 
## Step:  AIC=-2396.86
## Earthquake ~ Landslide + Aspect + Elevation + Flow + Lithology + 
##     NDWI + Plan + Precipitation + Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - Elevation      1      0.05 165.05 -2398.5
## - Aspect         1      0.08 165.07 -2398.3
## <none>                       165.00 -2396.9
## - Flow           1      0.37 165.36 -2396.2
## + Curvature      1      0.06 164.94 -2395.3
## - Landslide      1      0.51 165.51 -2395.1
## + NDVI           1      0.03 164.96 -2395.1
## + Profile        1      0.02 164.97 -2395.0
## + X_id           1      0.01 164.98 -2395.0
## - Plan           1      0.73 165.73 -2393.5
## - Slope          1      0.82 165.82 -2392.8
## - NDWI           1      1.19 166.18 -2390.2
## - Lithology      1      1.91 166.90 -2384.9
## - Precipitation  1    338.71 503.71 -1046.2
## 
## Step:  AIC=-2398.47
## Earthquake ~ Landslide + Aspect + Flow + Lithology + NDWI + Plan + 
##     Precipitation + Slope
## 
##                 Df Sum of Sq    RSS     AIC
## - Aspect         1      0.07 165.12 -2399.9
## <none>                       165.05 -2398.5
## - Flow           1      0.39 165.44 -2397.6
## + Elevation      1      0.05 165.00 -2396.9
## + Curvature      1      0.04 165.00 -2396.8
## + NDVI           1      0.04 165.01 -2396.8
## + Profile        1      0.01 165.03 -2396.6
## + X_id           1      0.01 165.04 -2396.5
## - Landslide      1      0.58 165.62 -2396.2
## - Plan           1      0.79 165.83 -2394.7
## - Slope          1      1.09 166.14 -2392.5
## - NDWI           1      1.34 166.39 -2390.7
## - Lithology      1      2.47 167.52 -2382.5
## - Precipitation  1    339.24 504.29 -1046.8
## 
## Step:  AIC=-2399.95
## Earthquake ~ Landslide + Flow + Lithology + NDWI + Plan + Precipitation + 
##     Slope
## 
##                 Df Sum of Sq    RSS     AIC
## <none>                       165.12 -2399.9
## - Flow           1      0.41 165.53 -2399.0
## + Aspect         1      0.07 165.05 -2398.5
## + Elevation      1      0.05 165.07 -2398.3
## + Curvature      1      0.04 165.08 -2398.3
## + NDVI           1      0.04 165.08 -2398.2
## + Profile        1      0.01 165.11 -2398.0
## + X_id           1      0.01 165.11 -2398.0
## - Landslide      1      0.57 165.69 -2397.8
## - Plan           1      0.77 165.89 -2396.3
## - Slope          1      1.13 166.25 -2393.7
## - NDWI           1      1.50 166.62 -2391.0
## - Lithology      1      2.46 167.58 -2384.0
## - Precipitation  1    339.26 504.38 -1048.6
summary(earth_lm_best)
## 
## Call:
## lm(formula = Earthquake ~ Landslide + Flow + Lithology + NDWI + 
##     Plan + Precipitation + Slope, data = land)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77328 -0.31151  0.02506  0.33376  0.82892 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.292381   0.060641   4.821 1.61e-06 ***
## Landslide     -0.048383   0.023696  -2.042 0.041388 *  
## Flow          -0.019196   0.011124  -1.726 0.084665 .  
## Lithology      0.032942   0.007776   4.236 2.44e-05 ***
## NDWI           0.031545   0.009541   3.306 0.000974 ***
## Plan           0.024348   0.010247   2.376 0.017651 *  
## Precipitation  0.413928   0.008322  49.737  < 2e-16 ***
## Slope          0.026419   0.009203   2.871 0.004168 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3703 on 1204 degrees of freedom
## Multiple R-squared:  0.6961, Adjusted R-squared:  0.6943 
## F-statistic:   394 on 7 and 1204 DF,  p-value: < 2.2e-16
par(mfrow = c(2,2))
plot(earth_lm_best)

earth_pred <- predict(earth_lm_best, land)
mse(land$Earthquake, earth_pred)
## [1] 0.136237
rmse(land$Earthquake, earth_pred)
## [1] 0.369103
R2(land$Earthquake, earth_pred)
## [1] 0.6960885

model has average r2 score

#Random Forest Model

rfmd <- randomForest(Earthquake ~., land, ntree= 100)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values.  Are you sure you want to do regression?
rfmd
## 
## Call:
##  randomForest(formula = Earthquake ~ ., data = land, ntree = 100) 
##                Type of random forest: regression
##                      Number of trees: 100
## No. of variables tried at each split: 4
## 
##           Mean of squared residuals: 0.0651664
##                     % Var explained: 85.46
earth_pred2 <- predict(rfmd, land)
mse(land$Earthquake, earth_pred2)
## [1] 0.01830075
rmse(land$Earthquake, earth_pred2)
## [1] 0.1352803
R2(land$Earthquake, earth_pred2)
## [1] 0.9630017
data_land_rfpred <- data.frame(actual = land$Earthquake, pred = earth_pred2)

head(data_land_rfpred)
##   actual     pred
## 1      2 2.036000
## 2      2 2.207667
## 3      3 2.865333
## 4      3 2.885000
## 5      2 2.078556
## 6      2 2.042167

R2 score is way better than before