a13_jmhartwell2

library(randomForest)

## Warning: package 'randomForest' was built under R version 4.0.5

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

library(pROC)

## Warning: package 'pROC' was built under R version 4.0.5

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

library(raster)

## Warning: package 'raster' was built under R version 4.0.5

## Loading required package: sp

## Warning: package 'sp' was built under R version 4.0.4

library(rgdal)

## Warning: package 'rgdal' was built under R version 4.0.5

## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj

library(tmap)

## Warning: package 'tmap' was built under R version 4.0.5

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.5

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:randomForest':
## 
##     margin

library(caret)

## Loading required package: lattice

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.5

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:raster':
## 
##     intersect, select, union

## The following object is masked from 'package:randomForest':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(sf)

## Warning: package 'sf' was built under R version 4.0.5

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(Metrics)

## Warning: package 'Metrics' was built under R version 4.0.5

## 
## Attaching package: 'Metrics'

## The following objects are masked from 'package:caret':
## 
##     precision, recall

## The following object is masked from 'package:pROC':
## 
##     auc

library(car)

## Warning: package 'car' was built under R version 4.0.5

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(gvlma)
library(spdep)

## Warning: package 'spdep' was built under R version 4.0.5

## Loading required package: spData

## Warning: package 'spData' was built under R version 4.0.5

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

library(spgwr)

## Warning: package 'spgwr' was built under R version 4.0.5

## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)

library(ModelMetrics)

## 
## Attaching package: 'ModelMetrics'

## The following objects are masked from 'package:Metrics':
## 
##     auc, ce, logLoss, mae, mse, msle, precision, recall, rmse, rmsle

## The following objects are masked from 'package:caret':
## 
##     confusionMatrix, precision, recall, sensitivity, specificity

## The following object is masked from 'package:pROC':
## 
##     auc

## The following object is masked from 'package:base':
## 
##     kappa

lsm <- read.csv("C:/Users/jmhp2/Downloads/slope_failure_pred2/slope_failure_pred2/lsm_data2.csv")

#Data Preparation.

names(stack) <- c("slp","sp21","sp11","sp7","rph21","rph11","rph7","diss21","diss11","diss7","slpmn21","slpmn11","slpmn7","sei","hli","asp_lin","sar","ssr21","ssr11","ssr7","crossc21","crossc11","crossc7","planc21","planc11","planc7","proc21","proc11","proc7","longc21","longc11","longc7","us_dist","state_dist","local_dist","strm_dist","strm_cost","us_cost","state_cost","local_cost","rktyp","steve","unit","dspm","drain")

• Define the “steve”, “dspm”, and “drain” columns as factors.

lsm$steve <- as.factor(lsm$steve)
lsm$dspm <- as.factor(lsm$dspm)
lsm$drain <- as.factor(lsm$drain)

• Extract 250 slope failure (“slopeD”) and 250 pseudo-absence (“not”) samples for validation.

• Extract the remaining 1,500 slope failure (“slopeD”) and 1,500 pseudo-absence (“not”) samples for training. These should not overlap with the validation samples.

test <- lsm %>% group_by(class) %>% sample_n(250)
train <- setdiff(lsm, test)
test <- as.data.frame(test)
train <- as.data.frame(train)

#Training Data.

#All. 
set.seed(42)
#rf_model <- randomForest(y= train[,1], train[,2:44], ntree=501, importance=T, confusion=T, err.rate=T)
#rf_model
#importance(rf_model)


#Will not compute rf_model on my computer but when I sent over the files to Jeff, it worked for him. I do not know how to fix this error.

#Terrain Only. 
set.seed(42)
##rf_model_t <- randomForest(y= train[,1], train[,2:33], ntree=501, importance=T, confusion=T, err.rate=T)
#rf_model_t
#importance(rf_model_t)

#Will not compute rf_model on my computer but when I sent over the files to Jeff, it worked for him. I do not know how to fix this error.

#Litholopgy and soil.
set.seed(42)
#rf_model_ls <- randomForest(y= train[,1], train[,42:44], ntree=501, importance=T, confusion=T, err.rate=T)
#rf_model_ls
#importance(rf_model_ls)

#Will not compute rf_model on my computer but when I sent over the files to Jeff, it worked for him. I do not know how to fix this error.

#Distance only.
set.seed(42)
#rf_model_d <- randomForest(y= train[,1], train[,34:41], ntree=501, importance=T, confusion=T, err.rate=T)
#rf_model_d
#importance(rf_model_d)

#Will not compute rf_model on my computer but when I sent over the files to Jeff, it worked for him. I do not know how to fix this error.

#All Non-Terrain. 
set.seed(42)
#rf_model_nt <- randomForest(y= train[,1], train[,34:44], ntree=501, importance=T, confusion=T, err.rate=T)
#rf_model_nt
#importance(rf_model_nt)

#Data Validation.

#Predict the validation data with all the models.

#All.
#pred_test <- predict(rf_model, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test)

#pred_test <- data.frame(pred_test)
#pred_test_roc <- roc(test$class, pred_test$slopeD)
#auc(pred_test_roc)
#plot(pred_test_roc)

#ggroc(pred_test_roc, lwd=1.2, col="blue")+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)

## code is same as Jeff's, though will not compute on my computer but will for him.

#Terrain Only. 
#pred_test_t <- predict(rf_model_t, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test_t)

#pred_test_t <- data.frame(pred_test_t)
#pred_test_t_roc <- roc(test$class, pred_test_t$slopeD)
#auc(pred_test_roc)
#plot(pred_test_t_roc)

#ggroc(pred_test_t_roc, lwd=1.2, col="blue")+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)

# code worked for Jeff when ran on his computer, though my laptop will not run code.

#Litholopgy and soil.
#pred_test_ls <- predict(rf_model_ls, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test_ls)

#pred_test_ls <- data.frame(pred_test_ls)
#pred_test_ls_roc <- roc(test$class, pred_test_ls$slopeD)
#auc(pred_test_li_roc)
#plot(pred_test_ls_roc)

#ggroc(pred_test_ls_roc, lwd=1.2, col="blue")+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)

##code worked for Jeff when ran on his computer, though my laptop will not run code.

#Distance.
#pred_test_d <- predict(rf_model_d, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test)

#pred_test_d <- data.frame(pred_test_d)
#pred_test_d_roc <- roc(test$class, pred_test_d$slopeD)
#auc(pred_test_d_roc)
#plot(pred_test_d_roc)

#ggroc(pred_test_d_roc, lwd=1.2, col="blue")+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)

#Non-Terrain.
#pred_test_nt <- predict(rf_model_nt, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test_nt)

#pred_test_nt <- data.frame(pred_test_nt)
#pred_test_nt_roc <- roc(test$class, pred_test_nt$slopeD)
#auc(pred_test_nt_roc)
#plot(pred_test_nt_roc)

#ggroc(pred_test_nt_roc, lwd=1.2, col="blue")+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)

#code worked for Jeff when ran on his computer, though my laptop will not run code.

#Create a ROC plot containing all ROC curves for comparison.

#ggroc(list(All=pred_test_roc, Terrain=pred_test_t_roc, LitholopgySoil=pred_test_ls_roc, Distance=pred_test_d_roc, NonTerrain=pred_test_nt_roc), lwd=1.2)+
#geom_abline(intercept = 1, slope = 1, color = "red", linetype = "dashed", lwd=1.2)+
#scale_color_manual(labels = c("All Variables", "Terrain Variables", "Litholopgy and Soil Variables", "Distance Variables", "Non-Terrain Variables"), values= c("", "#D65076", "#EFC050"))+
#Could not figure out the 5 values required to run function.
#ggtitle("ROC Model Comparison")+
#labs(x="Specificity", y="Sensitivity")+
#theme(axis.text.y = element_text(size=12))+
#theme(axis.text.x = element_text(size=12))+
#theme(plot.title = element_text(face="bold", size=18))+
#theme(axis.title = element_text(size=14))+
#theme(strip.text = element_text(size = 14))+
#theme(legend.title=element_blank())

#Paragraph.

Reciever Operator Charcteristic (ROC) models are used to plot sensitivity of data (probability of predicting that a real positive will be a positive) against the specificity of data (probability of predicting a real negative will be a positive). From this combined graph we can clearly see the different curves of AUC (area under curve) values of oach of the models. Here we can clearly see that the AUC of the ‘lithology and soil’ model curve was the least fraudulent data and the most legitimate in thier data prediction. The least legitimate data models are that of ‘all’ & ‘terrain’ ROC curves.

#predict(stack, rf_model, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE, filename("C:/Users/jmhp2/Downloads/slope_failure_pred2/slope_failure_pred2/stack.img"))

#attempted to follow through similar code as to the RF section of the WV view website. 
# Worked when I sent this all to Jeff and he ran it, though will not work on my computer. We both have the same code.

#pred_test <- predict(rf_model, test, index=2, type="prob", norm.votes=TRUE, predict.all=FALSE, proximity=FALSE, nodes=FALSE)
#head(pred_test)

# Worked when I sent this all to Jeff and he ran it, though will not work on my computer. We both have the same code.

#Read the result back in and display it using tmap. Make sure to provide a legend, title, and use an appropriate color palette.

raster_result <- raster("C:/Users/jmhp2/Downloads/slope_failure_pred2/slope_failure_pred2/stack.img")

#tm_shape(raster_result)+
#tm_raster(palette="Blues")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "Wetland Probability", title.size = 1.5)


## would work for me orginally, though after shutting down rstudio, it will not load something about a vector of "7.6 mb."

```

a13_jmhartwell2

Jacob Hartwell

4/30/2021