# HW11

#install.packages("rpart")

library(tidyverse)
library(rpart)
library(caret)
#library(skimr)
library(randomForest)
library(neuralnet)
library(rattle)
library(nnet)

# Load the training and test sets; strings are read in as factors.
# The files are comma-separated, so read.csv() (sep = ",", dec = ".") is the
# matching reader. read.csv2() defaults to a decimal *comma*; combined with
# sep = "," it would read any value written with a decimal point
# (e.g. "468.0") as a factor instead of a number.
train_raw <- read.csv("train.csv", stringsAsFactors = TRUE)
test_raw <- read.csv("test.csv", stringsAsFactors = TRUE)
# dim(train_raw) 
# dim(test_raw)
# This is useful to look at data, from skimr package. Doesn't render in latex though. 
# skim(train_raw)

# Functions to replace NAs with the most frequent level or the median
# Replace the missing values of a factor with its most frequent level.
#
# Implemented in base R, so it does not depend on the deprecated
# forcats::fct_explicit_na().
#
# x: a factor (a character vector is converted to a factor first).
# Returns x with every NA replaced by the level that has the highest count;
# ties go to the level that comes first in levels(x). The set of levels is
# left unchanged. x is returned as is when it has no NAs, or when it has no
# levels at all (nothing to impute from).
replace_na_most <- function(x) {
  if (is.character(x)) {
    x <- factor(x)
  }
  stopifnot(is.factor(x))

  # table() ignores NAs by default, so these are counts of observed values.
  counts <- table(x)
  if (!anyNA(x) || length(counts) == 0L) {
    return(x)
  }

  x[is.na(x)] <- names(counts)[which.max(counts)]
  x
}
# Fill the missing entries of a numeric vector with the median of its
# non-missing values.
#
# x: a numeric vector.
# Returns x with every NA replaced by median(x, na.rm = TRUE).
replace_na_med <- function(x) {
  replace(x, is.na(x), median(x, na.rm = TRUE))
}
# Minimal cleanup: impute every missing value in a data frame.
# Factor columns are passed through replace_na_most() (most frequent level),
# numeric columns through replace_na_med() (median). Columns of any other
# type are left untouched.
#
# data: a data frame.
# Returns the data frame with its factor and numeric columns imputed.
cleanup_minimal <- function(data) {
  # across() + where() is the current replacement for the superseded
  # mutate_if().
  data %>%
    mutate(across(where(is.factor), replace_na_most)) %>%
    mutate(across(where(is.numeric), replace_na_med))
}

# Impute missing values; each data set is cleaned on its own.
train_minclean <- train_raw %>% cleanup_minimal()
test_minclean <- test_raw %>% cleanup_minimal()

# Fit an rpart tree for SalePrice using every other column as a predictor.
mod_rpart <- rpart(SalePrice ~ ., data = train_minclean)
# Try this command to make a nice tree plot!
# fancyRpartPlot(mod_rpart, caption = NULL)

# Predict on the cleaned test set and pair each prediction with its test Id.
pred_rpart <- predict(mod_rpart, newdata = test_minclean)
submission_rpart <- tibble(
  Id = test_raw$Id,
  SalePrice = pred_rpart
)
head(submission_rpart)

## # A tibble: 6 x 2
##      Id SalePrice
##   <int>     <dbl>
## 1  1461   118199.
## 2  1462   151246.
## 3  1463   185210.
## 4  1464   185210.
## 5  1465   249392.
## 6  1466   185210.

# Your file path might be different here:
write_csv(submission_rpart, file = "submission_rpart.csv")

# Training a random forest.
# The fit depends on random draws, so seed the RNG first; otherwise every run
# produces a different model and a different submission file.
set.seed(2023)
mod_rf <- randomForest(SalePrice ~ ., data = train_minclean)

# Workaround so predict() accepts the test set (presumably for factor columns
# whose levels differ between train and test -- confirm against the
# randomForest docs): put one training row on top of the test set so that
# rbind() builds each factor column starting from the training levels, then
# drop that borrowed row again.
trainX <- select(train_minclean, -SalePrice)
test_minclean <- rbind(trainX[1, ], test_minclean)
test_minclean <- test_minclean[-1, ]

# Get my predictions and write the submission file.
pred_rf <- predict(mod_rf, newdata = test_minclean)
submission_rf <- tibble(Id = test_raw$Id, SalePrice = pred_rf)
write_csv(submission_rf, file = "submission_rf.csv")

# HW11
#
# Harry O
#
# 2023-04-28