suppressMessages(library(readr))
suppressMessages(library("FRESA.CAD"))
suppressMessages(library(dplyr))
#install.packages("FRESA.CAD")
#setwd("~/GitHub/Exploring_Learning_Algorithms_4_CADx/synthetic experiments'")
# Import the dataset (the `#` column is skipped) and recode the variables
pokedex <- read_csv(
  "../datasets/pokedex.csv",
  col_types = cols(`#` = col_skip())
)
# Reduce the class vector to a plain data.frame
class(pokedex) <- "data.frame"
names(pokedex) <- c(
  "Name", "Type1", "Type2", "Total", "HP", "Attack", "Defense",
  "Sp_Atk", "Sp_Def", "Speed", "Generation", "Legendary"
)
# Outcome stored as numeric 1 (TRUE) / 0 (otherwise)
pokedex$Legendary <- as.numeric(pokedex$Legendary == TRUE)
# Type labels are replaced by their numeric factor codes
pokedex[c("Type1", "Type2")] <- lapply(
  pokedex[c("Type1", "Type2")],
  function(type_label) as.numeric(as.factor(type_label))
)
# A missing Type2 gets the placeholder code 99 instead of NA
pokedex$Type2 <- replace(pokedex$Type2, is.na(pokedex$Type2), 99)
#save(pokedex,file = "tidy_pokedex.Rdata")

# Model creation and performance on the real features only
#load("tidy_pokedex.Rdata")
# Name is dropped so that it is not part of `Legendary ~ .`
pokedex[["Name"]] <- NULL
#pokedex$Sp_Atk <- NULL
#pokedex$Sp_Def <-NULL
model_bswims.knn <- HLCM_EM(Legendary ~ ., pokedex)
## [+-][+-][+-]( 0 )< 748 , 757 , 55 , 0 >
# Observed outcome in the first column, model prediction next to it
model_bswims.knn_bs <- predictionStats_binary(
  cbind(
    pokedex$Legendary,
    predict(model_bswims.knn, pokedex)
  ),
  "LC_BSWIMS.KNN",
  cex = 0.8
)
## LC_BSWIMS.KNN
## Warning in if (class(modelPredictions) == "data.frame") {: the condition has
## length > 1 and only the first element will be used
# Create synthetic features by permuting randomly chosen real features.
#
# Arguments:
#   theData    data frame holding the real features and the outcome column.
#   theOutcome name (character string) of the outcome column. It is removed
#              before sampling, so no synthetic feature can be a shuffled
#              copy of the outcome.
#   n_synt     number of synthetic features to create; values below 1 give a
#              data frame without columns.
#
# Returns a data frame with as many rows as theData and n_synt columns named
# synt1, synt2, ...; each column holds the values of one randomly picked
# feature of theData in random order.
Permutation_sampling <- function(theData, theOutcome, n_synt) {
  # Drop the outcome by the name stored in theOutcome. `theData$theOutcome`
  # would look for a column literally called "theOutcome" and leave the real
  # outcome in the sampling pool.
  theData[[theOutcome]] <- NULL
  n_obs <- nrow(theData)
  n_var <- ncol(theData)

  # Nothing requested: return an empty frame with the right number of rows
  if (n_synt < 1) {
    return(data.frame(matrix(ncol = 0, nrow = n_obs)))
  }
  if (n_var < 1) {
    stop("theData has no feature columns left to permute", call. = FALSE)
  }

  # Collect the columns in a preallocated list and bind them once at the end
  # instead of growing a data frame with cbind() on every iteration.
  synt_cols <- vector("list", n_synt)
  for (i in seq_len(n_synt)) {
    # pick a random variable
    picked <- theData[[sample.int(n_var, 1)]]
    # shuffle it by sorting on an independent standard-normal key
    shuffle_key <- rnorm(n_obs, mean = 0, sd = 1)
    synt_cols[[i]] <- picked[order(shuffle_key)]
  }
  # name the new features synt1 ... synt<n_synt>
  names(synt_cols) <- paste0("synt", seq_len(n_synt))

  data.frame(synt_cols, check.names = FALSE, stringsAsFactors = FALSE)
}
# Fix the RNG state once; all three synthetic feature sets below draw from it
set.seed(42)

# Experiment 1: append 10 synthetic features and refit
synt_feat <- Permutation_sampling(
  theData = pokedex,
  theOutcome = "Legendary",
  n_synt = 10
)
pokedex_10synt <- cbind(pokedex, synt_feat)
model_bswims.knn_10 <- HLCM_EM(Legendary ~ ., pokedex_10synt)
## [+-][+-][+-]( 0 )< 748 , 757 , 55 , 0 >
model_bswims.knn_bs_10 <- predictionStats_binary(
  cbind(
    pokedex_10synt$Legendary,
    predict(model_bswims.knn_10, pokedex_10synt)
  ),
  "LC_BSWIMS.KNN 10",
  cex = 0.89
)
## LC_BSWIMS.KNN 10
## Warning in if (class(modelPredictions) == "data.frame") {: the condition has
## length > 1 and only the first element will be used

# Experiment 2: append 100 synthetic features and refit
synt_feat100 <- Permutation_sampling(
  theData = pokedex,
  theOutcome = "Legendary",
  n_synt = 100
)
# real features followed by the synthetic ones
pokedex_100synt <- cbind(pokedex, synt_feat100)
model_bswims.knn_100 <- HLCM_EM(Legendary ~ ., pokedex_100synt)
## [+-][+-][+-]( 0 )< 748 , 757 , 55 , 0 >
model_bswims.knn_bs_100 <- predictionStats_binary(
  cbind(
    pokedex_100synt$Legendary,
    predict(model_bswims.knn_100, pokedex_100synt)
  ),
  "LC_BSWIMS.KNN 100",
  cex = 0.89
)
## LC_BSWIMS.KNN 100
## Warning in if (class(modelPredictions) == "data.frame") {: the condition has
## length > 1 and only the first element will be used

# Experiment 3: append 1000 synthetic features and refit
synt_feat1k <- Permutation_sampling(
  theData = pokedex,
  theOutcome = "Legendary",
  n_synt = 1000
)
# real features followed by the synthetic ones
pokedex_1ksynt <- cbind(pokedex, synt_feat1k)
model_bswims.knn_1k <- HLCM_EM(Legendary ~ ., pokedex_1ksynt)
## [+-][+-][+-]( 0 )< 748 , 757 , 55 , 0 >
model_bswims.knn_bs_1k <- predictionStats_binary(
  cbind(
    pokedex_1ksynt$Legendary,
    predict(model_bswims.knn_1k, pokedex_1ksynt)
  ),
  "LC_BSWIMS.KNN 1k",
  cex = 0.89
)
## LC_BSWIMS.KNN 1k
## Warning in if (class(modelPredictions) == "data.frame") {: the condition has
## length > 1 and only the first element will be used