pacman::p_load(readr, caret, dplyr, scatterplot3d, ggplot2, tidyr, doSNOW, parallel, ranger, e1071)
# Point the session at the project folder so the CSV files can be read by
# their bare file names.
setwd("~/Desktop/Ubiqum/R_task6_Wifi")

# Load both datasets, keeping text columns as character vectors.
Training.data <- read.csv(file = "trainingData.csv", stringsAsFactors = FALSE)
validationData <- read.csv(file = "validationData.csv", stringsAsFactors = FALSE)

# Preview columns 515 to 529 of the training set (the last WAP columns and
# the metadata columns, as the printed header below shows).
head(Training.data[, seq(515, 529)])
## WAP515 WAP516 WAP517 WAP518 WAP519 WAP520 LONGITUDE LATITUDE FLOOR
## 1 100 100 100 100 100 100 -7541.264 4864921 2
## 2 100 100 100 100 100 100 -7536.621 4864934 2
## 3 100 100 100 100 100 100 -7519.152 4864950 2
## 4 100 100 100 100 100 100 -7524.570 4864934 2
## 5 100 100 100 100 100 100 -7632.144 4864982 0
## 6 100 100 100 100 100 100 -7533.896 4864939 2
## BUILDINGID SPACEID RELATIVEPOSITION USERID PHONEID TIMESTAMP
## 1 1 106 2 2 23 1371713733
## 2 1 106 2 2 23 1371713691
## 3 1 103 2 2 23 1371714095
## 4 1 102 2 2 23 1371713807
## 5 0 122 2 11 13 1369909710
## 6 1 105 2 2 23 1371713841
# Same preview for the validation set: columns 515 to 529.
head(validationData[, seq(515, 529)])
## WAP515 WAP516 WAP517 WAP518 WAP519 WAP520 LONGITUDE LATITUDE FLOOR
## 1 100 100 100 100 100 100 -7515.917 4864890 1
## 2 100 100 100 100 100 100 -7383.867 4864840 4
## 3 100 100 100 100 100 100 -7374.302 4864847 4
## 4 100 100 100 100 100 100 -7365.825 4864843 4
## 5 100 100 100 100 100 100 -7641.499 4864922 2
## 6 100 100 100 100 100 100 -7338.807 4864825 2
## BUILDINGID SPACEID RELATIVEPOSITION USERID PHONEID TIMESTAMP
## 1 1 0 0 0 0 1380872703
## 2 2 0 0 0 13 1381155054
## 3 2 0 0 0 13 1381155095
## 4 2 0 0 0 13 1381155138
## 5 0 0 0 0 2 1380877774
## 6 2 0 0 0 12 1380874853
# Re-express the Training.data coordinates as offsets from their own minimum,
# rounded to one decimal place. Only Training.data is transformed here.
Training.data$LATITUDE <- round(
  Training.data$LATITUDE - min(Training.data$LATITUDE),
  digits = 1
)
Training.data$LONGITUDE <- round(
  Training.data$LONGITUDE - min(Training.data$LONGITUDE),
  digits = 1
)
# Scatter plot of the Training.data rows of one building, coloured by USERID
# and split into one panel per FLOOR.
#
# builNum: value appended to "B." to build the BUILDINGID label to keep.
# Returns the ggplot object; the label and the title are echoed via message().
plotForUSer <- function(builNum) {
  target_building <- paste0("B.", builNum)
  heading <- paste("Building", builNum, ": Users in floors")
  message(paste("building Id:", target_building))
  message(paste("Plot title:", heading))

  building_rows <- filter(Training.data, BUILDINGID == target_building)

  ggplot(building_rows) +
    geom_point(aes(x = LONGITUDE, y = LATITUDE, color = USERID)) +
    facet_grid(. ~ FLOOR) +
    labs(title = heading) +
    theme_linedraw(base_size = 11, base_family = "") +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
}
# Draw the per-user plot for building labels "B.1", "B.2" and "B.3".
# Each call sits at top level so the returned ggplot object is auto-printed;
# the "##" lines are the messages emitted by plotForUSer() in the recorded run.
plotForUSer("1")
## building Id: B.1
## Plot title: Building 1 : Users in floors
plotForUSer("2")
## building Id: B.2
## Plot title: Building 2 : Users in floors
plotForUSer("3")
## building Id: B.3
## Plot title: Building 3 : Users in floors
# Scatter plot of the Training.data rows of one building, coloured by
# RELATIVEPOSITION (blue / green) and split into one panel per FLOOR.
#
# buildingNumber: value appended to "B." to build the BUILDINGID label to keep.
# Returns the ggplot object; the label and the title are echoed via message().
plotForRelativePosition <- function(buildingNumber) {
  target_building <- paste0("B.", buildingNumber)
  heading <- paste0("Building ", buildingNumber, ": Position in floors")
  message(paste("building Id:", target_building))
  message(paste("Plot title:", heading))

  building_rows <- filter(Training.data, BUILDINGID == target_building)

  ggplot(building_rows) +
    geom_point(aes(x = LONGITUDE, y = LATITUDE, color = RELATIVEPOSITION)) +
    facet_grid(. ~ FLOOR) +
    # Exactly two manual colours are supplied for the colour scale.
    scale_color_manual(values = c("blue", "green")) +
    labs(title = heading) +
    theme_linedraw(base_size = 11, base_family = "") +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
}
# Draw the relative-position plot for building labels "B.1", "B.2" and "B.3".
# Each call sits at top level so the returned ggplot object is auto-printed;
# the "##" lines are the messages emitted by the function in the recorded run.
plotForRelativePosition("1")
## building Id: B.1
## Plot title: Building 1: Position in floors
plotForRelativePosition("2")
## building Id: B.2
## Plot title: Building 2: Position in floors
plotForRelativePosition("3")
## building Id: B.3
## Plot title: Building 3: Position in floors
# Scatter plot of the Training.data rows of one building, coloured by PHONEID
# and split into one panel per FLOOR.
#
# buildID: value appended to "B." to build the BUILDINGID label to keep.
# Returns the ggplot object; the label and the title are echoed via message().
plotForPhone <- function(buildID) {
  target_building <- paste0("B.", buildID)
  heading <- paste("Building", buildID, ": Phone ID in floors")
  message(paste("Building ID:", target_building))
  message(paste("Plot Title:", heading))

  building_rows <- filter(Training.data, BUILDINGID == target_building)

  ggplot(building_rows) +
    geom_point(aes(x = LONGITUDE, y = LATITUDE, color = PHONEID)) +
    facet_grid(. ~ FLOOR) +
    labs(title = heading) +
    theme_linedraw(base_size = 11, base_family = "") +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
}
# Draw the per-phone plot for building labels "B.1", "B.2" and "B.3".
# Each call sits at top level so the returned ggplot object is auto-printed;
# the "##" lines are the messages emitted by plotForPhone() in the recorded run.
plotForPhone("1")
## Building ID: B.1
## Plot Title: Building 1 : Phone ID in floors
plotForPhone("2")
## Building ID: B.2
## Plot Title: Building 2 : Phone ID in floors
plotForPhone("3")
## Building ID: B.3
## Plot Title: Building 3 : Phone ID in floors
# Number of locations by user
plot(
  Training.data$USERID,
  main = "Number of locations by User",
  xlab = "USER NUMBER",
  ylab = "frequency",
  col = "pink"
)

# Number of locations by phone
plot(
  Training.data$PHONEID,
  main = "Number of locations by Phone",
  xlab = "PHONE ID NUMBER",
  ylab = "frequency",
  col = "turquoise3"
)
# Rows per exact (LONGITUDE, LATITUDE, RELATIVEPOSITION) combination on
# floor 1 of B1.
B1 %>%
  filter(FLOOR == 1) %>%
  group_by(LONGITUDE, LATITUDE, RELATIVEPOSITION) %>%
  summarise(count = n())
## # A tibble: 67 x 4
## # Groups: LONGITUDE, LATITUDE [?]
## LONGITUDE LATITUDE RELATIVEPOSITION count
## <dbl> <dbl> <fct> <int>
## 1 -7691. 4864928. FrontDoor 20
## 2 -7690. 4864929. FrontDoor 19
## 3 -7684. 4864932. FrontDoor 20
## 4 -7684. 4864930. FrontDoor 20
## 5 -7683. 4864932. FrontDoor 20
## 6 -7682. 4864931. FrontDoor 20
## 7 -7677. 4864934. FrontDoor 29
## 8 -7675. 4864933. FrontDoor 20
## 9 -7675. 4864934. FrontDoor 20
## 10 -7669. 4864936. FrontDoor 29
## # ... with 57 more rows
# Rows per exact (LONGITUDE, LATITUDE, RELATIVEPOSITION) combination on
# floor 1 of B3.
B3 %>%
  filter(FLOOR == 1) %>%
  group_by(LONGITUDE, LATITUDE, RELATIVEPOSITION) %>%
  summarise(count = n())
## # A tibble: 89 x 4
## # Groups: LONGITUDE, LATITUDE [?]
## LONGITUDE LATITUDE RELATIVEPOSITION count
## <dbl> <dbl> <fct> <int>
## 1 -7409. 4864797. FrontDoor 20
## 2 -7408. 4864812. Inside 20
## 3 -7406. 4864802. FrontDoor 20
## 4 -7405. 4864808. FrontDoor 20
## 5 -7404. 4864809. FrontDoor 20
## 6 -7399. 4864788. FrontDoor 20
## 7 -7396. 4864776. Inside 10
## 8 -7395. 4864786. FrontDoor 20
## 9 -7395. 4864837. Inside 27
## 10 -7392. 4864841. Inside 30
## # ... with 79 more rows
I counted the rows per exact location on floor 1 of Building 1 and of Building 3 in the training data: the same location typically has 20 or more rows, because the user recorded their position several times at the same spot. Hence, the training data lacks general representativeness.
The validation data does not contain the identity of the user.
# Scatter plot of the validationData rows of one building, coloured by
# PHONEID and split into one panel per FLOOR.
#
# buildID: value appended to "B." to build the BUILDINGID label to keep.
# Returns the ggplot object; the label and the title are echoed via message().
plotForPhoneVal <- function(buildID) {
  target_building <- paste0("B.", buildID)
  heading <- paste("Building", buildID, ": Phone ID in floors - Validation")
  message(paste("Building ID:", target_building))
  message(paste("Plot Title:", heading))

  building_rows <- filter(validationData, BUILDINGID == target_building)

  ggplot(building_rows) +
    geom_point(aes(x = LONGITUDE, y = LATITUDE, color = PHONEID)) +
    facet_grid(. ~ FLOOR) +
    labs(title = heading) +
    theme_linedraw(base_size = 11, base_family = "") +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
}
# Draw the validation per-phone plot for building labels "B.1" to "B.3".
# Each call sits at top level so the returned ggplot object is auto-printed;
# the "##" lines are the messages emitted by the function in the recorded run.
plotForPhoneVal("1")
## Building ID: B.1
## Plot Title: Building 1 : Phone ID in floors - Validation
plotForPhoneVal("2")
## Building ID: B.2
## Plot Title: Building 2 : Phone ID in floors - Validation
plotForPhoneVal("3")
## Building ID: B.3
## Plot Title: Building 3 : Phone ID in floors - Validation
# Rows per exact (LONGITUDE, LATITUDE, RELATIVEPOSITION) combination in
# B1_validation.
B1_validation %>%
  group_by(LONGITUDE, LATITUDE, RELATIVEPOSITION) %>%
  summarise(count = n())
## # A tibble: 487 x 4
## # Groups: LONGITUDE, LATITUDE [?]
## LONGITUDE LATITUDE RELATIVEPOSITION count
## <dbl> <dbl> <fct> <int>
## 1 0. 183. <NA> 1
## 2 0. 184. <NA> 1
## 3 3.70 181. <NA> 1
## 4 4.20 180. <NA> 1
## 5 4.80 180. <NA> 2
## 6 5.20 180. <NA> 1
## 7 5.40 180. <NA> 1
## 8 5.70 187. <NA> 1
## 9 6.00 186. <NA> 1
## 10 6.20 186. <NA> 1
## # ... with 477 more rows
# Rows per exact (LONGITUDE, LATITUDE, RELATIVEPOSITION) combination in
# B2_validation.
B2_validation %>%
  group_by(LONGITUDE, LATITUDE, RELATIVEPOSITION) %>%
  summarise(count = n())
## # A tibble: 288 x 4
## # Groups: LONGITUDE, LATITUDE [?]
## LONGITUDE LATITUDE RELATIVEPOSITION count
## <dbl> <dbl> <fct> <int>
## 1 124. 123. <NA> 1
## 2 125. 124. <NA> 1
## 3 125. 144. <NA> 1
## 4 126. 143. <NA> 1
## 5 126. 127. <NA> 1
## 6 126. 148. <NA> 1
## 7 127. 128. <NA> 1
## 8 127. 128. <NA> 1
## 9 127. 129. <NA> 1
## 10 129. 121. <NA> 1
## # ... with 278 more rows
I counted the rows per exact location in Buildings 1 and 2 in the validation data. There is essentially one record (row) per location in the validation data, because users were moving randomly (as we were told). Even though there are fewer records, their representativeness is much better.
Because the locations in the training data lack representativeness, I joined the training and validation data in order to get better performance from the prediction models.
# ---- Drop uninformative WAP columns, merge both sets, rescale signals ----
#
# All removals below select what to KEEP (logical index / setdiff) instead of
# using `-which(cond)`: a negative index built from an empty which() selects
# nothing, so it would silently drop every column/row when nothing matches.

# Training set: drop the WAP columns (first 520 columns) that hold a single
# distinct value across all rows.
waps_only_training <- Training.data[, 1:520]
remove_100 <- vapply(
  waps_only_training,
  function(x) length(unique(x)) == 1,
  logical(1)
)
keep.training <- setdiff(seq_along(Training.data), which(remove_100))
Training.data <- Training.data[, keep.training, drop = FALSE]

# Validation set: the constant-column check runs over EVERY column of
# validationData (as in the original code), so constant metadata columns are
# dropped too, and the shared-column step below then removes them from
# Training.data as well. waps_only_validation is kept for parity with the
# training step but is not used in this section.
waps_only_validation <- validationData[, 1:520]
remove_100_validation <- vapply(
  validationData,
  function(x) length(unique(x)) == 1,
  logical(1)
)
validationData <- validationData[, !remove_100_validation, drop = FALSE]

# Keep only the columns present in both sets so they can be stacked.
in.training <- colnames(Training.data) %in% colnames(validationData)
Training.data <- Training.data[, in.training, drop = FALSE]
in.validation <- colnames(validationData) %in% colnames(Training.data)
validationData <- validationData[, in.validation, drop = FALSE]
all.data <- rbind(Training.data, validationData)

# Recode the signal columns: the value 100 becomes -105, then everything is
# shifted by +105 (so 100 ends up as 0). The recode is restricted to the same
# columns as the shift; applied to the whole frame it would also overwrite
# any other cell equal to 100 (e.g. a rounded LONGITUDE/LATITUDE of 100.0).
# NOTE(review): 1:312 is the column range used by the original script;
# confirm it still matches the WAP columns left after the steps above.
wap.cols <- 1:312
waps.only.all <- all.data[, wap.cols, drop = FALSE]
waps.only.all[waps.only.all == 100] <- -105
waps.only.all <- waps.only.all + 105
all.data[, wap.cols] <- waps.only.all

# Remove observations whose signal columns all hold the same value.
delete.zv.rows <- apply(
  as.matrix(waps.only.all), 1,
  function(x) length(unique(x)) == 1
)
all.data <- all.data[!delete.zv.rows, , drop = FALSE]
# ---- Per-building WAP sets ----
#
# Columns are removed with logical indexing and drop = FALSE rather than
# `-which(cond)`: a negative index built from an empty which() would drop
# every column, and without drop = FALSE a single remaining column would
# collapse to a vector and lose its column name.

building1 <- all.data %>% filter(BUILDINGID == "B.1")
building2 <- all.data %>% filter(BUILDINGID == "B.2")
building3 <- all.data %>% filter(BUILDINGID == "B.3")

# For each building keep only the signal columns (1:312) that take more than
# one distinct value among that building's rows.
waps_only_b1 <- building1[, 1:312]
deliting_b1cols <- vapply(
  waps_only_b1, function(x) length(unique(x)) == 1, logical(1)
)
waps_only_b1 <- waps_only_b1[, !deliting_b1cols, drop = FALSE]

waps_only_b2 <- building2[, 1:312]
deliting_b2cols <- vapply(
  waps_only_b2, function(x) length(unique(x)) == 1, logical(1)
)
waps_only_b2 <- waps_only_b2[, !deliting_b2cols, drop = FALSE]

waps_only_b3 <- building3[, 1:312]
deliting_b3cols <- vapply(
  waps_only_b3, function(x) length(unique(x)) == 1, logical(1)
)
waps_only_b3 <- waps_only_b3[, !deliting_b3cols, drop = FALSE]

# Remove overlaps: building 1 loses the columns also present in building 2
# and then those present in building 3; building 2 loses the columns also
# present in building 3. Building 3 keeps all of its columns.
match.1in2 <- colnames(waps_only_b1) %in% colnames(waps_only_b2)
waps_only_b1 <- waps_only_b1[, !match.1in2, drop = FALSE]
match.1in3 <- colnames(waps_only_b1) %in% colnames(waps_only_b3)
waps_only_b1 <- waps_only_b1[, !match.1in3, drop = FALSE]
match.2in3 <- colnames(waps_only_b2) %in% colnames(waps_only_b3)
waps_only_b2 <- waps_only_b2[, !match.2in3, drop = FALSE]
I tried KNN and an SVM radial model; KNN gave the better results:
Created a formula for adding the waps.only columns to the model:
# Collect the column names of the three per-building WAP sets and build the
# model formula BUILDINGID ~ <wap 1> + <wap 2> + ...
waps.used <- c(
  colnames(waps_only_b1),
  colnames(waps_only_b2),
  colnames(waps_only_b3)
)
f <- as.formula(
  paste("BUILDINGID", "~", paste(waps.used, collapse = " + "))
)
# ---- Building classifier (KNN): 70/30 split and 5-fold CV training ----

# Reproducible partition on BUILDINGID: 70% for training, 30% for testing.
set.seed(998)
inTraining <- createDataPartition(all.data$BUILDINGID, p = 0.70, list = FALSE)
training <- all.data[inTraining, ]
test <- all.data[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE,
  verboseIter = TRUE
)
knn.model.building <- train(
  f,
  data = training,
  method = "knn",
  trControl = fitControl,
  preProcess = c("zv", "center", "scale")
)
print(knn.model.building)

# Shut the workers down and fall back to the sequential backend.
# registerDoSEQ() takes no arguments; passing the cluster raised
# "unused argument" and aborted the script before rm(cluster).
stopCluster(cluster)
registerDoSEQ()
rm(cluster)
# ---- Evaluate the building classifier on the held-out rows ----
prediction.building <- predict(knn.model.building, test)

# caret::confusionMatrix() expects the predictions as `data` and the observed
# classes as `reference`. With the two swapped, the table is transposed and
# the per-class statistics (sensitivity, specificity, ...) are mislabelled;
# accuracy and kappa are unaffected.
confusionMatrix(data = prediction.building, reference = test$BUILDINGID)

# Store the prediction next to the observation and flag the misclassified
# rows: error.model is 0 when both agree and non-zero otherwise.
test$prediction <- prediction.building
test <- test %>%
  mutate(error.model = abs(as.numeric(prediction) - as.numeric(BUILDINGID)))
which(test$error.model != 0)
df.error <- test %>% filter(error.model != 0)

# Convert the flag to character (the error plot uses a manual colour scale).
# The original referenced a bare `error.model`, which is not defined outside
# the data frame and raised "object 'error.model' not found".
test$error.model <- as.character(test$error.model)
Confusion Matrix Building Prediction
# Map of the test observations coloured by error.model: the first value of
# the manual scale is drawn in white, the remaining ones in black.
ggplot(
  data = test,
  mapping = aes(x = LONGITUDE, y = LATITUDE, color = error.model)
) +
  geom_point() +
  scale_color_manual(values = c("white", "black", "black")) +
  ggtitle("Errors prediction Building") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
Analysis of the errors:
# Inspect the second misclassified observation stored in df.error.
error.2<-df.error[2,]
# For that single row, flag every field whose value differs from 0
# (apply() over the one row yields one flag per column).
waps.error.2<-apply(error.2,1,function(x) x!=0)
# Keep only the flagged columns, i.e. drop the columns whose value equals 0.
# NOTE(review): if no column equals 0, which() is empty and the negative
# index selects no columns at all.
prova.error2<-error.2[,-c(which(waps.error.2==FALSE))]
# Positions of the remaining columns that also belong to each building's
# WAP set (A: building 1, B: building 3, C: building 2).
A<-which(colnames(prova.error2)%in%colnames(waps_only_b1))
B<-which(colnames(prova.error2)%in%colnames(waps_only_b3)) # All Predicted waps are on B.3
C<-which(colnames(prova.error2)%in%colnames(waps_only_b2)) # All Observed Waps in B.2
Conclusion: Some WAPs were very probably moved between the two datasets (training and validation). We know that there were 3 months between them.
I tried KNN and an SVM radial model for each floor. The SVM radial model gave me the best results: the highest accuracy and Kappa.
# ---- Floor classifier for building 3 (SVM radial) ----

# Drop the columns that must not act as predictors of FLOOR.
building3$TIMESTAMP <- NULL
building3$PHONEID <- NULL
building3$LONGITUDE <- NULL
building3$LATITUDE <- NULL
building3$BUILDINGID <- NULL

# Reproducible partition on FLOOR: 70% for training, 30% for testing.
set.seed(998)
inTraining <- createDataPartition(building3$FLOOR, p = 0.70, list = FALSE)
training <- building3[inTraining, ]
test <- building3[-inTraining, ]

# Parallel backend. The original created two clusters in a row and only
# stopped the second one, leaking the first set of workers; a single cluster
# is created here, with at least one worker (detectCores() can return 1/NA).
numCores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cluster <- makeCluster(numCores)
registerDoSNOW(cluster)

fitControl <- trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE,
  verboseIter = TRUE
)
svm.floor3 <- train(
  FLOOR ~ .,
  data = training,
  method = "svmRadial",
  trControl = fitControl,
  preProcess = c("zv", "center", "scale")
)
print(svm.floor3)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Evaluate on the held-out rows: predictions go in `data`, observed floors
# in `reference` (the original had them swapped, which transposes the table
# and mislabels the per-class statistics; accuracy and kappa are unchanged).
prediction.floor.svm <- predict(svm.floor3, test)
confusionf3 <- confusionMatrix(
  data = prediction.floor.svm,
  reference = test$FLOOR
)
# Accuracy = 98.87%
# KAPPA = 0.098 (as recorded; NOTE(review): likely a typo for 0.98 - confirm)
# ---- Floor classifier for building 2 (SVM radial) ----

building2 <- all.data %>% filter(BUILDINGID == "B.2")

# Rebuild FLOOR as a factor from its numeric codes, so that only the values
# present in this building remain as levels.
building2$FLOOR <- as.numeric(building2$FLOOR)
building2$FLOOR <- as.factor(building2$FLOOR)

# Drop the columns that must not act as predictors of FLOOR.
building2$TIMESTAMP <- NULL
building2$PHONEID <- NULL
building2$LONGITUDE <- NULL
building2$LATITUDE <- NULL
building2$BUILDINGID <- NULL

# Reproducible partition on FLOOR: 70% for training, 30% for testing.
set.seed(248)
inTraining <- createDataPartition(building2$FLOOR, p = 0.70, list = FALSE)
training.f2 <- building2[inTraining, ]
test.f2 <- building2[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE,
  verboseIter = TRUE
)
svm.floor2 <- train(
  FLOOR ~ .,
  data = training.f2,
  method = "svmRadial",
  trControl = fitControl,
  preProcess = c("zv", "center", "scale")
)
print(svm.floor2)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Evaluate on the held-out rows: predictions go in `data`, observed floors
# in `reference` (the original had them swapped, which transposes the table
# and mislabels the per-class statistics; accuracy and kappa are unchanged).
prediction.floor2.svm <- predict(svm.floor2, test.f2)
confusion.svmf2 <- confusionMatrix(
  data = prediction.floor2.svm,
  reference = test.f2$FLOOR
)
# Accuracy = 98.96%
# KAPPA = 0.986
# ---- Floor classifier for building 1 (SVM radial) ----

building1 <- all.data %>% filter(BUILDINGID == "B.1")

# Drop the columns that must not act as predictors of FLOOR.
building1$TIMESTAMP <- NULL
building1$PHONEID <- NULL
building1$LONGITUDE <- NULL
building1$LATITUDE <- NULL
building1$BUILDINGID <- NULL

# Rebuild FLOOR as a factor from its numeric codes, so that only the values
# present in this building remain as levels.
building1$FLOOR <- as.numeric(building1$FLOOR)
building1$FLOOR <- as.factor(building1$FLOOR)

# Reproducible partition on FLOOR: 70% for training, 30% for testing.
set.seed(998)
inTraining <- createDataPartition(building1$FLOOR, p = 0.70, list = FALSE)
training_f1 <- building1[inTraining, ]
test_f1 <- building1[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE,
  verboseIter = TRUE
)
svm.floor1 <- train(
  FLOOR ~ .,
  data = training_f1,
  method = "svmRadial",
  trControl = fitControl,
  preProcess = c("zv", "center", "scale")
)
print(svm.floor1)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Evaluate on the held-out rows: predictions go in `data`, observed floors
# in `reference` (the original had them swapped, which transposes the table
# and mislabels the per-class statistics; accuracy and kappa are unchanged).
prediction.f1.svm <- predict(svm.floor1, test_f1)
confusion_f1 <- confusionMatrix(
  data = prediction.f1.svm,
  reference = test_f1$FLOOR
)
# Accuracy = 98.38%
# KAPPA = 0.984
I tried KNN and Random Forest for each latitude prediction. Random Forest gave me the better results: lower RMSE and higher Rsquared.
# ---- Latitude regression for building 1 (random forest via ranger) ----

# Reproducible partition on LATITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining.lat1 <- createDataPartition(
  build1.lat$LATITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.lat1 <- build1.lat[inTraining.lat1, ]
test.RF.lat1 <- build1.lat[-inTraining.lat1, ]

# Parallel backend. The original created two clusters in a row and only
# stopped the second one, leaking the first set of workers; a single cluster
# is created here, with at least one worker (detectCores() can return 1/NA).
numCores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cluster <- makeCluster(numCores)
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.lat1 <- train(
  LATITUDE ~ .,
  data = training.RF.lat1,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.lat1 <- predict(rf.lat1, test.RF.lat1)
error.rf.lat1 <- postResample(prediction.RF.lat1, test.RF.lat1$LATITUDE)
# RMSE = 2.57
# Rsquared = 0.99
# ---- Latitude regression for building 2 (random forest via ranger) ----

# Reproducible partition on LATITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining.lat2 <- createDataPartition(
  build2.lat$LATITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.lat2 <- build2.lat[inTraining.lat2, ]
test.RF.lat2 <- build2.lat[-inTraining.lat2, ]

# Parallel backend. The original created two clusters in a row and only
# stopped the second one, leaking the first set of workers; a single cluster
# is created here, with at least one worker (detectCores() can return 1/NA).
numCores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cluster <- makeCluster(numCores)
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.lat2 <- train(
  LATITUDE ~ .,
  data = training.RF.lat2,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.lat2 <- predict(rf.lat2, test.RF.lat2)
error.rf.lat2 <- postResample(prediction.RF.lat2, test.RF.lat2$LATITUDE)
# RMSE = 3.67
# Rsquared = 99%
# ---- Latitude regression for building 3 (random forest via ranger) ----

# Reproducible partition on LATITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining.lat3 <- createDataPartition(
  build3.lat$LATITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.lat3 <- build3.lat[inTraining.lat3, ]
test.RF.lat3 <- build3.lat[-inTraining.lat3, ]

# Parallel backend. The original created two clusters in a row and only
# stopped the second one, leaking the first set of workers; a single cluster
# is created here, with at least one worker (detectCores() can return 1/NA).
numCores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cluster <- makeCluster(numCores)
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.lat3 <- train(
  LATITUDE ~ .,
  data = training.RF.lat3,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.lat3 <- predict(rf.lat3, test.RF.lat3)
error.rf.lat3 <- postResample(prediction.RF.lat3, test.RF.lat3$LATITUDE)
# RMSE = 3.40
# Rsquared = 0.98
I tried KNN and Random Forest for each longitude prediction. Random Forest gave me the better results: lower RMSE and higher Rsquared.
# ---- Longitude regression for building 1 (random forest via ranger) ----

# Reproducible partition on LONGITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining <- createDataPartition(
  build1.long$LONGITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.long1 <- build1.long[inTraining, ]
test.RF.long1 <- build1.long[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.long1 <- train(
  LONGITUDE ~ .,
  data = training.RF.long1,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.long1 <- predict(rf.long1, test.RF.long1)
error.rf.long1 <- postResample(prediction.RF.long1, test.RF.long1$LONGITUDE)
# RMSE = 3.14
# Rsquared = 0.98
# ---- Longitude regression for building 2 (random forest via ranger) ----

# Reproducible partition on LONGITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining <- createDataPartition(
  build2.long$LONGITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.long2 <- build2.long[inTraining, ]
test.RF.long2 <- build2.long[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.long2 <- train(
  LONGITUDE ~ .,
  data = training.RF.long2,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.long2 <- predict(rf.long2, test.RF.long2)
error.rf.long2 <- postResample(prediction.RF.long2, test.RF.long2$LONGITUDE)
# RMSE = 4.10
# Rsquared = 0.99
# ---- Longitude regression for building 3 (random forest via ranger) ----

# Reproducible partition on LONGITUDE: 70% for training, 30% for testing.
set.seed(123)
inTraining <- createDataPartition(
  build3.long$LONGITUDE,
  p = 0.70,
  list = FALSE
)
training.RF.long3 <- build3.long[inTraining, ]
test.RF.long3 <- build3.long[-inTraining, ]

# Parallel backend: leave one core free, but never ask for fewer than one
# worker (detectCores() can return 1 or NA).
cluster <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
registerDoSNOW(cluster)

fitControl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
rf.long3 <- train(
  LONGITUDE ~ .,
  data = training.RF.long3,
  method = "ranger",
  trControl = fitControl,
  preProcess = c("zv", "medianImpute")
)

# registerDoSEQ() takes no arguments; passing the cluster raised an
# "unused argument" error.
stopCluster(cluster)
registerDoSEQ()
rm(cluster)

# Error metrics on the held-out rows.
prediction.RF.long3 <- predict(rf.long3, test.RF.long3)
error.rf.long3 <- postResample(prediction.RF.long3, test.RF.long3$LONGITUDE)
# RMSE = 5.36
# Rsquared = 0.98
We have seen that some WAPs were probably moved to different buildings between the creation of the two datasets. This really worsens the predictions of the models, especially for Building 2 and Building 3. However, the predictions of the models are very accurate and quite representative. We have to take into account, though, that we mixed both datasets (training and validation) to create the models, because the training data lacked representativeness. Hence, it is difficult to gauge whether our models suffer from overfitting.