Duomenų gavyba Namų darbai

Antanas Kaminskas

library(stats)
library(ggplot2)
library(plot3D)
library(philentropy)
## Warning: paketas 'philentropy' buvo sukurtas pagal R versiją 4.1.3
library(factoextra)
## Warning: paketas 'factoextra' buvo sukurtas pagal R versiją 4.1.3
library(corrplot)
library(RColorBrewer)
library(PerformanceAnalytics)
## Warning: paketas 'PerformanceAnalytics' buvo sukurtas pagal R versiją 4.1.3
library(mlr)
library(tidyverse)
library(fastDummies)
## Warning: paketas 'fastDummies' buvo sukurtas pagal R versiją 4.1.3
library(e1071)
library(corrplot)
library(caTools)
library(class)
library(tree)
## Warning: paketas 'tree' buvo sukurtas pagal R versiją 4.1.3
library(readr)
library(tidyverse)
library(mlr)
library(scales)
library(HDclassif)
library(GGally)
library(purrr)
library(dplyr)

Nuskaitomas pirmas duomenų rinkinys

Atributų informacija

Attribute information:

For more information, read [Cortez and Morais, 2007].

  1. X - x ašies erdvinė koordinatė Montesinho parko žemėlapyje: 1 to 9
  2. Y - y ašies erdvinė koordinatė Montesinho parko žemėlapyje: 2 to 9
  3. month - metų mėnesiai : “jan” to “dec”
  4. day - savaitės dienos: “mon” to “sun” perkoduota į darbo dienas ir savaitgalius
  5. FFMC - FFMC indeksas FWI sistemai: 18.7 to 96.20
  6. DMC - DMC indeksas FWI sistemai: 1.1 to 291.3
  7. DC - DC indeksas FWI sistemai: 7.9 to 860.6
  8. ISI - ISI indeksas FWI sistemai: 0.0 to 56.10
  9. temp - temperatūra Celsijais: 2.2 to 33.30
  10. RH - santykinė oro drėgmė %: 15.0 to 100
  11. wind - vėjo greitis km/h: 0.40 to 9.40
  12. rain - kritulių kiekis mm/m2 : 0.0 to 6.4
  13. area - išdegęs plotas hektarais: 0.00 to 1090.84
# Show the current working directory, then switch to the folder that holds
# the course data.
getwd()
## [1] "C:/Users/antanas.kaminskas/Desktop"
setwd("C:/Users/antanas.kaminskas/Desktop")
# Semicolon-separated file with a header row and "." as the decimal mark.
data1 <- read.csv(
  file = "C:/Users/antanas.kaminskas/Desktop/DG_FF2.csv",
  header = TRUE,
  sep = ";",
  dec = "."
)




head(data1)
##   X Y day FFMC  DMC    DC  ISI temp RH wind rain area
## 1 7 5  wd 86.2 26.2  94.3  5.1  8.2 51  6.7  0.0    0
## 2 7 4  wd 90.6 35.4 669.1  6.7 18.0 33  0.9  0.0    0
## 3 7 4 nwd 90.6 43.7 686.9  6.7 14.6 33  1.3  0.0    0
## 4 8 6  wd 91.7 33.3  77.5  9.0  8.3 97  4.0  0.2    0
## 5 8 6 nwd 89.3 51.3 102.2  9.6 11.4 99  1.8  0.0    0
## 6 8 6 nwd 92.3 85.3 488.0 14.7 22.2 29  5.4  0.0    0
# Min-max rescaling: maps `x` linearly so that `x.min` becomes 0 and `x.max`
# becomes 1. Works elementwise, so `x` may be a single value or a vector.
# When `x.max` equals `x.min` the division is by zero (NaN for x == x.min).
min.max.norm <- function(x, x.max, x.min) {
  span <- x.max - x.min
  (x - x.min) / span
}
# Rescale the selected columns to [0, 1] with min-max normalisation.
# Positions 1, 2, 4, 5 and 6 are X, Y, FFMC, DMC and DC in the loaded file;
# the remaining numeric columns keep their original scale.
# Each column is transformed in one vectorised call (min.max.norm is
# elementwise), so no per-cell data-frame writes are needed and no globals
# named `max` / `min` are created that would shadow the base functions.
norm_cols <- c(1, 2, 4, 5, 6)
data1[norm_cols] <- lapply(data1[norm_cols], function(column) {
  min.max.norm(column, max(column), min(column))
})


# Bar chart of row counts per DC value, with bars filled by `day`.
ggplot(data = data1, mapping = aes(x = DC, fill = day)) +
  geom_bar()

Aprašomoji statistika

summary(data1)
##        X                Y              day                 FFMC       
##  Min.   :0.0000   Min.   :0.0000   Length:517         Min.   :0.0000  
##  1st Qu.:0.2500   1st Qu.:0.2857   Class :character   1st Qu.:0.9226  
##  Median :0.3750   Median :0.2857   Mode  :character   Median :0.9406  
##  Mean   :0.4587   Mean   :0.3285                      Mean   :0.9283  
##  3rd Qu.:0.7500   3rd Qu.:0.4286                      3rd Qu.:0.9574  
##  Max.   :1.0000   Max.   :1.0000                      Max.   :1.0000  
##       DMC               DC              ISI              temp      
##  Min.   :0.0000   Min.   :0.0000   Min.   : 0.000   Min.   : 2.20  
##  1st Qu.:0.2326   1st Qu.:0.5040   1st Qu.: 6.500   1st Qu.:15.50  
##  Median :0.3694   Median :0.7697   Median : 8.400   Median :19.30  
##  Mean   :0.3783   Mean   :0.6333   Mean   : 9.022   Mean   :18.89  
##  3rd Qu.:0.4869   3rd Qu.:0.8280   3rd Qu.:10.800   3rd Qu.:22.80  
##  Max.   :1.0000   Max.   :1.0000   Max.   :56.100   Max.   :33.30  
##        RH              wind            rain              area        
##  Min.   : 15.00   Min.   :0.400   Min.   :0.00000   Min.   :   0.00  
##  1st Qu.: 33.00   1st Qu.:2.700   1st Qu.:0.00000   1st Qu.:   0.00  
##  Median : 42.00   Median :4.000   Median :0.00000   Median :   0.52  
##  Mean   : 44.29   Mean   :4.018   Mean   :0.02166   Mean   :  12.85  
##  3rd Qu.: 53.00   3rd Qu.:4.900   3rd Qu.:0.00000   3rd Qu.:   6.57  
##  Max.   :100.00   Max.   :9.400   Max.   :6.40000   Max.   :1090.84
# Correlation matrix of every column except `day` (position 3), drawn as
# numbers in the upper triangle.
corrplot(cor(data1[, -3]), method = "number", type = "upper")

# Bar chart of the number of rows per `day` category.
ggplot(data = data1, mapping = aes(x = as.factor(day))) +
  geom_bar(color = "red", fill = rgb(0.1, 0.4, 0.5, 0.7)) +
  labs(title = "Day", x = "Day", y = "Value")

# Classification task: predict `day` from the other columns of data1.
data1_Task <- makeClassifTask(data = data1, target = "day")

# Linear discriminant analysis learner, fitted on the whole task.
lda <- makeLearner("classif.lda")
set.seed(50)
ldaModel <- train(learner = lda, task = data1_Task)

# Extract the underlying fitted model and keep the `x` component of its
# predictions (the LD1 scores).
ldaModelData <- getLearnerModel(ldaModel)
ldaPreds <- predict(ldaModelData)[["x"]]

head(ldaPreds)
##          LD1
## 1  0.4962508
## 2  1.1548667
## 3  1.8763079
## 4 -1.5180439
## 5 -2.5985542
## 6  0.6161613
# Append the LD1 scores to the data and plot their histogram, outlined by
# `day`.
df <- cbind(data1, ldaPreds)

ggplot(data = df, mapping = aes(x = LD1, fill = DC, color = day)) +
  geom_histogram()

# Quadratic discriminant analysis learner, fitted on the whole task.
qda <- makeLearner("classif.qda")
set.seed(50)
qdaModel <- train(learner = qda, task = data1_Task)

# Stratified repeated cross-validation: 3 folds, 10 repetitions.
kFold <- makeResampleDesc(
  method = "RepCV",
  folds = 3,
  reps = 10,
  stratify = TRUE
)
set.seed(50)

# Resample the LDA learner, collecting the mmce and acc measures.
ldaCV <- resample(
  learner = lda,
  task = data1_Task,
  resampling = kFold,
  measures = list(mmce, acc)
)
# The equivalent QDA resampling is left disabled:
# qdaCV <- resample(learner = qda, task = data1_Task, resampling = kFold,
#                   measures = list(mmce, acc))
set.seed(50)
ldaCV$aggr
## mmce.test.mean  acc.test.mean 
##      0.3446954      0.6553046
#qdaCV$aggr

calculateConfusionMatrix(ldaCV$pred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
##         predicted
## true     nwd       wd        -err.-   
##   nwd    0.10/0.51 0.90/0.33 0.90     
##   wd     0.05/0.49 0.95/0.67 0.05     
##   -err.-      0.49      0.33 0.34     
## 
## 
## Absolute confusion matrix:
##         predicted
## true     nwd   wd -err.-
##   nwd    175 1615   1615
##   wd     167 3213    167
##   -err.- 167 1615   1782
#calculateConfusionMatrix(qdaCV$pred, relative = TRUE)

Modelio LDA tikslumas 65.53 %

QDA modelis neveikia, nes atsiranda klaida Error in checkClass(x, classes, ordered, null.ok) :

object ‘qdaCV’ not found

10-fold

# Stratified 10-fold cross-validation of the LDA learner.
kFold10 <- makeResampleDesc(method = "CV", iters = 10, stratify = TRUE)

set.seed(50)

ldaCVIA <- resample(
  learner = lda,
  task = data1_Task,
  resampling = kFold10,
  measures = list(mmce, acc)
)
## Resampling: cross-validation
## Measures:             mmce      acc
## [Resample] iter 1:    0.3725490 0.6274510
## [Resample] iter 2:    0.3461538 0.6538462
## [Resample] iter 3:    0.3269231 0.6730769
## [Resample] iter 4:    0.3461538 0.6538462
## [Resample] iter 5:    0.3846154 0.6153846
## [Resample] iter 6:    0.3461538 0.6538462
## [Resample] iter 7:    0.3269231 0.6730769
## [Resample] iter 8:    0.3653846 0.6346154
## [Resample] iter 9:    0.3076923 0.6923077
## [Resample] iter 10:   0.3000000 0.7000000
## 
## Aggregated Result: mmce.test.mean=0.3422549,acc.test.mean=0.6577451
## 
ldaCVIA$aggr
## mmce.test.mean  acc.test.mean 
##      0.3422549      0.6577451

Modelio kFolds tikslumas 65,77 %

LOO

# Leave-one-out resampling of the LDA learner; prints the aggregated
# mmce and acc afterwards.
LOO <- makeResampleDesc(method = "LOO")

set.seed(50)
lda_LOO <- resample(
  learner = lda,
  task = data1_Task,
  resampling = LOO,
  measures = list(mmce, acc)
)
lda_LOO$aggr
## mmce.test.mean  acc.test.mean 
##      0.3462282      0.6537718

LDA LOO tikslumas 65,38 %

KNN Hold out confusion matrix

set.seed(50)

# Draw 70% of the row indices (fraction truncated to a whole number of rows,
# sampled without replacement) for training; the rest form the test set.
dat.d <- sample(
  seq_len(nrow(data1)),
  size = floor(nrow(data1) * 0.7),
  replace = FALSE
)

# Predictors: every column except `day` (position 3).
train.loan <- data1[dat.d, -3]
test.loan <- data1[-dat.d, -3]

# Class labels: the `day` column.
# NOTE(review): only some predictor columns were min-max scaled earlier, so
# k-NN distances are presumably dominated by the unscaled ones — confirm
# whether that is intended.
train.loan_labels <- data1[["day"]][dat.d]
test.loan_labels <- data1[["day"]][-dat.d]


# Accuracy (in percent) of k-nearest-neighbour classification on the held-out
# rows for k = 1..12. Each result is printed as "k = accuracy" on one line.
# The result vector is allocated up front so it is not regrown on every
# iteration.
# NOTE(review): k is compared on the same rows later used to judge the
# model, so the best value may be optimistic — confirm this is acceptable.
k.optm <- numeric(12)
for (i in seq_len(12)) {
  knn.mod <- knn(
    train = train.loan,
    test = test.loan,
    cl = train.loan_labels,
    k = i
  )
  k.optm[i] <- 100 * sum(test.loan_labels == knn.mod) / length(test.loan_labels)
  cat(i, "=", k.optm[i], "")
}
## 1 = 66.66667 2 = 57.69231 3 = 64.10256 4 = 56.41026 5 = 62.17949 6 = 64.74359 7 = 62.82051 8 = 62.17949 9 = 64.74359 10 = 64.10256 11 = 66.66667 12 = 67.30769
plot(k.optm, type="b", xlab="K- Value",ylab="Accuracy level")

Didžiausias KNN tikslumas (67,31 %) gaunamas, kai k = 12

# mlr k-NN learner with k = 12, evaluated on a single unstratified hold-out
# split (split = 0.9; the resulting test set has 52 rows).
knn <- makeLearner("classif.knn", par.vals = list(k = 12))
holdoutNoStrat <- makeResampleDesc(
  method = "Holdout",
  split = 0.9,
  stratify = FALSE
)
kFoldCV <- resample(
  learner = knn,
  task = data1_Task,
  resampling = holdoutNoStrat,
  measures = list(mmce, acc)
)
## Resampling: holdout
## Measures:             mmce      acc
## [Resample] iter 1:    0.3653846 0.6346154
## 
## Aggregated Result: mmce.test.mean=0.3653846,acc.test.mean=0.6346154
## 

KNN metodas atvaizduoja 63,46 %

Confusion Matrix:

calculateConfusionMatrix(kFoldCV$pred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
##         predicted
## true     nwd       wd        -err.-   
##   nwd    0.12/0.29 0.88/0.31 0.88     
##   wd     0.14/0.71 0.86/0.69 0.14     
##   -err.-      0.71      0.31 0.37     
## 
## 
## Absolute confusion matrix:
##         predicted
## true     nwd wd -err.-
##   nwd      2 14     14
##   wd       5 31      5
##   -err.-   5 14     19

10-fold crossvalidation

# Stratified 10-fold cross-validation of the k-NN learner.
kFold10 <- makeResampleDesc(method = "CV", iters = 10, stratify = TRUE)

# The result gets its own name so the LDA 10-fold result in `ldaCVIA` is not
# overwritten by k-NN figures.
knnCVIA <- resample(learner = knn, task = data1_Task, resampling = kFold10, measures = list(mmce, acc))
## Resampling: cross-validation
## Measures:             mmce      acc
## [Resample] iter 1:    0.3269231 0.6730769
## [Resample] iter 2:    0.4313725 0.5686275
## [Resample] iter 3:    0.3269231 0.6730769
## [Resample] iter 4:    0.3725490 0.6274510
## [Resample] iter 5:    0.4230769 0.5769231
## [Resample] iter 6:    0.4117647 0.5882353
## [Resample] iter 7:    0.3846154 0.6153846
## [Resample] iter 8:    0.3846154 0.6153846
## [Resample] iter 9:    0.3461538 0.6538462
## [Resample] iter 10:   0.4230769 0.5769231
## 
## Aggregated Result: mmce.test.mean=0.3831071,acc.test.mean=0.6168929
## 
knnCVIA$aggr
## mmce.test.mean  acc.test.mean 
##      0.3831071      0.6168929

KNN 10 -fold crossvalidation atvaizduoja 61,69 %

Logistic regression

Statistikoje logistinis modelis yra statistinis modelis, modeliuojantis vieno įvykio tikimybę, kai įvykio log-odds yra tiesinis vieno ar kelių nepriklausomų kintamųjų derinys. Regresinėje analizėje logistinė regresija yra logistinio modelio parametrų įvertinimas.

Hold out confusion matrix

# Rebuild the classification task and create a logistic regression learner
# that predicts class probabilities.
data1_Task <- makeClassifTask(data = data1, target = "day")
logReg <- makeLearner("classif.logreg", predict.type = "prob")

# Impute wrapper around a plain "classif.logreg" learner; no imputation
# rules are supplied in this call.
logRegWrapper <- makeImputeWrapper("classif.logreg")

# Stratified hold-out split (split = 0.9), scored by accuracy.
holdout <- makeResampleDesc(method = "Holdout", split = 0.9, stratify = TRUE)

irisLogReg <- resample(
  learner = logReg,
  task = data1_Task,
  resampling = holdout,
  measures = list(acc)
)
## Resampling: holdout
## Measures:             acc
## [Resample] iter 1:    0.6730769
## 
## Aggregated Result: acc.test.mean=0.6730769
## 
calculateConfusionMatrix(irisLogReg$pred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
##         predicted
## true     nwd       wd        -err.-   
##   nwd    0.06/1.00 0.94/0.33 0.94     
##   wd     0.00/0.00 1.00/0.67 0.00     
##   -err.-      0.00      0.33 0.33     
## 
## 
## Absolute confusion matrix:
##         predicted
## true     nwd wd -err.-
##   nwd      1 17     17
##   wd       0 34      0
##   -err.-   0 17     17

Regression atvaizduoja 67,31 %

10-fold crossvalidation

# 10-fold cross-validation (not stratified) of the wrapped logistic
# regression learner, scored by accuracy. Reuses the name `kFold`.
kFold <- makeResampleDesc(method = "CV", iters = 10)
set.seed(50)
logRegwithImpute <- resample(
  learner = logRegWrapper,
  task = data1_Task,
  resampling = kFold,
  measures = list(acc)
)
## Resampling: cross-validation
## Measures:             acc
## [Resample] iter 1:    0.6538462
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## [Resample] iter 2:    0.7692308
## [Resample] iter 3:    0.6730769
## [Resample] iter 4:    0.5882353
## [Resample] iter 5:    0.7115385
## [Resample] iter 6:    0.6730769
## [Resample] iter 7:    0.5490196
## [Resample] iter 8:    0.6274510
## [Resample] iter 9:    0.6538462
## [Resample] iter 10:   0.6346154
## 
## Aggregated Result: acc.test.mean=0.6533937
## 
calculateConfusionMatrix(logRegwithImpute$pred, relative = TRUE)
## Relative confusion matrix (normalized by row/column):
##         predicted
## true     nwd       wd        -err.-   
##   nwd    0.09/0.50 0.91/0.34 0.91     
##   wd     0.05/0.50 0.95/0.66 0.05     
##   -err.-      0.50      0.34 0.35     
## 
## 
## Absolute confusion matrix:
##         predicted
## true     nwd  wd -err.-
##   nwd     17 162    162
##   wd      17 321     17
##   -err.-  17 162    179

Regression 10-fold 65,34 %

Principal components metodas

# Principal component analysis of every column except `day` (position 3),
# with each variable scaled to unit variance before the decomposition.
# The argument is written out as `scale.`, the actual formal of prcomp(),
# instead of relying on partial matching of `scale`.
PC <- prcomp(data1[, -3], scale. = TRUE)
PC
## Standard deviations (1, .., p=11):
##  [1] 1.6936493 1.2527005 1.1413274 1.1031844 0.9942302 0.9649257 0.8223573
##  [8] 0.6893450 0.6804158 0.5420547 0.4634835
## 
## Rotation (n x k) = (11 x 11):
##              PC1         PC2         PC3         PC4         PC5         PC6
## X    -0.07556259 0.678153236 -0.10331699  0.05522838 -0.07695738  0.06027244
## Y    -0.06879035 0.669309745 -0.09826604  0.13918799 -0.11362354  0.08261238
## FFMC  0.42309818 0.053615017 -0.17567188 -0.25663655 -0.09165460  0.10704494
## DMC   0.42883564 0.110385775  0.43483082  0.03705563  0.09462031  0.15068344
## DC    0.43230750 0.001888655  0.38751278  0.18211511  0.05427566  0.07969746
## ISI   0.35802561 0.096922723 -0.12679514 -0.45672785 -0.09363709  0.20381273
## temp  0.48491200 0.018675558 -0.20732742  0.16337476 -0.04611169 -0.17995218
## RH   -0.23301870 0.136933977  0.68739686 -0.14745909  0.06331626  0.19032738
## wind -0.12280037 0.023814794 -0.08727304 -0.69586380  0.25295051  0.21750241
## rain  0.04870842 0.175589517  0.22381117 -0.34454231 -0.02947805 -0.88589973
## area  0.06844108 0.131060942 -0.14241010  0.13116221  0.93867554 -0.08218313
##              PC7         PC8         PC9        PC10         PC11
## X    -0.01457914  0.39275003  0.58078880 -0.12904118  0.051829627
## Y    -0.12517156 -0.34326853 -0.56717344  0.20545435 -0.033605505
## FFMC  0.34648591  0.59890885 -0.41707510 -0.01260941 -0.220627066
## DMC  -0.20301350 -0.02228905 -0.15734444 -0.56407070  0.444028372
## DC   -0.27361636  0.15313241  0.15261024  0.70608828 -0.017637296
## ISI   0.41893557 -0.50163283  0.28757769  0.16666976  0.223196190
## temp -0.23925105 -0.28574845  0.18166411 -0.28735575 -0.637069938
## RH    0.32136275 -0.09225706  0.02976710 -0.09814767 -0.519764225
## wind -0.60106243  0.04963205 -0.01587542  0.01499531 -0.130393154
## rain  0.01567295  0.01468460 -0.04354473  0.05242092  0.084866421
## area  0.22459490 -0.02824828 -0.01621226  0.03582682 -0.001268496
PC$sdev
##  [1] 1.6936493 1.2527005 1.1413274 1.1031844 0.9942302 0.9649257 0.8223573
##  [8] 0.6893450 0.6804158 0.5420547 0.4634835
PC$rotation
##              PC1         PC2         PC3         PC4         PC5         PC6
## X    -0.07556259 0.678153236 -0.10331699  0.05522838 -0.07695738  0.06027244
## Y    -0.06879035 0.669309745 -0.09826604  0.13918799 -0.11362354  0.08261238
## FFMC  0.42309818 0.053615017 -0.17567188 -0.25663655 -0.09165460  0.10704494
## DMC   0.42883564 0.110385775  0.43483082  0.03705563  0.09462031  0.15068344
## DC    0.43230750 0.001888655  0.38751278  0.18211511  0.05427566  0.07969746
## ISI   0.35802561 0.096922723 -0.12679514 -0.45672785 -0.09363709  0.20381273
## temp  0.48491200 0.018675558 -0.20732742  0.16337476 -0.04611169 -0.17995218
## RH   -0.23301870 0.136933977  0.68739686 -0.14745909  0.06331626  0.19032738
## wind -0.12280037 0.023814794 -0.08727304 -0.69586380  0.25295051  0.21750241
## rain  0.04870842 0.175589517  0.22381117 -0.34454231 -0.02947805 -0.88589973
## area  0.06844108 0.131060942 -0.14241010  0.13116221  0.93867554 -0.08218313
##              PC7         PC8         PC9        PC10         PC11
## X    -0.01457914  0.39275003  0.58078880 -0.12904118  0.051829627
## Y    -0.12517156 -0.34326853 -0.56717344  0.20545435 -0.033605505
## FFMC  0.34648591  0.59890885 -0.41707510 -0.01260941 -0.220627066
## DMC  -0.20301350 -0.02228905 -0.15734444 -0.56407070  0.444028372
## DC   -0.27361636  0.15313241  0.15261024  0.70608828 -0.017637296
## ISI   0.41893557 -0.50163283  0.28757769  0.16666976  0.223196190
## temp -0.23925105 -0.28574845  0.18166411 -0.28735575 -0.637069938
## RH    0.32136275 -0.09225706  0.02976710 -0.09814767 -0.519764225
## wind -0.60106243  0.04963205 -0.01587542  0.01499531 -0.130393154
## rain  0.01567295  0.01468460 -0.04354473  0.05242092  0.084866421
## area  0.22459490 -0.02824828 -0.01621226  0.03582682 -0.001268496
PC$center
##           X           Y        FFMC         DMC          DC         ISI 
##  0.45865571  0.32854380  0.92831846  0.37826444  0.63332947  9.02166344 
##        temp          RH        wind        rain        area 
## 18.88916828 44.28820116  4.01760155  0.02166344 12.84729207
PC$scale
##           X           Y        FFMC         DMC          DC         ISI 
##  0.28922223  0.17570006  0.07122724  0.22069773  0.29091848  4.55947718 
##        temp          RH        wind        rain        area 
##  5.80662535 16.31746924  1.79165260  0.29595912 63.65581847
head(PC$x)
##             PC1       PC2        PC3         PC4         PC5        PC6
## [1,] -3.3111267 0.8065462 -0.6469244 -1.05215166  0.09012872  0.3179227
## [2,] -0.2560234 0.1626998 -0.6168351  1.58821969 -0.75057626 -0.6038570
## [3,] -0.4807794 0.1715222 -0.4307642  1.35507053 -0.65094627 -0.4246828
## [4,] -3.0842255 2.2613875  1.1840076 -1.16676201 -0.42353215  0.3104463
## [5,] -2.7093024 2.1608966  1.3339775  0.06970309 -0.67154734  0.5998561
## [6,]  0.4758850 1.8551412 -1.5796407 -0.73950254 -0.54058097  0.3757952
##             PC7        PC8         PC9       PC10        PC11
## [1,] -0.3302591  0.4658783 -0.05992081 -0.1917307  0.22176275
## [2,]  0.7190309  0.8567897  0.82587507  0.8194701  0.09473749
## [3,]  0.6789872  1.0432865  0.70651928  0.9686412  0.49493249
## [4,]  2.0794635  0.1814565 -0.38587063 -0.3296094 -0.98402165
## [5,]  2.5387721 -0.3706907 -0.04616543 -0.6096974 -1.03675031
## [6,] -0.3689603 -0.4159585  0.38094223  0.2852692  0.08410152
eig.val <- get_eigenvalue(PC)
eig.val
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   2.8684479        26.076799                    26.07680
## Dim.2   1.5692585        14.265986                    40.34279
## Dim.3   1.3026283        11.842075                    52.18486
## Dim.4   1.2170159        11.063781                    63.24864
## Dim.5   0.9884937         8.986306                    72.23495
## Dim.6   0.9310817         8.464379                    80.69933
## Dim.7   0.6762716         6.147924                    86.84725
## Dim.8   0.4751966         4.319969                    91.16722
## Dim.9   0.4629656         4.208779                    95.37600
## Dim.10  0.2938233         2.671121                    98.04712
## Dim.11  0.2148169         1.952881                   100.00000
res.var <- get_pca_var(PC)
res.var$coord   
##            Dim.1       Dim.2       Dim.3       Dim.4       Dim.5       Dim.6
## X    -0.12797653 0.849522889 -0.11791852  0.06092709 -0.07651335  0.05815842
## Y    -0.11650672 0.838444643 -0.11215372  0.15355003 -0.11296795  0.07971481
## FFMC  0.71657993 0.067163557 -0.20049913 -0.28311745 -0.09112578  0.10329042
## DMC   0.72629718 0.138280314  0.49628434  0.04087919  0.09407437  0.14539833
## DC    0.73217729 0.002365919  0.44227896  0.20090655  0.05396250  0.07690213
## ISI   0.60636982 0.121415142 -0.14471477 -0.50385506 -0.09309682  0.19666415
## temp  0.82127086 0.023394880 -0.23662847  0.18023250 -0.04584563 -0.17364049
## RH   -0.39465196 0.171537259  0.78454489 -0.16267457  0.06295094  0.18365179
## wind -0.20798076 0.029832804 -0.09960712 -0.76766612  0.25149104  0.20987368
## rain  0.08249498 0.219961074  0.25544182 -0.38009371 -0.02930796 -0.85482745
## area  0.11591518 0.164180106 -0.16253656  0.14469611  0.93325958 -0.07930061
##            Dim.7       Dim.8       Dim.9       Dim.10        Dim.11
## X    -0.01198926  0.27074028  0.39517786 -0.069947383  0.0240221750
## Y    -0.10293575 -0.23663045 -0.38591376  0.111367498 -0.0155755959
## FFMC  0.28493524  0.41285484 -0.28378448 -0.006834990 -0.1022569972
## DMC  -0.16694964 -0.01536484 -0.10705964 -0.305757184  0.2057998088
## DC   -0.22501042  0.10556106  0.10383841  0.382738483 -0.0081745950
## ISI   0.34451474 -0.34579809  0.19567240  0.090344132  0.1034477434
## temp -0.19674986 -0.19697927  0.12360713 -0.155762542 -0.2952713828
## RH    0.26427502 -0.06359695  0.02025400 -0.053201406 -0.2409021240
## wind -0.49428810  0.03421360 -0.01080188  0.008128276 -0.0604350710
## rain  0.01288877  0.01012276 -0.02962852  0.028415009  0.0393341827
## area  0.18469727 -0.01947281 -0.01103107  0.019420096 -0.0005879271
res.var$contrib 
##           Dim.1        Dim.2      Dim.3      Dim.4       Dim.5      Dim.6
## X     0.5709706 4.598918e+01  1.0674401  0.3050174  0.59224380  0.3632766
## Y     0.4732112 4.479755e+01  0.9656214  1.9373297  1.29103080  0.6824806
## FFMC 17.9012071 2.874570e-01  3.0860609  6.5862319  0.84005666  1.1458620
## DMC  18.3900010 1.218502e+00 18.9077845  0.1373119  0.89530033  2.2705499
## DC   18.6889775 3.567017e-04 15.0166153  3.3165912  0.29458473  0.6351685
## ISI  12.8182340 9.394014e-01  1.6077007 20.8600325  0.87679048  4.1539630
## temp 23.5139649 3.487765e-02  4.2984660  2.6691314  0.21262877  3.2382786
## RH    5.4297716 1.875091e+00 47.2514450  2.1744182  0.40089488  3.6224511
## wind  1.5079931 5.671444e-02  0.7616584 48.4226431  6.39839626  4.7307300
## rain  0.2372510 3.083168e+00  5.0091439 11.8709401  0.08689552 78.4818331
## area  0.4684181 1.717697e+00  2.0280638  1.7203525 88.11117777  0.6754066
##            Dim.7       Dim.8       Dim.9      Dim.10       Dim.11
## X     0.02125514 15.42525895 33.73156252  1.66516274 2.686310e-01
## Y     1.56679190 11.78332820 32.16857099  4.22114883 1.129330e-01
## FFMC 12.00524890 35.86918138 17.39516426  0.01589972 4.867630e+00
## DMC   4.12144801  0.04968015  2.47572732 31.81757538 1.971612e+01
## DC    7.48659109  2.34495341  2.32898848 49.85606565 3.110742e-02
## ISI  17.55070088 25.16354950  8.27009262  2.77788102 4.981654e+00
## temp  5.72410665  8.16521762  3.30018489  8.25733288 4.058581e+01
## RH   10.32740197  0.85113651  0.08860800  0.96329644 2.701548e+01
## wind 36.12760443  0.24633400  0.02520289  0.02248592 1.700237e+00
## rain  0.02456415  0.02156375  0.18961431  0.27479533 7.202309e-01
## area  5.04428690  0.07979654  0.02628372  0.12835609 1.609083e-04
res.var$cos2 
##            Dim.1        Dim.2       Dim.3       Dim.4        Dim.5       Dim.6
## X    0.016377993 7.216891e-01 0.013904776 0.003712110 0.0058542927 0.003382402
## Y    0.013573816 7.029894e-01 0.012578457 0.023577611 0.0127617581 0.006354452
## FFMC 0.513486795 4.510943e-03 0.040199902 0.080155491 0.0083039071 0.010668911
## DMC  0.527507595 1.912145e-02 0.246298146 0.001671108 0.0088499873 0.021140674
## DC   0.536083580 5.597571e-06 0.195610676 0.040363443 0.0029119515 0.005913937
## ISI  0.367684362 1.474164e-02 0.020942363 0.253869917 0.0086670186 0.038676788
## temp 0.674485830 5.473204e-04 0.055993033 0.032483754 0.0021018219 0.030151019
## RH   0.155750169 2.942503e-02 0.615510679 0.026463016 0.0039628206 0.033727978
## wind 0.043255997 8.899962e-04 0.009921577 0.589311277 0.0632477438 0.044046959
## rain 0.006805421 4.838287e-02 0.065250525 0.144471231 0.0008589568 0.730729961
## area 0.013436329 2.695511e-02 0.026418132 0.020936964 0.8709734397 0.006288587
##             Dim.7        Dim.8        Dim.9       Dim.10       Dim.11
## X    0.0001437424 0.0733003004 0.1561655432 4.892636e-03 5.770649e-04
## Y    0.0105957688 0.0559939706 0.1489294295 1.240272e-02 2.425992e-04
## FFMC 0.0811880895 0.1704491171 0.0805336330 4.671709e-05 1.045649e-02
## DMC  0.0278721826 0.0002360784 0.0114617668 9.348746e-02 4.235356e-02
## DC   0.0506296898 0.0111431380 0.0107824164 1.464887e-01 6.682400e-05
## ISI  0.1186904066 0.1195763224 0.0382876870 8.162062e-03 1.070144e-02
## temp 0.0387105080 0.0388008335 0.0152787220 2.426197e-02 8.718519e-02
## RH   0.0698412871 0.0040445714 0.0004102246 2.830390e-03 5.803383e-02
## wind 0.2443207305 0.0011705707 0.0001166807 6.606887e-05 3.652398e-03
## rain 0.0001661203 0.0001024702 0.0008778491 8.074127e-04 1.547178e-03
## area 0.0341130800 0.0003791904 0.0001216846 3.771401e-04 3.456583e-07
res.ind <- get_pca_ind(PC)
head(res.ind$coord) 
##        Dim.1     Dim.2      Dim.3       Dim.4       Dim.5      Dim.6      Dim.7
## 1 -3.3111267 0.8065462 -0.6469244 -1.05215166  0.09012872  0.3179227 -0.3302591
## 2 -0.2560234 0.1626998 -0.6168351  1.58821969 -0.75057626 -0.6038570  0.7190309
## 3 -0.4807794 0.1715222 -0.4307642  1.35507053 -0.65094627 -0.4246828  0.6789872
## 4 -3.0842255 2.2613875  1.1840076 -1.16676201 -0.42353215  0.3104463  2.0794635
## 5 -2.7093024 2.1608966  1.3339775  0.06970309 -0.67154734  0.5998561  2.5387721
## 6  0.4758850 1.8551412 -1.5796407 -0.73950254 -0.54058097  0.3757952 -0.3689603
##        Dim.8       Dim.9     Dim.10      Dim.11
## 1  0.4658783 -0.05992081 -0.1917307  0.22176275
## 2  0.8567897  0.82587507  0.8194701  0.09473749
## 3  1.0432865  0.70651928  0.9686412  0.49493249
## 4  0.1814565 -0.38587063 -0.3296094 -0.98402165
## 5 -0.3706907 -0.04616543 -0.6096974 -1.03675031
## 6 -0.4159585  0.38094223  0.2852692  0.08410152
head(res.ind$contrib)  
##         Dim.1       Dim.2      Dim.3        Dim.4       Dim.5      Dim.6
## 1 0.739288748 0.080181376 0.06214354 0.1759421474 0.001589505 0.02099736
## 2 0.004419995 0.003262791 0.05649723 0.4008984211 0.110236444 0.07575127
## 3 0.015586701 0.003626233 0.02755295 0.2918347448 0.082913614 0.03746719
## 4 0.641438091 0.630325592 0.20816031 0.2163603915 0.035100097 0.02002140
## 5 0.494968370 0.575549858 0.26423235 0.0007721778 0.088244738 0.07475080
## 6 0.015270967 0.424198284 0.37051485 0.0869145609 0.057181699 0.02933757
##        Dim.7      Dim.8        Dim.9     Dim.10      Dim.11
## 1 0.03119592 0.08834484 0.0015000857 0.02419950 0.044281076
## 2 0.14787113 0.29880279 0.2849636180 0.44206839 0.008081365
## 3 0.13185951 0.44304043 0.2085492971 0.61765948 0.220563115
## 4 1.23677556 0.01340236 0.0622077014 0.07151915 0.871867061
## 5 1.84346859 0.05593189 0.0008904192 0.24471012 0.967808244
## 6 0.03893566 0.07042651 0.0606287946 0.05357139 0.006368668
head(res.ind$cos2)
##        Dim.1       Dim.2      Dim.3        Dim.4        Dim.5       Dim.6
## 1 0.80234169 0.047606505 0.03062773 0.0810148154 0.0005944757 0.007396918
## 2 0.01002780 0.004049680 0.05820826 0.3858936223 0.0861858748 0.055784647
## 3 0.03777996 0.004808509 0.03032836 0.3001190227 0.0692564164 0.029478078
## 4 0.40916890 0.219968421 0.06030028 0.0585564750 0.0077158385 0.004145560
## 5 0.32427004 0.206281298 0.07861201 0.0002146327 0.0199225596 0.015895934
## 6 0.02946360 0.447750125 0.32463731 0.0711477413 0.0380192267 0.018373177
##         Dim.7       Dim.8        Dim.9      Dim.10       Dim.11
## 1 0.007982102 0.015883741 2.627623e-04 0.002690240 0.0035990257
## 2 0.079093634 0.112303923 1.043458e-01 0.102733649 0.0013730622
## 3 0.075351667 0.177900399 8.158640e-02 0.153354186 0.0400370107
## 4 0.186000004 0.001416301 6.404627e-03 0.004673148 0.0416504470
## 5 0.284733987 0.006070370 9.415117e-05 0.016421794 0.0474832264
## 6 0.017710914 0.022510325 1.887991e-02 0.010587454 0.0009202153
# Eigenvalue (scree) plot for the fitted PCA.
fviz_eig(PC)

# Individuals plot, coloured by quality of representation (cos2); `repel`
# keeps the text labels from overlapping.
fviz_pca_ind(
  PC,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)
## Warning: ggrepel: 474 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Variables plot, coloured by each variable's contribution to the components.
fviz_pca_var(
  PC,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

Pirmosios dvi PC komponentės atvaizduoja 40,34 %

Išvados

  1. LDA modelis atvaizduoja 65,53 % duomenų
  2. LDA 10-fold modelis atvaizduoja 65,77 % duomenų
  3. LDA LOO modelis atvaizduoja 65,38 % duomenų
  4. KNN modelis atvaizduoja 63,46 % duomenų
  5. KNN 10-fold modelis atvaizduoja 61,69 % duomenų
  6. Regression modelis atvaizduoja 67,31 % duomenų
  7. Regression 10-fold modelis atvaizduoja 65,34 % duomenų
  8. PC pirmosios dvi komponentės modelis atvaizduoja 40,34 % duomenų