Integrantes:
Con una data apropiada, primero haga el análisis exploratorio y luego aplique el Análisis de Componentes Principales.
El conjunto de datos proviene de un estudio sobre incendios forestales en dos regiones de Argelia la región de Bejaia y la región de Sidi Bel-Abbes. La cronología de este conjunto de datos va de junio de 2012 a septiembre de 2012. Este estudio, se centro en determinar las características meteorológicas podían predecir los incendios forestales en estas regiones.
Fecha : (DD/MM/AAAA) Día, mes (‘junio’ a ‘septiembre’), año (2012)
Observaciones de datos meteorológicos
- Temperature : temperatura mediodía (temperatura máxima) en grados Celsius: 22 a 42
- RH : Humedad relativa en %: 21 a 90
- Ws :Velocidad del viento en km/h: 6 a 29
- Lluvia: total del día en mm: 0 a 16,8
Componentes FWI (Fire Weather Index) - Índice meteorológico de incendios
- Índice del Código de Humedad de Combustible Fino (FFMC) del sistema FWI: 28,6 a 92,5
- Índice del Código de Humedad de Duff (DMC) del sistema FWI: 1,1 a 65,9
- Índice del Código de Sequía (DC) del sistema FWI: 7 a 220,4
- Índice de dispersión inicial (ISI) del sistema FWI: 0 a 18,5
- Índice de acumulación (BUI) del sistema FWI: 1,1 a 68
- Índice meteorológico del incendio (FWI) del sistema FWI: 0 a 31,1
- Clases: “Incendio” y “No incendio
library(readr)
library(DT)
Dataset <- read_csv("DatasetN.txt")
dim(Dataset)
[1] 244 14
datatable(Dataset,options = list(searching = FALSE))
summary(Dataset)
day month year Temperature
Length:244 Length:244 Min. :2012 Min. :22.00
Class :character Class :character 1st Qu.:2012 1st Qu.:30.00
Mode :character Mode :character Median :2012 Median :32.00
Mean :2012 Mean :32.17
3rd Qu.:2012 3rd Qu.:35.00
Max. :2012 Max. :42.00
RH Ws Rain FFMC
Min. :21.00 Min. : 6.0 Min. : 0.0000 Min. :28.60
1st Qu.:52.00 1st Qu.:14.0 1st Qu.: 0.0000 1st Qu.:72.08
Median :63.00 Median :15.0 Median : 0.0000 Median :83.50
Mean :61.94 Mean :15.5 Mean : 0.7607 Mean :77.89
3rd Qu.:73.25 3rd Qu.:17.0 3rd Qu.: 0.5000 3rd Qu.:88.30
Max. :90.00 Max. :29.0 Max. :16.8000 Max. :96.00
DMC DC ISI BUI
Min. : 0.70 Min. : 6.90 Min. : 0.00 Min. : 1.10
1st Qu.: 5.80 1st Qu.: 13.28 1st Qu.: 1.40 1st Qu.: 6.00
Median :11.30 Median : 33.10 Median : 3.50 Median :12.45
Mean :14.67 Mean : 49.29 Mean : 4.76 Mean :16.67
3rd Qu.:20.75 3rd Qu.: 68.15 3rd Qu.: 7.30 3rd Qu.:22.52
Max. :65.90 Max. :220.40 Max. :19.00 Max. :68.00
FWI Classes
Min. : 0.000 Length:244
1st Qu.: 0.700 Class :character
Median : 4.450 Mode :character
Mean : 7.049
3rd Qu.:11.375
Max. :31.100
str(Dataset)
spc_tbl_ [244 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ day : chr [1:244] "01" "02" "03" "04" ...
$ month : chr [1:244] "06" "06" "06" "06" ...
$ year : num [1:244] 2012 2012 2012 2012 2012 ...
$ Temperature: num [1:244] 29 29 26 25 27 31 33 30 25 28 ...
$ RH : num [1:244] 57 61 82 89 77 67 54 73 88 79 ...
$ Ws : num [1:244] 18 13 22 13 16 14 13 15 13 12 ...
$ Rain : num [1:244] 0 1.3 13.1 2.5 0 0 0 0 0.2 0 ...
$ FFMC : num [1:244] 65.7 64.4 47.1 28.6 64.8 82.6 88.2 86.6 52.9 73.2 ...
$ DMC : num [1:244] 3.4 4.1 2.5 1.3 3 5.8 9.9 12.1 7.9 9.5 ...
$ DC : num [1:244] 7.6 7.6 7.1 6.9 14.2 22.2 30.5 38.3 38.8 46.3 ...
$ ISI : num [1:244] 1.3 1 0.3 0 1.2 3.1 6.4 5.6 0.4 1.3 ...
$ BUI : num [1:244] 3.4 3.9 2.7 1.7 3.9 7 10.9 13.5 10.5 12.6 ...
$ FWI : num [1:244] 0.5 0.4 0.1 0 0.5 2.5 7.2 7.1 0.3 0.9 ...
$ Classes : chr [1:244] "not fire" "not fire" "not fire" "not fire" ...
- attr(*, "spec")=
.. cols(
.. day = col_character(),
.. month = col_character(),
.. year = col_double(),
.. Temperature = col_double(),
.. RH = col_double(),
.. Ws = col_double(),
.. Rain = col_double(),
.. FFMC = col_double(),
.. DMC = col_double(),
.. DC = col_double(),
.. ISI = col_double(),
.. BUI = col_double(),
.. FWI = col_double(),
.. Classes = col_character()
.. )
- attr(*, "problems")=<externalptr>
Dataset.1=Dataset[, sapply(Dataset, is.numeric)]
head(Dataset.1)
# A tibble: 6 × 11
year Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2012 29 57 18 0 65.7 3.4 7.6 1.3 3.4 0.5
2 2012 29 61 13 1.3 64.4 4.1 7.6 1 3.9 0.4
3 2012 26 82 22 13.1 47.1 2.5 7.1 0.3 2.7 0.1
4 2012 25 89 13 2.5 28.6 1.3 6.9 0 1.7 0
5 2012 27 77 16 0 64.8 3 14.2 1.2 3.9 0.5
6 2012 31 67 14 0 82.6 5.8 22.2 3.1 7 2.5
Dataset.2=Dataset.1[,-1]
head(Dataset.2)
# A tibble: 6 × 10
Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 29 57 18 0 65.7 3.4 7.6 1.3 3.4 0.5
2 29 61 13 1.3 64.4 4.1 7.6 1 3.9 0.4
3 26 82 22 13.1 47.1 2.5 7.1 0.3 2.7 0.1
4 25 89 13 2.5 28.6 1.3 6.9 0 1.7 0
5 27 77 16 0 64.8 3 14.2 1.2 3.9 0.5
6 31 67 14 0 82.6 5.8 22.2 3.1 7 2.5
library(ade4)
library(FactoMineR)
library(psych)
library(car)
library(factoextra)
library(PerformanceAnalytics)
library(GGally)
library(epiDisplay)
tabla2=tab1(Dataset$Classes,cum.percent = TRUE,main="Tabla de Classes",
col = c('green2','yellow'))
ppClasses=round((prop.table(table(Dataset$Classes))*100),1)
ppClasses
fire not fire
56.6 43.4
barplot(ppClasses,ylab = "Porcentaje (%)",
xlab = "Classes",col=c('green2','yellow'),
main="Proporción de incendios",
cex.names =0.75)
Dataset.3<- as.data.frame(scale(Dataset.2))
head(Dataset.3)
Temperature RH Ws Rain FFMC DMC
1 -0.8729411 -0.33179644 0.8881648 -0.3804409 -0.8500537 -0.9114914
2 -0.8729411 -0.06305509 -0.8910816 0.2697523 -0.9407246 -0.8548939
3 -1.6985133 1.34783698 2.3115620 6.1715064 -2.1473446 -0.9842596
4 -1.9737040 1.81813434 -0.8910816 0.8699307 -3.4376608 -1.0812839
5 -1.4233226 1.01191030 0.1764663 -0.3804409 -0.9128258 -0.9438328
6 -0.3225596 0.34005693 -0.5352323 -0.3804409 0.3286676 -0.7174428
DC ISI BUI FWI
1 -0.8754391 -0.8327668 -0.9346352 -0.8816448
2 -0.8754391 -0.9049754 -0.8994280 -0.8951068
3 -0.8859390 -1.0734623 -0.9839253 -0.9354925
4 -0.8901389 -1.1456709 -1.0543396 -0.9489544
5 -0.7368409 -0.8568363 -0.8994280 -0.8816448
6 -0.5688431 -0.3995150 -0.6811435 -0.6124066
library(psych)
pairs.panels(Dataset.3)
Se observa una alta correlación entre DMC Y BUI (0.98).
Se observa una alta correlación entre DC Y BUI (0.94).
Se observa una alta correlación entre ISI Y FWI (0.92).
Se observa una alta correlación entre FWI Y DMC (0.88).
head(Dataset.3)
Temperature RH Ws Rain FFMC DMC
1 -0.8729411 -0.33179644 0.8881648 -0.3804409 -0.8500537 -0.9114914
2 -0.8729411 -0.06305509 -0.8910816 0.2697523 -0.9407246 -0.8548939
3 -1.6985133 1.34783698 2.3115620 6.1715064 -2.1473446 -0.9842596
4 -1.9737040 1.81813434 -0.8910816 0.8699307 -3.4376608 -1.0812839
5 -1.4233226 1.01191030 0.1764663 -0.3804409 -0.9128258 -0.9438328
6 -0.3225596 0.34005693 -0.5352323 -0.3804409 0.3286676 -0.7174428
DC ISI BUI FWI
1 -0.8754391 -0.8327668 -0.9346352 -0.8816448
2 -0.8754391 -0.9049754 -0.8994280 -0.8951068
3 -0.8859390 -1.0734623 -0.9839253 -0.9354925
4 -0.8901389 -1.1456709 -1.0543396 -0.9489544
5 -0.7368409 -0.8568363 -0.8994280 -0.8816448
6 -0.5688431 -0.3995150 -0.6811435 -0.6124066
round(cov(Dataset.3),3)
Temperature RH Ws Rain FFMC DMC DC ISI BUI
Temperature 1.000 -0.654 -0.278 -0.327 0.677 0.483 0.370 0.606 0.456
RH -0.654 1.000 0.236 0.223 -0.646 -0.405 -0.220 -0.688 -0.350
Ws -0.278 0.236 1.000 0.170 -0.163 -0.001 0.076 0.012 0.030
Rain -0.327 0.223 0.170 1.000 -0.544 -0.289 -0.297 -0.348 -0.299
FFMC 0.677 -0.646 -0.163 -0.544 1.000 0.602 0.504 0.741 0.590
DMC 0.483 -0.405 -0.001 -0.289 0.602 1.000 0.875 0.678 0.982
DC 0.370 -0.220 0.076 -0.297 0.504 0.875 1.000 0.504 0.942
ISI 0.606 -0.688 0.012 -0.348 0.741 0.678 0.504 1.000 0.641
BUI 0.456 -0.350 0.030 -0.299 0.590 0.982 0.942 0.641 1.000
FWI 0.567 -0.580 0.034 -0.325 0.691 0.875 0.737 0.922 0.857
FWI
Temperature 0.567
RH -0.580
Ws 0.034
Rain -0.325
FFMC 0.691
DMC 0.875
DC 0.737
ISI 0.922
BUI 0.857
FWI 1.000
sum(diag(cov(Dataset.3)))
[1] 10
library(corrplot)
i=cor(Dataset.3,method="pearson")
corrplot(i,sig.level=0.05,type="lower")
Ho: ρ = 0 (No hay correlación), si pvalor ≥ 0.05
H1: ρ ≠ 0 (Hay correlación), si pvalor < 0.05
options(scipen = 999)
res1=cor.mtest(Dataset.3,conf.level=0.05)
round(res1$p,4)
Temperature RH Ws Rain FFMC DMC DC ISI BUI
Temperature 0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
RH 0 0.0000 0.0002 0.0004 0.0000 0.0000 0.0005 0.0000 0.0000
Ws 0 0.0002 0.0000 0.0077 0.0106 0.9846 0.2354 0.8491 0.6376
Rain 0 0.0004 0.0077 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
FFMC 0 0.0000 0.0106 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
DMC 0 0.0000 0.9846 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
DC 0 0.0005 0.2354 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
ISI 0 0.0000 0.8491 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
BUI 0 0.0000 0.6376 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
FWI 0 0.0000 0.5976 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
FWI
Temperature 0.0000
RH 0.0000
Ws 0.5976
Rain 0.0000
FFMC 0.0000
DMC 0.0000
DC 0.0000
ISI 0.0000
BUI 0.0000
FWI 0.0000
library(ade4)
acp = dudi.pca(Dataset.2,
scannf=FALSE, scale=TRUE,
nf=ncol(Dataset.2))
summary(acp)
Class: pca dudi
Call: dudi.pca(df = Dataset.2, scale = TRUE, scannf = FALSE, nf = ncol(Dataset.2))
Total inertia: 10
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
5.7174 1.5839 0.9256 0.8003 0.3900
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
57.174 15.839 9.256 8.003 3.900
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
57.17 73.01 82.27 90.27 94.17
(Only 5 dimensions (out of 10) are shown)
round(acp$eig,2)
[1] 5.72 1.58 0.93 0.80 0.39 0.25 0.22 0.09 0.02 0.00
library(factoextra)
fviz_eig(acp, addlabels=TRUE, hjust = 0.5)
fviz_pca_var(acp, col.var="steelblue")+theme_minimal()
Las puntuaciones de cada fecha en el análisis de componentes principales (PCA) se refieren a las coordenadas de las observaciones en el espacio de las componentes principales.
acp$li[1:10,]
Axis1 Axis2 Axis3 Axis4 Axis5 Axis6
1 -2.0544697 0.15360677 0.29609889 1.04285789 -0.1653572 -0.68683047
2 -2.2161034 0.79492998 -0.27716281 -0.52407327 -0.5838387 -0.33180045
3 -4.7242409 -3.25197605 4.16131985 -2.44639685 0.3913658 0.04268903
4 -4.3142345 -0.96058309 -0.77132302 -1.26265331 -1.2427533 1.09608181
5 -2.5479533 -0.27642615 -0.69812965 0.67651066 -0.5650277 0.01838239
6 -1.0762858 0.85805206 -0.57484908 0.34228397 -0.1179349 -0.07676674
7 0.3039348 1.42798533 -0.09344935 0.09801593 -0.3476391 -0.01543568
8 -0.2510460 0.03934705 -0.53279663 0.52912886 -0.4857654 0.28296735
9 -2.7751105 -0.88108741 -1.78390506 -0.44418824 -1.1118603 0.38005032
10 -1.5658021 0.05972506 -1.70464213 -0.42402949 -0.6093461 -0.02133149
Axis7 Axis8 Axis9 Axis10
1 0.75087013 0.101648202 0.059736998 -0.0008155778
2 0.35071706 -0.003679111 0.046894292 0.0026325908
3 -1.16454289 -0.146193233 0.025166852 0.0306828252
4 1.19374926 -0.253084043 -0.048473316 -0.0151257720
5 0.09934575 -0.034058910 0.033125807 0.0055057342
6 -0.48953889 -0.065502776 0.021956382 0.0061091708
7 -0.36404297 -0.157865933 0.001070873 0.0122211893
8 -0.83950829 -0.009834631 0.006825318 0.0235482529
9 0.34016695 -0.133855714 -0.133347771 -0.0333951719
10 -0.40177879 -0.128631394 -0.110525469 -0.0266855698
library(psych)
describe(acp$li)
vars n mean sd median trimmed mad min max range skew kurtosis
Axis1 1 244 0 2.40 -0.25 -0.09 2.49 -5.27 6.45 11.71 0.38 -0.31
Axis2 2 244 0 1.26 0.12 0.07 1.17 -4.27 2.65 6.92 -0.59 0.45
Axis3 3 244 0 0.96 -0.11 -0.09 0.79 -1.78 5.15 6.93 1.42 4.20
Axis4 4 244 0 0.90 0.08 0.06 0.80 -4.00 3.11 7.11 -0.87 2.82
Axis5 5 244 0 0.63 0.03 0.02 0.57 -2.01 1.69 3.69 -0.30 0.25
Axis6 6 244 0 0.50 0.04 0.01 0.46 -1.59 1.33 2.92 -0.28 0.30
Axis7 7 244 0 0.47 0.01 0.01 0.48 -1.94 1.45 3.39 -0.14 0.90
Axis8 8 244 0 0.30 -0.01 0.00 0.21 -1.05 1.25 2.29 0.10 2.23
Axis9 9 244 0 0.13 0.02 0.01 0.04 -1.48 0.23 1.71 -6.94 74.51
Axis10 10 244 0 0.06 0.00 0.00 0.03 -0.16 0.80 0.96 8.56 111.12
se
Axis1 0.15
Axis2 0.08
Axis3 0.06
Axis4 0.06
Axis5 0.04
Axis6 0.03
Axis7 0.03
Axis8 0.02
Axis9 0.01
Axis10 0.00
La media en las componentes es cercana a cero, lo que indica que las puntuaciones tienden a centrarse alrededor de cero en estas dimensiones; ello se da porque los datos centrados garantizan que las componentes principales capturen la variabilidad de manera óptima
Se observa una alta correlación entre DC Y BUI (0.94).
Se observa una alta correlación entre ISI Y FWI (0.92).
Se observa una alta correlación entre FWI Y DMC (0.88).
fviz_pca_ind(acp, repel = F,col.ind = 'steelblue')
library (adegenet)
library (factoextra)
grp <- find.clusters(Dataset.3, n.pca = 2, n.clust = 2)
grp$size
[1] 97 147
dapc.WIDIV <- dapc(Dataset.3, grp$grp, n.pca = 2,n.da=1)
scatter(dapc.WIDIV, posi.da = "bottomright", bg = "white", pch = 17:22,
cstar = 0)
contrib <- loadingplot(dapc.WIDIV$var.contr, axis=1,
thres=.10, lab.jitter=1)
dapc.WIDIV$var.contr
LD1
Temperature 0.05057852
RH 0.03749063
Ws 0.00407334
Rain 0.02283046
FFMC 0.08813542
DMC 0.17584803
DC 0.15698164
ISI 0.11699165
BUI 0.17994180
FWI 0.16712851
respca.f = PCA(Dataset.2, scale.unit=TRUE, ncp=10, graph=TRUE)
summary(respca.f)
Call:
PCA(X = Dataset.2, scale.unit = TRUE, ncp = 10, graph = TRUE)
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
Variance 5.717 1.584 0.926 0.800 0.390 0.249 0.222
% of var. 57.174 15.839 9.256 8.003 3.900 2.485 2.219
Cumulative % of var. 57.174 73.014 82.269 90.272 94.172 96.657 98.876
Dim.8 Dim.9 Dim.10
Variance 0.092 0.016 0.004
% of var. 0.924 0.161 0.039
Cumulative % of var. 99.800 99.961 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
1 | 2.549 | -2.054 0.303 0.650 | -0.154 0.006 0.004 | 0.296
2 | 2.544 | -2.216 0.352 0.759 | -0.795 0.164 0.098 | -0.277
3 | 7.598 | -4.724 1.600 0.387 | 3.252 2.736 0.183 | 4.161
4 | 5.095 | -4.314 1.334 0.717 | 0.961 0.239 0.036 | -0.771
5 | 2.801 | -2.548 0.465 0.828 | 0.276 0.020 0.010 | -0.698
6 | 1.614 | -1.076 0.083 0.444 | -0.858 0.191 0.282 | -0.575
7 | 1.558 | 0.304 0.007 0.038 | -1.428 0.528 0.840 | -0.093
8 | 1.284 | -0.251 0.005 0.038 | -0.039 0.000 0.001 | -0.533
9 | 3.659 | -2.775 0.552 0.575 | 0.881 0.201 0.058 | -1.784
10 | 2.471 | -1.566 0.176 0.402 | -0.060 0.001 0.001 | -1.705
ctr cos2
1 0.039 0.013 |
2 0.034 0.012 |
3 7.668 0.300 |
4 0.263 0.023 |
5 0.216 0.062 |
6 0.146 0.127 |
7 0.004 0.004 |
8 0.126 0.172 |
9 1.409 0.238 |
10 1.287 0.476 |
Variables
Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
Temperature | 0.714 8.906 0.509 | -0.441 12.261 0.194 | 0.109 1.274
RH | -0.662 7.665 0.438 | 0.506 16.186 0.256 | -0.398 17.146
Ws | -0.094 0.154 0.009 | 0.664 27.867 0.441 | 0.447 21.544
Rain | -0.470 3.857 0.221 | 0.270 4.603 0.073 | 0.589 37.505
FFMC | 0.834 12.167 0.696 | -0.294 5.469 0.087 | -0.030 0.098
DMC | 0.895 14.011 0.801 | 0.339 7.235 0.115 | -0.094 0.959
DC | 0.787 10.824 0.619 | 0.477 14.361 0.227 | -0.246 6.542
ISI | 0.871 13.255 0.758 | -0.098 0.609 0.010 | 0.306 10.101
BUI | 0.884 13.678 0.782 | 0.399 10.028 0.159 | -0.146 2.315
FWI | 0.941 15.484 0.885 | 0.148 1.381 0.022 | 0.153 2.517
cos2
Temperature 0.012 |
RH 0.159 |
Ws 0.199 |
Rain 0.347 |
FFMC 0.001 |
DMC 0.009 |
DC 0.061 |
ISI 0.093 |
BUI 0.021 |
FWI 0.023 |
Conclusiones:
*Gráfica PCA individual
-Las observaciones 173, 201 y 235, presenta mas cercania con la COMP1, esto indica que estan mas correlacionadas con esa COMPONENTE.
-Las observaciones 112, 168 y 176, presentan mas cercania con la COMP2, eso indica que es tan mas correlacionadas con esa COMPONENTE.
Gráfica PCA variables
las variables FWI e ISI son las que presentan mas cercania con kla COMP1, por lo que indica que existe mas correclacion con esa COMPONENTE.
La variable Ws esta mas cercano a la COMP2, esto indica que esta mas correlacionado con la COMP2.
Variables:
Las variables FWI y DMC explican el 15.484% y 14.011% de la variabilidad de la COMP1. Ambas variables son las que explican mas a esa componente.
La variable WS es la que explica mas con 27.867% de la variabilidad de la COMP2. Le sigue la variable RH con 16.86% de la variablidad de la COMP.2
La COMP1 explica 50.9% de la variable Temperatura, la COMP2 el 19.4%, la COMP3 el 1.2%.
## se tipifica porque no tienen varianza 1 y media 0 (normal estandarizada)
plot(respca.f, label = "none")
fviz_pca_var(respca.f, col.var = "steelblue")
respca.f$eig
eigenvalue percentage of variance cumulative percentage of variance
comp 1 5.717432571 57.17432571 57.17433
comp 2 1.583922773 15.83922773 73.01355
comp 3 0.925567905 9.25567905 82.26923
comp 4 0.800262738 8.00262738 90.27186
comp 5 0.389989518 3.89989518 94.17176
comp 6 0.248549203 2.48549203 96.65725
comp 7 0.221916672 2.21916672 98.87641
comp 8 0.092379425 0.92379425 99.80021
comp 9 0.016124848 0.16124848 99.96146
comp 10 0.003854347 0.03854347 100.00000
respca.f$var
$coord
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Temperature 0.71356004 -0.44069554 0.10860698 0.092336126 0.48972495
RH -0.66197722 0.50633312 -0.39836448 -0.040955352 0.10109273
Ws -0.09381214 0.66437506 0.44654557 -0.556390319 0.18735800
Rain -0.46962242 0.27001302 0.58917859 0.574714280 0.05070323
FFMC 0.83405573 -0.29431803 -0.03007563 -0.211177601 0.06395776
DMC 0.89501483 0.33851930 -0.09420141 0.183501573 -0.02701680
DC 0.78667881 0.47693233 -0.24607253 0.144593234 0.10774844
ISI 0.87053344 -0.09822417 0.30576974 -0.149193976 -0.23243340
BUI 0.88431591 0.39853760 -0.14636840 0.169555596 0.01627007
FWI 0.94090260 0.14789405 0.15262645 0.002965365 -0.17760966
Dim.6 Dim.7 Dim.8 Dim.9 Dim.10
Temperature 0.173819045 -0.07890029 -0.005345948 -0.0007979015 -0.0001573676
RH 0.310596567 0.19564128 -0.008463698 0.0019377020 0.0006781233
Ws -0.060423828 -0.03925156 -0.023154179 -0.0010240204 0.0001295950
Rain -0.009875458 0.16195267 0.014920535 0.0003648080 0.0002802592
FFMC -0.149209992 0.38134505 -0.018062897 -0.0109907302 0.0014424623
DMC -0.020437033 -0.03429713 -0.193715047 0.0307700474 0.0317839834
DC -0.120240386 -0.02523883 0.212434123 0.0104111689 0.0164916900
ISI 0.225882669 0.03702586 0.071527110 0.0722586739 -0.0035962286
BUI -0.049763699 -0.02091704 -0.056988999 0.0057405024 -0.0502092922
FWI 0.165767612 -0.02451073 0.013287420 -0.0984328258 0.0059651093
$cor
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Temperature 0.71356004 -0.44069554 0.10860698 0.092336126 0.48972495
RH -0.66197722 0.50633312 -0.39836448 -0.040955352 0.10109273
Ws -0.09381214 0.66437506 0.44654557 -0.556390319 0.18735800
Rain -0.46962242 0.27001302 0.58917859 0.574714280 0.05070323
FFMC 0.83405573 -0.29431803 -0.03007563 -0.211177601 0.06395776
DMC 0.89501483 0.33851930 -0.09420141 0.183501573 -0.02701680
DC 0.78667881 0.47693233 -0.24607253 0.144593234 0.10774844
ISI 0.87053344 -0.09822417 0.30576974 -0.149193976 -0.23243340
BUI 0.88431591 0.39853760 -0.14636840 0.169555596 0.01627007
FWI 0.94090260 0.14789405 0.15262645 0.002965365 -0.17760966
Dim.6 Dim.7 Dim.8 Dim.9 Dim.10
Temperature 0.173819045 -0.07890029 -0.005345948 -0.0007979015 -0.0001573676
RH 0.310596567 0.19564128 -0.008463698 0.0019377020 0.0006781233
Ws -0.060423828 -0.03925156 -0.023154179 -0.0010240204 0.0001295950
Rain -0.009875458 0.16195267 0.014920535 0.0003648080 0.0002802592
FFMC -0.149209992 0.38134505 -0.018062897 -0.0109907302 0.0014424623
DMC -0.020437033 -0.03429713 -0.193715047 0.0307700474 0.0317839834
DC -0.120240386 -0.02523883 0.212434123 0.0104111689 0.0164916900
ISI 0.225882669 0.03702586 0.071527110 0.0722586739 -0.0035962286
BUI -0.049763699 -0.02091704 -0.056988999 0.0057405024 -0.0502092922
FWI 0.165767612 -0.02451073 0.013287420 -0.0984328258 0.0059651093
$cos2
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Temperature 0.509167927 0.194212556 0.0117954771 0.00852596011 0.2398305218
RH 0.438213845 0.256373231 0.1586942573 0.00167734082 0.0102197398
Ws 0.008800718 0.441394223 0.1994029455 0.30957018725 0.0351030198
Rain 0.220545214 0.072907030 0.3471314097 0.33029650377 0.0025708172
FFMC 0.695648963 0.086623104 0.0009045434 0.04459597935 0.0040905947
DMC 0.801051540 0.114595319 0.0088739047 0.03367282737 0.0007299073
DC 0.618863558 0.227464449 0.0605516898 0.02090720345 0.0116097258
ISI 0.757828478 0.009647987 0.0934951359 0.02225884240 0.0540252861
BUI 0.782014633 0.158832222 0.0214237088 0.02874910027 0.0002647153
FWI 0.885297696 0.021872651 0.0232948331 0.00000879339 0.0315451905
Dim.6 Dim.7 Dim.8 Dim.9
Temperature 0.03021306057 0.0062252561 0.00002857916 0.0000006366468
RH 0.09647022740 0.0382755102 0.00007163419 0.0000037546889
Ws 0.00365103893 0.0015406852 0.00053611600 0.0000010486179
Rain 0.00009752468 0.0262286666 0.00022262236 0.0000001330849
FFMC 0.02226362160 0.1454240492 0.00032626823 0.0001207961511
DMC 0.00041767232 0.0011762931 0.03752551935 0.0009467958189
DC 0.01445775036 0.0006369988 0.04512825680 0.0001083924382
ISI 0.05102298001 0.0013709142 0.00511612743 0.0052213159489
BUI 0.00247642573 0.0004375227 0.00324774599 0.0000329533675
FWI 0.02747890122 0.0006007758 0.00017655553 0.0096890211897
Dim.10
Temperature 0.00000002476457
RH 0.00000045985122
Ws 0.00000001679487
Rain 0.00000007854524
FFMC 0.00000208069749
DMC 0.00101022160035
DC 0.00027197583927
ISI 0.00001293286001
BUI 0.00252097301854
FWI 0.00003558252860
$contrib
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Temperature 8.9055345 12.2614915 1.27440429 1.065395114 61.49665837
RH 7.6645214 16.1859678 17.14560935 0.209598765 2.62051652
Ws 0.1539278 27.8671556 21.54384831 38.683568843 9.00101622
Rain 3.8574170 4.6029410 37.50469390 41.273507813 0.65920162
FFMC 12.1671564 5.4688969 0.09772847 5.572667228 1.04889864
DMC 14.0106863 7.2349057 0.95875242 4.207721510 0.18716075
DC 10.8241514 14.3608295 6.54211209 2.612542413 2.97693277
ISI 13.2546990 0.6091198 10.10138049 2.781441811 13.85300978
BUI 13.6777238 10.0277756 2.31465554 3.592457689 0.06787754
FWI 15.4841825 1.3809165 2.51681513 0.001098813 8.08872778
Dim.6 Dim.7 Dim.8 Dim.9 Dim.10
Temperature 12.15576644 2.8052224 0.03093671 0.0039482342 0.0006425101
RH 38.81333205 17.2476948 0.07754345 0.0232851122 0.0119307181
Ws 1.46894011 0.6942629 0.58034135 0.0065031178 0.0004357384
Rain 0.03923757 11.8191510 0.24098696 0.0008253405 0.0020378355
FFMC 8.95743030 65.5309256 0.35318279 0.7491304816 0.0539831459
DMC 0.16804412 0.5300607 40.62107913 5.8716573430 26.2099320938
DC 5.81685646 0.2870441 48.85098254 0.6722075053 7.0563411790
ISI 20.52832173 0.6177608 5.53816765 32.3805592722 0.3355396306
BUI 0.99635231 0.1971563 3.51565945 0.2043638960 65.4059778601
FWI 11.05571891 0.2707214 0.19111997 60.0875196971 0.9231792885
Conclusiones:
## para confirmar la retencion de dos componentes
library(paran)
paran(Dataset.2,iterations=5000,graph=TRUE,color=2)
Using eigendecomposition of correlation matrix.
Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
Results of Horn's Parallel Analysis for component retention
5000 iterations, using the mean estimate
--------------------------------------------------
Component Adjusted Unadjusted Estimated
Eigenvalue Eigenvalue Bias
--------------------------------------------------
1 5.387352 5.717432 0.330079
2 1.358137 1.583922 0.225785
--------------------------------------------------
Adjusted eigenvalues > 1 indicate dimensions to retain.
(2 components retained)
library(pacman)
p_load(dplyr,corrplot,DataExplorer,psych,gtools,mvnormtest)
library(readr)
datos=read.delim('DatasetN.txt',header = TRUE,sep =",")
str(datos)
'data.frame': 244 obs. of 14 variables:
$ day : int 1 2 3 4 5 6 7 8 9 10 ...
$ month : int 6 6 6 6 6 6 6 6 6 6 ...
$ year : int 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
$ Temperature: int 29 29 26 25 27 31 33 30 25 28 ...
$ RH : int 57 61 82 89 77 67 54 73 88 79 ...
$ Ws : int 18 13 22 13 16 14 13 15 13 12 ...
$ Rain : num 0 1.3 13.1 2.5 0 0 0 0 0.2 0 ...
$ FFMC : num 65.7 64.4 47.1 28.6 64.8 82.6 88.2 86.6 52.9 73.2 ...
$ DMC : num 3.4 4.1 2.5 1.3 3 5.8 9.9 12.1 7.9 9.5 ...
$ DC : num 7.6 7.6 7.1 6.9 14.2 22.2 30.5 38.3 38.8 46.3 ...
$ ISI : num 1.3 1 0.3 0 1.2 3.1 6.4 5.6 0.4 1.3 ...
$ BUI : num 3.4 3.9 2.7 1.7 3.9 7 10.9 13.5 10.5 12.6 ...
$ FWI : num 0.5 0.4 0.1 0 0.5 2.5 7.2 7.1 0.3 0.9 ...
$ Classes : chr "not fire " "not fire " "not fire " "not fire " ...
plot_missing(datos)
colSums(is.na(datos))
day month year Temperature RH Ws
0 0 0 0 0 0
Rain FFMC DMC DC ISI BUI
0 0 0 0 0 0
FWI Classes
0 0
correl=round(cor(datos_num),2)
round(solve(correl),3)
Temperature RH Ws Rain FFMC DMC DC ISI
Temperature 2.325 0.701 0.394 -0.028 -0.725 0.548 0.021 -0.459
RH 0.701 2.821 -0.367 0.420 0.835 1.929 0.172 1.340
Ws 0.394 -0.367 1.296 -0.238 0.094 0.467 -0.467 -0.933
Rain -0.028 0.420 -0.238 1.554 0.974 0.313 0.705 0.595
FFMC -0.725 0.835 0.094 0.974 4.062 1.342 0.398 -3.648
DMC 0.548 1.929 0.467 0.313 1.342 43.135 13.147 0.722
DC 0.021 0.172 -0.467 0.705 0.398 13.147 14.877 1.200
ISI -0.459 1.340 -0.933 0.595 -3.648 0.722 1.200 23.318
BUI -0.911 -2.550 -0.184 -0.824 -4.150 -52.880 -27.933 10.727
FWI 0.269 -0.059 0.334 -0.543 3.853 -2.909 0.393 -28.427
BUI FWI
Temperature -0.911 0.269
RH -2.550 -0.059
Ws -0.184 0.334
Rain -0.824 -0.543
FFMC -4.150 3.853
DMC -52.880 -2.909
DC -27.933 0.393
ISI 10.727 -28.427
BUI 89.183 -17.716
FWI -17.716 41.628
Ho: Las variables no están correlacionadas (ρ = 0)
H1: Las variables estan correlacionadas (ρ ≠ 0)
round(cor.mtest(datos_num,alpha=0.05)$p,3)
Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI
Temperature 0 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
RH 0 0.000 0.000 0.000 0.000 0.000 0.001 0.000 0.000 0.000
Ws 0 0.000 0.000 0.008 0.011 0.985 0.235 0.849 0.638 0.598
Rain 0 0.000 0.008 0.000 0.000 0.000 0.000 0.000 0.000 0.000
FFMC 0 0.000 0.011 0.000 0.000 0.000 0.000 0.000 0.000 0.000
DMC 0 0.000 0.985 0.000 0.000 0.000 0.000 0.000 0.000 0.000
DC 0 0.001 0.235 0.000 0.000 0.000 0.000 0.000 0.000 0.000
ISI 0 0.000 0.849 0.000 0.000 0.000 0.000 0.000 0.000 0.000
BUI 0 0.000 0.638 0.000 0.000 0.000 0.000 0.000 0.000 0.000
FWI 0 0.000 0.598 0.000 0.000 0.000 0.000 0.000 0.000 0.000
res1=cor.mtest(datos_num,conf.level=0.05)
i=cor(datos_num,method="pearson")
corrplot(i, p.mat = res1$p, sig.level = 0.05)
En está prueba de aceptar la hipótesis nula se determina que la matriz de correlación de las variables es una matriz de identidad, lo que implica que no están correlacionadas.
options(scipen=0)
esfer=cortest.bartlett(cor(datos_num),n=dim(datos_num))
str(esfer)
List of 3
$ chisq : num [1:2] 3344.6 67.7
$ p.value: num [1:2] 0 0.016
$ df : num 45
esfer$p.value[1]
[1] 0
mshapiro.test(t(datos_num))
Shapiro-Wilk normality test
data: Z
W = 0.35548, p-value < 2.2e-16
Con un valor de p-value menor que 2.2e-16, se rechaza la hipótesis nula, lo que lleva a la conclusión de que no se cumple la normalidad multivariada
Dado que no vamos a llevar a cabo un proceso de inferencia estadística, podemos continuar con el análisis factorial, incluso si no se cumple la suposición de normalidad multivariada.
kmo = psych::KMO(datos_num)
kmo$MSA #KMO general
[1] 0.7697472
kmo$MSAi #KMO por cada variable
Temperature RH Ws Rain FFMC DMC
0.9234204 0.8928050 0.5919317 0.7998613 0.8439772 0.7269029
DC ISI BUI FWI
0.6899307 0.7633394 0.6777123 0.8035108
Dado que el índice KMO general (0.7697472) y los valores individuales para cada variable son todos mayores que 0.5, podemos concluir que los datos son adecuados para realizar un análisis factorial.
De encontrarse variables con un índice KMO por debajo de 0.5, es recomendable considerar la eliminación de dichas variables, comenzando por la variable que tenga el índice más bajo.
datos_num_sr <- principal(r=datos_num,nfactors=2,rotate="none")
datos_num_sr$values
[1] 5.717432571 1.583922773 0.925567905 0.800262738 0.389989518 0.248549203
[7] 0.221916672 0.092379425 0.016124848 0.003854347
print(datos_num_sr)
Principal Components Analysis
Call: principal(r = datos_num, nfactors = 2, rotate = "none")
Standardized loadings (pattern matrix) based upon correlation matrix
PC1 PC2 h2 u2 com
Temperature 0.71 -0.44 0.70 0.297 1.7
RH -0.66 0.51 0.69 0.305 1.9
Ws -0.09 0.66 0.45 0.550 1.0
Rain -0.47 0.27 0.29 0.707 1.6
FFMC 0.83 -0.29 0.78 0.218 1.2
DMC 0.90 0.34 0.92 0.084 1.3
DC 0.79 0.48 0.85 0.154 1.6
ISI 0.87 -0.10 0.77 0.233 1.0
BUI 0.88 0.40 0.94 0.059 1.4
FWI 0.94 0.15 0.91 0.093 1.0
PC1 PC2
SS loadings 5.72 1.58
Proportion Var 0.57 0.16
Cumulative Var 0.57 0.73
Proportion Explained 0.78 0.22
Cumulative Proportion 0.78 1.00
Mean item complexity = 1.4
Test of the hypothesis that 2 components are sufficient.
The root mean square of the residuals (RMSR) is 0.08
with the empirical chi square 156.63 with prob < 1.3e-20
Fit based upon off diagonal values = 0.98
plot(datos_num_sr$values,type="b",pch=20,col="steelblue",lwd=2)
abline(h=1,lty=3,col="tomato",lwd=2)
comunalidad = data.frame(comunalidad = datos_num_sr$communality)
comunalidad$variables = rownames(comunalidad)
comunalidad = dplyr::arrange(comunalidad, desc(comunalidad))
print(comunalidad)
comunalidad variables
BUI 0.9408469 BUI
DMC 0.9156469 DMC
FWI 0.9071703 FWI
DC 0.8463280 DC
FFMC 0.7822721 FFMC
ISI 0.7674765 ISI
Temperature 0.7033805 Temperature
RH 0.6945871 RH
Ws 0.4501949 Ws
Rain 0.2934522 Rain
Conclusiones:
Representan la contribución de cada variable a los componentes principales
datos_num_sr$loadings# Se omiten los valores menores de 0.10
Loadings:
PC1 PC2
Temperature 0.714 -0.441
RH -0.662 0.506
Ws 0.664
Rain -0.470 0.270
FFMC 0.834 -0.294
DMC 0.895 0.339
DC 0.787 0.477
ISI 0.871
BUI 0.884 0.399
FWI 0.941 0.148
PC1 PC2
SS loadings 5.717 1.584
Proportion Var 0.572 0.158
Cumulative Var 0.572 0.730
datos_num_cr <- principal(r=datos_num,nfactors=2,rotate="varimax")
datos_num_cr
Principal Components Analysis
Call: principal(r = datos_num, nfactors = 2, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
RC1 RC2 h2 u2 com
Temperature 0.33 0.77 0.70 0.297 1.3
RH -0.25 -0.80 0.69 0.305 1.2
Ws 0.31 -0.60 0.45 0.550 1.5
Rain -0.23 -0.49 0.29 0.707 1.4
FFMC 0.51 0.72 0.78 0.218 1.8
DMC 0.93 0.24 0.92 0.084 1.1
DC 0.92 0.07 0.85 0.154 1.0
ISI 0.65 0.58 0.77 0.233 2.0
BUI 0.95 0.19 0.94 0.059 1.1
FWI 0.85 0.42 0.91 0.093 1.5
RC1 RC2
SS loadings 4.33 2.97
Proportion Var 0.43 0.30
Cumulative Var 0.43 0.73
Proportion Explained 0.59 0.41
Cumulative Proportion 0.59 1.00
Mean item complexity = 1.4
Test of the hypothesis that 2 components are sufficient.
The root mean square of the residuals (RMSR) is 0.08
with the empirical chi square 156.63 with prob < 1.3e-20
Fit based upon off diagonal values = 0.98
#Comunalidad sin rotación
datos_num_sr$communality
Temperature RH Ws Rain FFMC DMC
0.7033805 0.6945871 0.4501949 0.2934522 0.7822721 0.9156469
DC ISI BUI FWI
0.8463280 0.7674765 0.9408469 0.9071703
#Comunalidad con rotación
datos_num_cr$communality
Temperature RH Ws Rain FFMC DMC
0.7033805 0.6945871 0.4501949 0.2934522 0.7822721 0.9156469
DC ISI BUI FWI
0.8463280 0.7674765 0.9408469 0.9071703
#Cargas sin rotación
datos_num_sr$loadings
Loadings:
PC1 PC2
Temperature 0.714 -0.441
RH -0.662 0.506
Ws 0.664
Rain -0.470 0.270
FFMC 0.834 -0.294
DMC 0.895 0.339
DC 0.787 0.477
ISI 0.871
BUI 0.884 0.399
FWI 0.941 0.148
PC1 PC2
SS loadings 5.717 1.584
Proportion Var 0.572 0.158
Cumulative Var 0.572 0.730
#Cargas con rotación
datos_num_cr$loadings
Loadings:
RC1 RC2
Temperature 0.327 0.772
RH -0.247 -0.796
Ws 0.308 -0.596
Rain -0.227 -0.492
FFMC 0.510 0.723
DMC 0.926 0.242
DC 0.918
ISI 0.653 0.584
BUI 0.952 0.187
FWI 0.853 0.424
RC1 RC2
SS loadings 4.332 2.969
Proportion Var 0.433 0.297
Cumulative Var 0.433 0.730
#Sin rotación
loadings=datos_num_sr$loadings
par(mfcol=c(1,2))
plot(loadings,xlim=c(-1,1),ylim=c(-1,1),xlab='Factor1',ylab='Factor2')
text(loadings,row.names(loadings),pos=c(1,2,2,2,1))
abline(v=0,h=0,lty=2)
#Con rotacion
rot.loadings=datos_num_cr$loadings
plot(rot.loadings,xlim=c(-1,1),ylim=c(-1,1),xlab='Factor1 Rot',ylab='Factor2 Rot')
text(rot.loadings,row.names(loadings),pos=c(1,2,2,2,1))
abline(v=0,h=0,lty=2)
par(mfcol=c(1,1))
#Scores sin rotación
head(datos_num_sr$scores,5)
PC1 PC2
[1,] -0.8574473 -0.1218012
[2,] -0.9249063 -0.6303331
[3,] -1.9716950 2.5786272
[4,] -1.8005759 0.7616863
[5,] -1.0634062 0.2191898
#Scores con rotación
head(datos_num_cr$scores,5)
RC1 RC2
[1,] -0.7696475 -0.39710722
[2,] -1.1190657 -0.02152464
[3,] -0.1147497 -3.24403020
[4,] -1.0271441 -1.66349473
[5,] -0.7401618 -0.79437855
plot_correlation(datos_num_cr$scores)
Conclusiones:
x=c(1,2,3,3)
m <- matrix(x, ncol = 2)
#m
layout(m)
nf <- layout(m)
#layout.show(nf)
load <- datos_num_sr$loadings[,1:2]
plot(load, pch=16, xlim=c(-1,1), ylim=c(-1,1),col="chartreuse3",
xlab="Factor 1",ylab="Factor 2")
abline(h=0,lty=3,col="brown1",lwd=2)
abline(v=0,lty=3,col="burlywood1",lwd=2)
text(load,pos=1,labels=names(datos),cex=1.1)#agrega los nombres a las variables
# Grafica de circulo de correlaciones
library(ade4)
s.corcircle(load,grid=FALSE)
fa.diagram(datos_num_sr)
Conclusiones: