Creando datos de tres localidades

library(faux)
## 
## ************
## Welcome to faux. For support and examples visit:
## https://debruine.github.io/faux/
## - Get and set global package options with: faux_options()
## ************
# Simulate three localities, each with 50 observations of the variables
# A, Ar and MO. The seed is fixed so the draws are reproducible.
set.seed(123)

# Draw one locality. Only the means (`mu`) and the correlation setting (`r`)
# differ between localities; sample size, standard deviations and variable
# names are shared.
simular_localidad <- function(mu, r) {
  rnorm_multi(
    n = 50,
    vars = 3,
    mu = mu,
    sd = c(2, 3, 0.2),
    r = r,
    varnames = c('A', 'Ar', 'MO')
  )
}

# The calls are made in the same order as before, so the random stream is
# consumed identically.
loc1 <- simular_localidad(mu = c(20, 30, 2), r = 0.7)
loc2 <- simular_localidad(mu = c(23, 33, 2.4), r = 0.8)
loc3 <- simular_localidad(mu = c(25, 35, 2.7), r = 0.76)

Haciendo la matriz con las tres localidades

library(rgl)
# Stack the three localities into one data frame and tag each row with its
# locality: a 3-level factor, 50 consecutive rows per level, 150 rows total.
datos <- rbind(loc1, loc2, loc3)
datos[["loc"]] <- gl(n = 3, k = 50, length = 150)

# 3D scatter plot of the three measured variables, coloured by locality.
plot3d(
  x = datos$A,
  y = datos$Ar,
  z = datos$MO,
  type = 's',
  col = datos$loc
)

Revisión de métodos: carga de paquetes

library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(lattice)

revisión de métodos

library(caret)

# Fit one caret model per method name. The predictors are columns A and Ar
# (MO is left out); the response is the locality factor `loc`.
metodos <- c('lda2', 'stepLDA')
model <- lapply(metodos, function(metodo) {
  predictores <- datos[, c('A', 'Ar')]
  respuesta <- datos[['loc']]
  train(x = predictores, y = respuesta, method = metodo)
})
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.62667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.64667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.03
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.66;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.67333;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.03
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.62667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.64;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.04
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.64667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.72667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.61333;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.70667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.04
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.64;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.63333;  in: "Ar";  variables (1): Ar 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.69333;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.05
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.56;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.73333;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.66667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.72;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.08
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.73333;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
##  `stepwise classification', using 10-fold cross-validated correctness rate of method lda'.
## 150 observations of 2 variables in 3 classes; direction: both
## stop criterion: improvement less than 5%.
## correctness rate: 0.68667;  in: "A";  variables (1): A 
## 
##  hr.elapsed min.elapsed sec.elapsed 
##        0.00        0.00        0.06
# Confirm which method each element of `model` was fitted with.
model[[1]][["method"]]
## [1] "lda2"
model[[2]][["method"]]
## [1] "stepLDA"

Segunda parte

# Predict the locality of every row with each fitted model. The new data is
# `datos` without its 4th column (the response `loc`), i.e. the same rows the
# models were trained on, so these are in-sample predictions.
y_pred_m1 <- predict(model[[1]], newdata = datos[, -4])
y_pred_m2 <- predict(model[[2]], newdata = datos[, -4])

Ahora la matriz de confusión

# Confusion matrix for the first model (lda2): predicted vs. true locality.
confusionMatrix(data = y_pred_m1, reference = datos$loc)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 39 10  4
##          2 10 29 11
##          3  1 11 35
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6867          
##                  95% CI : (0.6059, 0.7598)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.53            
##                                           
##  Mcnemar's Test P-Value : 0.6149          
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.7800   0.5800   0.7000
## Specificity            0.8600   0.7900   0.8800
## Pos Pred Value         0.7358   0.5800   0.7447
## Neg Pred Value         0.8866   0.7900   0.8544
## Prevalence             0.3333   0.3333   0.3333
## Detection Rate         0.2600   0.1933   0.2333
## Detection Prevalence   0.3533   0.3333   0.3133
## Balanced Accuracy      0.8200   0.6850   0.7900

Matriz de confusión del segundo modelo

# Confusion matrix for the second model (stepLDA): predicted vs. true locality.
confusionMatrix(data = y_pred_m2, reference = datos$loc)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 39 10  4
##          2 10 28 11
##          3  1 12 35
## 
## Overall Statistics
##                                          
##                Accuracy : 0.68           
##                  95% CI : (0.599, 0.7537)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.52           
##                                          
##  Mcnemar's Test P-Value : 0.6055         
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.7800   0.5600   0.7000
## Specificity            0.8600   0.7900   0.8700
## Pos Pred Value         0.7358   0.5714   0.7292
## Neg Pred Value         0.8866   0.7822   0.8529
## Prevalence             0.3333   0.3333   0.3333
## Detection Rate         0.2600   0.1867   0.2333
## Detection Prevalence   0.3533   0.3267   0.3200
## Balanced Accuracy      0.8200   0.6750   0.7850

La prueba de McNemar es útil cuando se tiene una opinión previa al experimento, por ejemplo en el contexto de ciencia y tecnología de alimentos (catas).

Componentes principales

# Principal component analysis of the three measured variables; the locality
# factor in column 4 is excluded.
pca1 <- prcomp(datos[, -4])

# Cumulative proportion of variance explained by the components. `sdev` holds
# standard deviations, so it is squared to obtain variances before the
# proportions are computed (ratios of raw standard deviations are not
# proportions of variance).
varianzas <- pca1$sdev^2
cumsum(varianzas / sum(varianzas))
# (re-run this chunk to refresh the printed cumulative proportions)
biplot(pca1)

#Análisis de correspondencia #Tablas de contingencia

# Simulated survey: 60 women (F) and 40 men (M) answer SI/NO. Women answer SI
# with probability 0.8 and men with probability 0.2. The seed is fixed so the
# sample is reproducible.
set.seed(123)

n_fem <- 60
n_mas <- 40
genero <- rep(c('F', 'M'), times = c(n_fem, n_mas))
# `replace` and `prob` are passed by name, and TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
acuerdo <- c(
  sample(c('SI', 'NO'), size = n_fem, replace = TRUE, prob = c(0.8, 0.2)),
  sample(c('SI', 'NO'), size = n_mas, replace = TRUE, prob = c(0.2, 0.8))
)

# Contingency table of gender by agreement, printed with row/column totals.
tbl2 <- table(genero, acuerdo)
addmargins(tbl2)
##       acuerdo
## genero  NO  SI Sum
##    F    12  48  60
##    M    34   6  40
##    Sum  46  54 100

Se llama tabla de conteos. Se pueden tener diferentes tipos de muestreo: si se fijan de antemano los tamaños de algunos estratos, se llama muestreo estratificado; si se toma una única muestra de tamaño fijo, se llama muestreo aleatorio.

En el ejemplo, el género y el acuerdo son aleatorios. Prueba de hipótesis de independencia a través del estadístico \(\chi^2\). Hipótesis nula: el género es independiente de la decisión.

Para las tablas de contingencia se suelen hacer tres gráficos. Gráfico de mosaico: es para un único muestreo

#mosaicplot(tbl2, cex = 2)

El tamaño de cada caja está en función de la frecuencia

#Gráfico de asociación

# Association plot of the gender-by-agreement contingency table.
assocplot(tbl2)

#Consultar la utilidad: en qué casos se usa cada gráfico: Mosaico, Asociación y Tamiz #Analysis of categorical data using R

# Bar charts of the gender-by-agreement table.
# Raw counts, one bar per answer (columns of `tbl2`).
barplot(tbl2)

# Transposed table: one bar per gender, with a legend for the answers.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
barplot(t(tbl2), legend.text = TRUE)

# Column proportions (each answer sums to 1), so bars are comparable even
# though the answer totals differ.
tbl_prop <- prop.table(tbl2, margin = 2)
barplot(tbl_prop, legend.text = TRUE)

# Cómo exportar a Excel

#library(writexl)
#datos2 = data.frame(genero, acuerdo)
#writexl::write_xlsx(datos2, 'datos_052523.xlsx')

#Prueba de Chi cuadrado \[\chi^2 = \sum_i \left(\frac{(O_i - E_i)^2}{E_i}\right)\]

# Pearson chi-squared test of independence between gender and agreement,
# without the continuity correction.
ct <- chisq.test(tbl2, correct = FALSE)

# Conventional 5% significance level.
# NOTE(review): the threshold was previously written as 0.005, which looks
# like a typo for 0.05 - confirm the intended level.
alfa <- 0.05

# The p-value is a single number, so a plain if/else is used instead of the
# vectorised ifelse().
if (ct$p.value < alfa) 'Rechaza Ho' else 'No Rechaza Ho'
## [1] "Rechaza Ho"