# Clase Discriminación y Clasificación

# Read the discriminant-analysis data set from the Excel workbook.
library(readxl)
datos <- read_excel(path = "discriminantes_051923.xlsx")
# Preview the first rows.
head(x = datos)

# Store the group label `g` as a factor, then display the whole table.
datos$g <- as.factor(x = datos$g)
datos

# Interactive 3-D scatter plot of every column except the fourth
# (presumably the fourth column is the group label `g` -- confirm),
# with one colour per integer code of the group factor.
library(rgl)
codigo_grupo <- as.numeric(datos$g)
plot3d(x = datos[, -4], col = codigo_grupo, type = "s")

library(ggplot2)

# Density of DE, one semi-transparent filled curve per group.
ggplot(data = datos, mapping = aes(x = DE, fill = g)) +
  geom_density(alpha = 0.5)

# Density of DL, one semi-transparent filled curve per group.
ggplot(data = datos, mapping = aes(x = DL, fill = g)) +
  geom_density(alpha = 0.5)

# Density of BIO, one semi-transparent filled curve per group.
ggplot(data = datos, mapping = aes(x = BIO, fill = g)) +
  geom_density(alpha = 0.5)

# 2-D density contours for each pair of variables, coloured by group.

# DE against DL.
ggplot(data = datos, mapping = aes(x = DE, y = DL, colour = g)) +
  geom_density_2d()

# DE against BIO.
ggplot(data = datos, mapping = aes(x = DE, y = BIO, colour = g)) +
  geom_density_2d()

# DL against BIO.
ggplot(data = datos, mapping = aes(x = DL, y = BIO, colour = g)) +
  geom_density_2d()

# Reproducible random train/test split.
# Each row is drawn independently: TRUE (training) with probability 0.8,
# FALSE (test) with probability 0.2, so the split sizes are random.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
set.seed(123)
muest <- sample(
  x = c(TRUE, FALSE), size = nrow(datos),
  replace = TRUE, prob = c(0.8, 0.2)
)

# Column 4 is taken as the response and the remaining columns as predictors.
# The response is selected with a single column index, so it is whatever
# `datos[rows, 4]` returns for the class of `datos`.
X_train <- datos[muest, -4]
y_train <- datos[muest, 4]
X_test <- datos[!muest, -4]
y_test <- datos[!muest, 4]

# Misclassification costs used in the cost-ratio term of the decision rule.
# NOTE(review): presumably C12 = c(1|2) and C21 = c(2|1), with population 1
# = "enferma" and population 2 = "sana" -- confirm against the course notes.
C12 <- 1
C21 <- 2

# Prior probabilities estimated from the TRAINING labels only, so that no
# information from the held-out test rows leaks into the classifier.
# `[[` extracts bare numbers; with `[` the level name would be carried along
# into every quantity derived from the priors.
cont <- table(y_train$g)
P1 <- cont[["enferma"]] / sum(cont)
P2 <- cont[["sana"]] / sum(cont)

# Split the training predictors by group once and reuse the subsets.
es_enferma <- y_train$g == "enferma"
es_sana <- y_train$g == "sana"
X_enferma <- X_train[es_enferma, ]
X_sana <- X_train[es_sana, ]

# Group mean vectors, their difference and their sum.
mu1 <- colMeans(X_enferma)
mu2 <- colMeans(X_sana)
mu_d <- mu1 - mu2
mu_s <- mu1 + mu2

# Per-group sample covariance matrices.
S1 <- cov(X_enferma)
S2 <- cov(X_sana)

# Group sizes.
n1 <- nrow(X_enferma)
n2 <- nrow(X_sana)

# Pooled covariance: per-group covariances weighted by their degrees of
# freedom, followed by its inverse.
gl_total <- (n1 - 1) + (n2 - 1)
Sp <- ((n1 - 1) / gl_total) * S1 + ((n2 - 1) / gl_total) * S2
Spi <- solve(Sp)

# Classify the first test observation.
X0 <- unlist(X_test[1, ])

# Left-hand side of the rule: mu_d' Spi x0 - (1/2) mu_d' Spi mu_s.
# The row vector mu_d' Spi is shared by both terms.
coef_disc <- t(mu_d) %*% Spi
model_izq <- coef_disc %*% X0 - (1 / 2) * coef_disc %*% mu_s

# Right-hand side: log of (cost ratio) * (prior ratio).
model_der <- log((C12 / C21) * (P2 / P1))

# Assign "enferma" when the left-hand side reaches the threshold.
supera_umbral <- model_izq >= model_der
ifelse(supera_umbral, yes = "enferma", no = "sana")

##      [,1]     
## [1,] "enferma"

# Discriminant scores for every test row at once.
# part1 is a 1 x n_test matrix of mu_d' Spi x_i; part2 is the 1 x 1 constant
# (1/2) mu_d' Spi mu_s, subtracted as a scalar.
part1 <- t(mu_d) %*% Spi %*% t(X_test)
part2 <- (1 / 2) * t(mu_d) %*% Spi %*% mu_s
mod_izq <- part1 - part2[1, 1]

# Vectorised decision: "enferma" when the score reaches the threshold,
# "sana" otherwise. as.vector()/as.numeric() drop the matrix shape and any
# names so the result is a plain, unnamed character vector; as.character()
# keeps the type stable even when the test set is empty.
y_pred <- as.character(
  ifelse(as.vector(mod_izq) >= as.numeric(model_der), "enferma", "sana")
)

# Clase 18 de mayo - 2023

# Clase Discriminación y Clasificación