library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(pander)
library(readxl)
##Hipotesis nula para modelos balanceados
\[ H_0: \mu_1 = \mu_2 =.... =\mu_n \\\mu_a = H_0 \text {es falsa}\] modelo de diseño \[ Y_{ij}=\\m\mu_i + \epsilon_{ij}\\i = 1...9\\j=1...r\]
P-valor del estudio
set.seed(123)
malato = rnorm(120, 10, 1.2)
vino = gl(3,40,120,c('v1','v2','v3'))
datos = data.frame(Malato = malato, Vino = vino)
mod1 = aov(datos$Malato~datos$Vino)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## datos$Vino 2 0.08 0.0413 0.035 0.965
## Residuals 117 137.01 1.1710
EJ1 <- read_excel("C:\\Users\\yarvis\\Downloads\\EJ1 (1).xlsx")
dfe1 = read_excel("C:\\Users\\yarvis\\Downloads\\EJ1 (1).xlsx", sheet = "isolation")
dfe1=data.frame(EJ1)
df1=dfe1[,-1]
summary(df1)
## INC PROF.RAIZ ALTURA
## Length:50 Length:50 Length:50
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
df1$ALTURA=as.numeric(df1$ALTURA)
df1$PROF.RAIZ = as.numeric(df1$PROF.RAIZ)
df1$INC=as.numeric(df1$INC)
model1 = glm(df1$INC ~ df1$PROF.RAIZ + df1$ALTURA, family = binomial)
summary(model1)
##
## Call:
## glm(formula = df1$INC ~ df1$PROF.RAIZ + df1$ALTURA, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8189 -0.3089 0.0490 0.3635 2.1192
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.6417 2.9218 2.273 0.02302 *
## df1$PROF.RAIZ 0.5807 0.2478 2.344 0.01909 *
## df1$ALTURA -1.3719 0.4769 -2.877 0.00401 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 68.029 on 49 degrees of freedom
## Residual deviance: 28.402 on 47 degrees of freedom
## AIC: 34.402
##
## Number of Fisher Scoring iterations: 6
model2 = glm(df1$INC ~ df1$PROF.RAIZ + df1$ALTURA, binomial)
summary(model2)
##
## Call:
## glm(formula = df1$INC ~ df1$PROF.RAIZ + df1$ALTURA, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8189 -0.3089 0.0490 0.3635 2.1192
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.6417 2.9218 2.273 0.02302 *
## df1$PROF.RAIZ 0.5807 0.2478 2.344 0.01909 *
## df1$ALTURA -1.3719 0.4769 -2.877 0.00401 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 68.029 on 49 degrees of freedom
## Residual deviance: 28.402 on 47 degrees of freedom
## AIC: 34.402
##
## Number of Fisher Scoring iterations: 6
Comparacion de modelos
\[H_0: El modelo mas pequeño es el mejor \]
anova(model1, model2, test= 'Chi')
## Analysis of Deviance Table
##
## Model 1: df1$INC ~ df1$PROF.RAIZ + df1$ALTURA
## Model 2: df1$INC ~ df1$PROF.RAIZ + df1$ALTURA
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 47 28.402
## 2 47 28.402 0 0
Modelo \[ln(\frac{p_i}{1-p_i}= \beta_0+\beta_1X_1+\beta_2X_2+\epsilon_i\\ln(\frac{p_i}{1-p_i}=6.64 +0.58 PR-1.37A\\ln(\frac{p_i}{1-p_i}=exp(6.64 +0.58 PR-1.37A)\\ln(\frac{p_i}{1-p_i}=exp(6.64) *exp(0.58 PR)*exp(-1.37A)\] Probabilidades estimadas
num = 765.1*exp(0.58*df1$PROF.RAIZ) * exp(-1.37*df1$ALTURA)
den = 1 + num
pi=num/den
plot(df1$INC, pi)
pi_ = ifelse(pi>0.5,1, 0)
table(df1$INC, pi_)
## pi_
## 0 1
## 0 17 4
## 1 3 26
(porcentaje_clasi = 100 * (17+26)/(17+26+4+3))
## [1] 86
pi_ = ifelse(pi>0.5, 1, 0)
tb = table(df1$INC, pi_) # matriz de confusion
porcentaje_clasi = 100 * sum(diag(tb))/sum(tb)
porcentaje_clasi
## [1] 86
se clasifico el 86% de los datos
pi_2 = ifelse(pi>0.55,1, 0)
tb = table(df1$INC, pi_2) # matriz de confución
porcentaje_clasi2 = 100 * sum(diag(tb))/sum(tb)
porcentaje_clasi2
## [1] 84
El modelon no acerto en 7 datos
(porcentaje_clasi2 = 100 * (17+26)/(17+26+4+3))
## [1] 86
porcentaje = c()
cont =1
matrix = list()
for(i in seq(0.01, 0.99, 0.01)){
pi_ = ifelse(pi>i,1,0)
tb = table(dfe1$INC, pi_)
pr = 100*sum(diag(tb))/sum(tb)
porcentaje[cont] = pr
cont = cont +1
}
plot(seq(0.01, 0.99, 0.01), porcentaje, pch = 19 , cex = 0.8,ylab = 'Porcentaje de clasificación', xlab = 'Valor punto de corte')
abline(v = c(0.44, 0.5), col = c('red', 'blue'))
punto de mayor clasificacion (pi)
seq(0.01,0.99,0.01)[which.max(porcentaje)]
## [1] 0.43
punto de corte para mayor clasificación : 0.43