\[\beta + \beta X\]
#Su codominio estara siempre en tre 0 o 1
curve(1/(1+exp(-x)),-5,5)
curve(pnorm(x), add = TRUE, col = "blue" )
# odd ratio = dividir para su complemento
# GLM dada una cierta transformacion se puede obtener linealidad
uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/wells.dat"
datos <- read.csv(url(uu),sep="",dec=".",header=TRUE)
names(datos)[1]="Switch"
plot(jitter(datos$Switch)~datos$arsenic, main="Cambio VS contenido de arsenico")
dist1 <- datos$dist/100
ajuste2 <- glm(Switch~dist1,family=binomial(link="logit"), x = T, data = datos)
summary(ajuste2)
##
## Call:
## glm(formula = Switch ~ dist1, family = binomial(link = "logit"),
## data = datos, x = T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4406 -1.3058 0.9669 1.0308 1.6603
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.60596 0.06031 10.047 < 2e-16 ***
## dist1 -0.62188 0.09743 -6.383 1.74e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4118.1 on 3019 degrees of freedom
## Residual deviance: 4076.2 on 3018 degrees of freedom
## AIC: 4080.2
##
## Number of Fisher Scoring iterations: 4
# install.packages("erer")
library(erer)
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ea <- maBina(w = ajuste2, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
ea$out
## effect error t.value p.value
## (Intercept) 0.148 0.014 10.404 0
## dist1 -0.152 0.024 -6.382 0
# es una derivada, esta derivada se tiene que dar un valor de x,
#Ejemplo 2
uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/tabla15_7.csv"
datos <- read.csv(url(uu),sep=";",dec=".",header=TRUE)
table(datos$GRADE)
##
## 0 1
## 21 11
ajuste1 <- glm(GRADE~GPA+TUCE+PSI,family=binomial(link="logit"),x=T, data = datos)
summary(ajuste1)
##
## Call:
## glm(formula = GRADE ~ GPA + TUCE + PSI, family = binomial(link = "logit"),
## data = datos, x = T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9551 -0.6453 -0.2570 0.5888 2.0966
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.02135 4.93127 -2.641 0.00828 **
## GPA 2.82611 1.26293 2.238 0.02524 *
## TUCE 0.09516 0.14155 0.672 0.50143
## PSI 2.37869 1.06456 2.234 0.02545 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 41.183 on 31 degrees of freedom
## Residual deviance: 25.779 on 28 degrees of freedom
## AIC: 33.779
##
## Number of Fisher Scoring iterations: 5
ea <- maBina(w = ajuste1, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
ea$out
## effect error t.value p.value
## (Intercept) -2.460 0.818 -3.008 0.006
## GPA 0.534 0.237 2.252 0.032
## TUCE 0.018 0.026 0.685 0.499
## PSI 0.456 0.181 2.521 0.018
mean(datos$GPA)
## [1] 3.117188
mean(datos$TUCE)
## [1] 21.9375
mean(datos$PSI)
## [1] 0.4375
# ln(p/1-p) =
#Ejemplo 3
uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/tabla15_7.csv"
datos <- read.csv(url(uu),sep=";",dec=".",header=TRUE)
table(datos$GRADE)
##
## 0 1
## 21 11
ajuste1 <- glm(GRADE~GPA+TUCE+PSI,family=binomial(link="probit"),x=T, data = datos)
summary(ajuste1)
##
## Call:
## glm(formula = GRADE ~ GPA + TUCE + PSI, family = binomial(link = "probit"),
## data = datos, x = T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9392 -0.6508 -0.2229 0.5934 2.0451
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.45231 2.57152 -2.898 0.00376 **
## GPA 1.62581 0.68973 2.357 0.01841 *
## TUCE 0.05173 0.08119 0.637 0.52406
## PSI 1.42633 0.58695 2.430 0.01510 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 41.183 on 31 degrees of freedom
## Residual deviance: 25.638 on 28 degrees of freedom
## AIC: 33.638
##
## Number of Fisher Scoring iterations: 6
ea <- maBina(w = ajuste1, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
ea$out
## effect error t.value p.value
## (Intercept) -2.445 0.765 -3.198 0.003
## GPA 0.533 0.227 2.353 0.026
## TUCE 0.017 0.026 0.645 0.524
## PSI 0.464 0.171 2.712 0.011
with(ajuste1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
## [1] 0.001404896
library(tibble)
library(ggplot2)
df <- tibble(
x = rnorm(10000),
y = rnorm(10000)
)
ggplot(df, aes(x, y)) +
geom_hex() +
coord_fixed()
ggplot(df, aes(x, y)) +
geom_hex() +
viridis::scale_fill_viridis() +
coord_fixed()
# Ejemplo 3
rm(list=ls())
admisiones <- read.csv(file="https://stats.idre.ucla.edu/stat/data/binary.csv",
header=T, sep=",", dec=".")
head(admisiones)
## admit gre gpa rank
## 1 0 380 3.61 3
## 2 1 660 3.67 3
## 3 1 800 4.00 1
## 4 1 640 3.19 4
## 5 0 520 2.93 4
## 6 1 760 3.00 2
# estudiantes aplicando a un posgrado 1 si es admitido 0 si no
aj.adm <- glm(admit~gre+gpa+factor(rank),
family = binomial(link="probit"),x=T, data=admisiones) #modelo probit
summary(aj.adm)
##
## Call:
## glm(formula = admit ~ gre + gpa + factor(rank), family = binomial(link = "probit"),
## data = admisiones, x = T)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6163 -0.8710 -0.6389 1.1560 2.1035
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.386836 0.673946 -3.542 0.000398 ***
## gre 0.001376 0.000650 2.116 0.034329 *
## gpa 0.477730 0.197197 2.423 0.015410 *
## factor(rank)2 -0.415399 0.194977 -2.131 0.033130 *
## factor(rank)3 -0.812138 0.208358 -3.898 9.71e-05 ***
## factor(rank)4 -0.935899 0.245272 -3.816 0.000136 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.98 on 399 degrees of freedom
## Residual deviance: 458.41 on 394 degrees of freedom
## AIC: 470.41
##
## Number of Fisher Scoring iterations: 4