Graficar funcion

\[\beta + \beta X\]

#Su codominio estara siempre en tre 0 o 1 
curve(1/(1+exp(-x)),-5,5)
curve(pnorm(x), add = TRUE, col = "blue" )

# odd ratio = dividir para su complemento
# GLM dada una cierta transformacion se puede obtener linealidad

uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/wells.dat"
datos <- read.csv(url(uu),sep="",dec=".",header=TRUE)
names(datos)[1]="Switch"
plot(jitter(datos$Switch)~datos$arsenic, main="Cambio VS contenido de arsenico")
dist1 <- datos$dist/100
ajuste2 <- glm(Switch~dist1,family=binomial(link="logit"), x = T, data = datos)
summary(ajuste2)
## 
## Call:
## glm(formula = Switch ~ dist1, family = binomial(link = "logit"), 
##     data = datos, x = T)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4406  -1.3058   0.9669   1.0308   1.6603  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.60596    0.06031  10.047  < 2e-16 ***
## dist1       -0.62188    0.09743  -6.383 1.74e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4118.1  on 3019  degrees of freedom
## Residual deviance: 4076.2  on 3018  degrees of freedom
## AIC: 4080.2
## 
## Number of Fisher Scoring iterations: 4
# install.packages("erer")
library(erer)
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

ea <- maBina(w = ajuste2, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.
ea$out
##             effect error t.value p.value
## (Intercept)  0.148 0.014  10.404       0
## dist1       -0.152 0.024  -6.382       0
# es una derivada, esta derivada se tiene que dar un valor de x, 

#Ejemplo 2  
uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/tabla15_7.csv"
datos <- read.csv(url(uu),sep=";",dec=".",header=TRUE)
table(datos$GRADE)
## 
##  0  1 
## 21 11
ajuste1 <- glm(GRADE~GPA+TUCE+PSI,family=binomial(link="logit"),x=T, data = datos)
summary(ajuste1)
## 
## Call:
## glm(formula = GRADE ~ GPA + TUCE + PSI, family = binomial(link = "logit"), 
##     data = datos, x = T)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9551  -0.6453  -0.2570   0.5888   2.0966  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -13.02135    4.93127  -2.641  0.00828 **
## GPA           2.82611    1.26293   2.238  0.02524 * 
## TUCE          0.09516    0.14155   0.672  0.50143   
## PSI           2.37869    1.06456   2.234  0.02545 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41.183  on 31  degrees of freedom
## Residual deviance: 25.779  on 28  degrees of freedom
## AIC: 33.779
## 
## Number of Fisher Scoring iterations: 5
ea <- maBina(w = ajuste1, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.
ea$out
##             effect error t.value p.value
## (Intercept) -2.460 0.818  -3.008   0.006
## GPA          0.534 0.237   2.252   0.032
## TUCE         0.018 0.026   0.685   0.499
## PSI          0.456 0.181   2.521   0.018
mean(datos$GPA)
## [1] 3.117188
mean(datos$TUCE)
## [1] 21.9375
mean(datos$PSI)
## [1] 0.4375
# ln(p/1-p) = 
#Ejemplo 3 
uu <- "https://raw.githubusercontent.com/vmoprojs/DataLectures/master/tabla15_7.csv"
datos <- read.csv(url(uu),sep=";",dec=".",header=TRUE)
table(datos$GRADE)
## 
##  0  1 
## 21 11
ajuste1 <- glm(GRADE~GPA+TUCE+PSI,family=binomial(link="probit"),x=T, data = datos)
summary(ajuste1)
## 
## Call:
## glm(formula = GRADE ~ GPA + TUCE + PSI, family = binomial(link = "probit"), 
##     data = datos, x = T)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9392  -0.6508  -0.2229   0.5934   2.0451  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -7.45231    2.57152  -2.898  0.00376 **
## GPA          1.62581    0.68973   2.357  0.01841 * 
## TUCE         0.05173    0.08119   0.637  0.52406   
## PSI          1.42633    0.58695   2.430  0.01510 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41.183  on 31  degrees of freedom
## Residual deviance: 25.638  on 28  degrees of freedom
## AIC: 33.638
## 
## Number of Fisher Scoring iterations: 6
ea <- maBina(w = ajuste1, x.mean = TRUE, rev.dum = TRUE)
## Warning in f.xb * coef(w): Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.
ea$out
##             effect error t.value p.value
## (Intercept) -2.445 0.765  -3.198   0.003
## GPA          0.533 0.227   2.353   0.026
## TUCE         0.017 0.026   0.645   0.524
## PSI          0.464 0.171   2.712   0.011
with(ajuste1, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
## [1] 0.001404896
library(tibble)
library(ggplot2)
df <- tibble(
  x = rnorm(10000),
  y = rnorm(10000)
)
ggplot(df, aes(x, y)) +
  geom_hex() +
  coord_fixed()

ggplot(df, aes(x, y)) +
  geom_hex() +
  viridis::scale_fill_viridis() +
  coord_fixed()

# Ejemplo 3
rm(list=ls())
admisiones <- read.csv(file="https://stats.idre.ucla.edu/stat/data/binary.csv", 
                      header=T, sep=",", dec=".")
head(admisiones)
##   admit gre  gpa rank
## 1     0 380 3.61    3
## 2     1 660 3.67    3
## 3     1 800 4.00    1
## 4     1 640 3.19    4
## 5     0 520 2.93    4
## 6     1 760 3.00    2
# estudiantes aplicando a un posgrado 1 si es admitido 0 si no
aj.adm <- glm(admit~gre+gpa+factor(rank), 
             family = binomial(link="probit"),x=T, data=admisiones)           #modelo probit 
summary(aj.adm)
## 
## Call:
## glm(formula = admit ~ gre + gpa + factor(rank), family = binomial(link = "probit"), 
##     data = admisiones, x = T)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6163  -0.8710  -0.6389   1.1560   2.1035  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.386836   0.673946  -3.542 0.000398 ***
## gre            0.001376   0.000650   2.116 0.034329 *  
## gpa            0.477730   0.197197   2.423 0.015410 *  
## factor(rank)2 -0.415399   0.194977  -2.131 0.033130 *  
## factor(rank)3 -0.812138   0.208358  -3.898 9.71e-05 ***
## factor(rank)4 -0.935899   0.245272  -3.816 0.000136 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 458.41  on 394  degrees of freedom
## AIC: 470.41
## 
## Number of Fisher Scoring iterations: 4

22-08-19

R Markdown