library(glmtoolbox)
library(MASS)
data("quine")
View(quine)
library(ggplot2)
ggplot(data = quine,mapping = aes(x=Eth,Days))+geom_boxplot(mapping = aes(x=Eth,Days), col="green")
ggplot(data = quine,mapping = aes(x=Age,y=Days))+geom_boxplot(mapping = aes(x=Age,Days), col="#C710F0")
ggplot(data = quine,mapping = aes(x=Sex,y=Days))+geom_boxplot(mapping = aes(x=Sex,Days), col="#F05010")
ggplot(data = quine,mapping = aes(x=Lrn,y=Days))+geom_boxplot(mapping = aes(x=Lrn,y=Days), col="#58D68D")
#B) Poisson
mod<-glm(Days ~ 1+ Eth*Age + Sex + Lrn, family = poisson(link="log"),data = quine)
summary(mod)
##
## Call:
## glm(formula = Days ~ 1 + Eth * Age + Sex + Lrn, family = poisson(link = "log"),
## data = quine)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.5817 -2.8466 -0.8881 1.7952 8.2457
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.39776 0.08376 28.626 < 2e-16 ***
## EthN 0.13576 0.10041 1.352 0.176379
## AgeF1 0.11939 0.09509 1.255 0.209306
## AgeF2 0.74159 0.08561 8.662 < 2e-16 ***
## AgeF3 0.52651 0.09605 5.482 4.21e-08 ***
## SexM 0.16296 0.04274 3.813 0.000138 ***
## LrnSL 0.35793 0.05211 6.869 6.46e-12 ***
## EthN:AgeF1 -1.02887 0.13649 -7.538 4.78e-14 ***
## EthN:AgeF2 -1.23462 0.12828 -9.624 < 2e-16 ***
## EthN:AgeF3 -0.17669 0.12755 -1.385 0.165966
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2073.5 on 145 degrees of freedom
## Residual deviance: 1542.8 on 136 degrees of freedom
## AIC: 2151.3
##
## Number of Fisher Scoring iterations: 5
Este resultado nos pone a pensar ya que en el análisis descriptivo no se notaba dependencias significativas. El problema es que existe evidencia de sobre dispersión ya que:
# Estimación del parámetro de dispersión
phi = 1542.8/136
phi
## [1] 11.34412
Se dice que existe sobre dispersión.
#C) Binomial negativa
mod1<-overglm(Days ~ 1+ Eth*Age + Sex + Lrn, family =" nb1(log)",data = quine)
summary(mod1)
##
## Sample size: 146
## Family: Negative Binomial type I with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 2.53409 0.26142 9.69368 < 2.22e-16
## EthN 0.05698 0.34740 0.16402 0.8697159
## AgeF1 0.08732 0.33725 0.25892 0.7956991
## AgeF2 0.70638 0.32864 2.14940 0.0316030
## AgeF3 0.40050 0.33257 1.20428 0.2284819
## SexM 0.11275 0.15912 0.70863 0.4785551
## LrnSL 0.22754 0.18186 1.25116 0.2108767
## EthN:AgeF1 -0.89843 0.44391 -2.02391 0.0429792
## EthN:AgeF2 -1.18060 0.44722 -2.63985 0.0082942
## EthN:AgeF3 -0.10128 0.46350 -0.21850 0.8270373
##
## phi 0.72470 0.09346
## *************************************************************
## -2*log-likelihood: 1082.688
## AIC: 1104.688
## BIC: 1137.508
mod2<-overglm(Days ~ 1+ Eth*Age + Sex + Lrn, family =" nb2(log)",data = quine)
summary(mod2)
##
## Sample size: 146
## Family: Negative Binomial type II with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 2.56503 0.25752 9.96046 < 2e-16
## EthN -0.08293 0.31219 -0.26564 0.790519
## AgeF1 0.14901 0.27278 0.54627 0.584881
## AgeF2 0.64724 0.25419 2.54625 0.010889
## AgeF3 0.33543 0.29845 1.12389 0.261058
## SexM 0.12328 0.13553 0.90962 0.363021
## LrnSL 0.20170 0.15759 1.27987 0.200591
## EthN:AgeF1 -0.59411 0.39844 -1.49110 0.135936
## EthN:AgeF2 -0.93940 0.39575 -2.37375 0.017609
## EthN:AgeF3 -0.02794 0.40559 -0.06888 0.945083
##
## phi 11.72217 1.73654
## *************************************************************
## -2*log-likelihood: 1086.893
## AIC: 1108.893
## BIC: 1141.713
envelope(mod2)
##
|
| | 0%
|
|+ | 4%
|
|++ | 8%
|
|+++ | 12%
|
|++++ | 16%
|
|+++++ | 20%
|
|++++++ | 24%
|
|+++++++ | 28%
|
|++++++++ | 32%
|
|+++++++++ | 36%
|
|++++++++++ | 40%
|
|+++++++++++ | 44%
|
|++++++++++++ | 48%
|
|+++++++++++++ | 52%
|
|++++++++++++++ | 56%
|
|+++++++++++++++ | 60%
|
|++++++++++++++++ | 64%
|
|+++++++++++++++++ | 68%
|
|++++++++++++++++++ | 72%
|
|+++++++++++++++++++ | 76%
|
|++++++++++++++++++++ | 80%
|
|+++++++++++++++++++++ | 84%
|
|++++++++++++++++++++++ | 88%
|
|+++++++++++++++++++++++ | 92%
|
|++++++++++++++++++++++++ | 96%
|
|+++++++++++++++++++++++++| 100%
mod3<-overglm(Days ~ 1+ Eth*Age + Sex + Lrn, family =" nbf(log)",data = quine)
summary(mod3)
##
## Sample size: 146
## Family: Negative Binomial with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 2.51974 0.26714 9.43225 < 2e-16
## EthN 0.01963 0.34349 0.05716 0.954416
## AgeF1 0.11121 0.32957 0.33743 0.735794
## AgeF2 0.71547 0.31223 2.29152 0.021933
## AgeF3 0.40551 0.32826 1.23533 0.216706
## SexM 0.12824 0.15400 0.83272 0.405005
## LrnSL 0.24989 0.18042 1.38510 0.166023
## EthN:AgeF1 -0.87691 0.44196 -1.98413 0.047242
## EthN:AgeF2 -1.17630 0.43890 -2.68015 0.007359
## EthN:AgeF3 -0.08468 0.45143 -0.18757 0.851211
##
## phi 1.42088 1.20055
## tau -0.24657 0.30491 -0.80866 0.418708
## *************************************************************
## -2*log-likelihood: 1082.048
## AIC: 1106.048
## BIC: 1141.851
# Como tou no es significativamente diferente de cero, etnonces el mejor modelo de conteo es el binomial negativo 1.
summary(mod1)
##
## Sample size: 146
## Family: Negative Binomial type I with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 2.53409 0.26142 9.69368 < 2.22e-16
## EthN 0.05698 0.34740 0.16402 0.8697159
## AgeF1 0.08732 0.33725 0.25892 0.7956991
## AgeF2 0.70638 0.32864 2.14940 0.0316030
## AgeF3 0.40050 0.33257 1.20428 0.2284819
## SexM 0.11275 0.15912 0.70863 0.4785551
## LrnSL 0.22754 0.18186 1.25116 0.2108767
## EthN:AgeF1 -0.89843 0.44391 -2.02391 0.0429792
## EthN:AgeF2 -1.18060 0.44722 -2.63985 0.0082942
## EthN:AgeF3 -0.10128 0.46350 -0.21850 0.8270373
##
## phi 0.72470 0.09346
## *************************************************************
## -2*log-likelihood: 1082.688
## AIC: 1104.688
## BIC: 1137.508
stepCriterion(mod1,criterion="aic",direction="forward")
##
## Family: Negative Binomial I
## Link function: log
##
## Initial model:
## ~ 1
##
##
## Step 0 :
## df AIC BIC P(Chisq>)(*)
## + Eth 1 1112.6 1121.6 0.0005027
## + Age 3 1117.8 1132.7 0.0109613
## <none> 1122.3 1128.2
## + Sex 1 1123.3 1132.2 0.3192143
## + Lrn 1 1124.0 1132.9 0.5911313
##
## Step 1 : + Eth
##
## df AIC BIC P(Chisq>)(*)
## + Age 3 1107.8 1125.7 0.008401
## <none> 1112.6 1121.6
## + Sex 1 1112.9 1124.8 0.182067
## + Lrn 1 1114.6 1126.5 0.810363
##
## Step 2 : + Age
##
## df AIC BIC P(Chisq>)(*)
## + Age:Eth 3 1102.6 1129.5 0.0082808
## + Lrn 1 1107.4 1128.3 0.1176848
## <none> 1107.8 1125.7
## + Sex 1 1109.7 1130.5 0.6999193
## - Eth 1 1117.8 1132.7 0.0004077
##
## Step 3 : + Age:Eth
##
## df AIC BIC P(Chisq>)(*)
## <none> 1102.6 1129.5
## + Lrn 1 1103.2 1133.0 0.2251
## + Sex 1 1104.2 1134.1 0.5223
##
##
## Final model:
## ~ Eth + Age + Eth:Age
##
## ****************************************************************************
## (*) p-values of the Wald test
stepCriterion(mod1,criterion="bic",direction="forward")
##
## Family: Negative Binomial I
## Link function: log
##
## Initial model:
## ~ 1
##
##
## Step 0 :
## df AIC BIC P(Chisq>)(*)
## + Eth 1 1112.6 1121.6 0.0005027
## <none> 1122.3 1128.2
## + Sex 1 1123.3 1132.2 0.3192143
## + Age 3 1117.8 1132.7 0.0109613
## + Lrn 1 1124.0 1132.9 0.5911313
##
## Step 1 : + Eth
##
## df AIC BIC P(Chisq>)(*)
## <none> 1112.6 1121.6
## + Sex 1 1112.9 1124.8 0.182067
## + Age 3 1107.8 1125.7 0.008401
## + Lrn 1 1114.6 1126.5 0.810363
##
##
## Final model:
## ~ Eth
##
## ****************************************************************************
## (*) p-values of the Wald test
stepCriterion(mod1,criterion="aic",direction="backward")
##
## Family: Negative Binomial I
## Link function: log
##
## Initial model:
## ~ 1 + Eth * Age + Sex + Lrn
##
##
## Step 0 :
## df AIC BIC P(Chisq>)(*)
## - Sex 1 1103.2 1133.0 0.47856
## - Lrn 1 1104.2 1134.1 0.21088
## <none> 1104.7 1137.5
## - Age:Eth 3 1109.2 1133.0 0.01185
##
## Step 1 : - Sex
##
## df AIC BIC P(Chisq>)(*)
## - Lrn 1 1102.6 1129.5 0.22515
## <none> 1103.2 1133.0
## - Age:Eth 3 1107.4 1128.3 0.01342
##
## Step 2 : - Lrn
##
## df AIC BIC P(Chisq>)(*)
## <none> 1102.6 1129.5
## + Sex 1 1104.2 1134.1 0.522257
## - Age:Eth 3 1107.8 1125.7 0.008281
##
##
## Final model:
## ~ Eth + Age + Eth:Age
##
## ****************************************************************************
## (*) p-values of the Wald test
modf1<-overglm(Days ~ 1+ Eth*Age, family =" nb1(log)",data = quine)
summary(modf1)
##
## Sample size: 146
## Family: Negative Binomial type I with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 2.62801 0.24945 10.53516 < 2.22e-16
## EthN 0.13110 0.34550 0.37944 0.7043597
## AgeF1 0.17838 0.31950 0.55829 0.5766463
## AgeF2 0.82673 0.31724 2.60601 0.0091604
## AgeF3 0.37084 0.33375 1.11115 0.2665030
## EthN:AgeF1 -0.99157 0.43939 -2.25673 0.0240252
## EthN:AgeF2 -1.23924 0.44655 -2.77512 0.0055181
## EthN:AgeF3 -0.17626 0.46361 -0.38020 0.7037984
##
## phi 0.73671 0.09447
## *************************************************************
## -2*log-likelihood: 1084.638
## AIC: 1102.638
## BIC: 1129.49
modf2<-overglm(Days ~ 1+ Eth, family =" nb1(log)",data = quine)
summary(modf2)
##
## Sample size: 146
## Family: Negative Binomial type I with log link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) 3.05550 0.11492 26.58775 < 2.22e-16
## EthN -0.55556 0.15968 -3.47931 0.00050271
##
## phi 0.86418 0.10668
## *************************************************************
## -2*log-likelihood: 1106.634
## AIC: 1112.634
## BIC: 1121.585
chi<-1106-10.84
pchisq(chi,1,lower.tail = T)
## [1] 1
#EL MEJOR MODELO ES EL BN1 HACIENDO USO DE ETH
envelope(modf2)
##
|
| | 0%
|
|+ | 4%
|
|++ | 8%
|
|+++ | 12%
|
|++++ | 16%
|
|+++++ | 20%
|
|++++++ | 24%
|
|+++++++ | 28%
|
|++++++++ | 32%
|
|+++++++++ | 36%
|
|++++++++++ | 40%
|
|+++++++++++ | 44%
|
|++++++++++++ | 48%
|
|+++++++++++++ | 52%
|
|++++++++++++++ | 56%
|
|+++++++++++++++ | 60%
|
|++++++++++++++++ | 64%
|
|+++++++++++++++++ | 68%
|
|++++++++++++++++++ | 72%
|
|+++++++++++++++++++ | 76%
|
|++++++++++++++++++++ | 80%
|
|+++++++++++++++++++++ | 84%
|
|++++++++++++++++++++++ | 88%
|
|+++++++++++++++++++++++ | 92%
|
|++++++++++++++++++++++++ | 96%
|
|+++++++++++++++++++++++++| 100%
# SEGUNDO EJERCICIO DE MODELOS ESTADÍSTICOS
data("orobanche")
View(orobanche)
data("orobanche")
ggplot(data = orobanche,mapping=aes(x=specie,y= germinated/seeds))+geom_boxplot(mapping=aes(x=specie,y=germinated/seeds), col=3)
ggplot(data = orobanche,mapping=aes(x=extract,y= germinated/seeds))+geom_boxplot(mapping=aes(x=extract,y=germinated/seeds), col=2)
Como se evidencia dependencia de la proporción con respecto al extracto
y tambien a especies, se procede a ajustar un modelo lineal
generalizado.
mod<-glm(germinated/seeds~specie*extract,weights= seeds,family = binomial(link="logit"),data=orobanche)
summary(mod)
##
## Call:
## glm(formula = germinated/seeds ~ specie * extract, family = binomial(link = "logit"),
## data = orobanche, weights = seeds)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.01617 -1.24398 0.05995 0.84695 2.12123
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4122 0.1842 -2.238 0.0252 *
## specieAegyptiaca 75 -0.1459 0.2232 -0.654 0.5132
## extractCucumber 0.5401 0.2498 2.162 0.0306 *
## specieAegyptiaca 75:extractCucumber 0.7781 0.3064 2.539 0.0111 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 98.719 on 20 degrees of freedom
## Residual deviance: 33.278 on 17 degrees of freedom
## AIC: 117.87
##
## Number of Fisher Scoring iterations: 4
phi=98.719/20
phi
## [1] 4.93595
mod1<-overglm(cbind(germinated,seeds-germinated)~specie*extract,family ="bb(logit)" ,data=orobanche)
summary(mod1)
##
## Sample size: 21
## Family: Beta-Binomial with logit link
## *************************************************************
## Estimate Std.Error z-value Pr(>|z|)
## (Intercept) -0.44456 0.21826 -2.03688 0.041662
## specieAegyptiaca 75 -0.09739 0.27367 -0.35587 0.721937
## extractCucumber 0.52214 0.29682 1.75911 0.078559
## specieAegyptiaca 75:extractCucumber 0.79792 0.37795 2.11116 0.034758
##
## phi 0.01252 0.01164
## *************************************************************
## -2*log-likelihood: 107.534
## AIC: 117.534
## BIC: 122.756
envelope(mod)
##
|
| | 0%
|
|+ | 4%
|
|++ | 8%
|
|+++ | 12%
|
|++++ | 16%
|
|+++++ | 20%
|
|++++++ | 24%
|
|+++++++ | 28%
|
|++++++++ | 32%
|
|+++++++++ | 36%
|
|++++++++++ | 40%
|
|+++++++++++ | 44%
|
|++++++++++++ | 48%
|
|+++++++++++++ | 52%
|
|++++++++++++++ | 56%
|
|+++++++++++++++ | 60%
|
|++++++++++++++++ | 64%
|
|+++++++++++++++++ | 68%
|
|++++++++++++++++++ | 72%
|
|+++++++++++++++++++ | 76%
|
|++++++++++++++++++++ | 80%
|
|+++++++++++++++++++++ | 84%
|
|++++++++++++++++++++++ | 88%
|
|+++++++++++++++++++++++ | 92%
|
|++++++++++++++++++++++++ | 96%
|
|+++++++++++++++++++++++++| 100%
data("cars")
View(cars)
library("survival")
library("survminer")
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
# help(lung)
head(lung, 10)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 3 306 2 74 1 1 90 100 1175 NA
## 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 1010 1 56 1 0 90 90 NA 15
## 4 5 210 2 57 1 1 90 60 1150 11
## 5 1 883 2 60 1 0 100 90 NA 0
## 6 12 1022 1 74 1 1 50 80 513 0
## 7 7 310 2 68 2 2 70 60 384 10
## 8 11 361 2 71 2 2 60 80 538 1
## 9 1 218 2 53 1 1 70 80 825 16
## 10 7 166 2 61 1 2 70 70 271 34
with(lung, Surv(time, status))[1:60]
## [1] 306 455 1010+ 210 883 1022+ 310 361 218 166 170 654
## [13] 728 71 567 144 613 707 61 88 301 81 624 371
## [25] 394 520 574 118 390 12 473 26 533 107 53 122
## [37] 814 965+ 93 731 460 153 433 145 583 95 303 519
## [49] 643 765 735 189 53 246 689 65 5 132 687 345
Estimador Kaplan-Meier
fit <- survfit(Surv(time, status) ~ sex, data = lung)
print(fit)
## Call: survfit(formula = Surv(time, status) ~ sex, data = lung)
##
## n events median 0.95LCL 0.95UCL
## sex=1 138 112 270 212 310
## sex=2 90 53 426 348 550
summary(fit)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## sex=1 138 138 138 112 326.0841 22.91156 270 212 310
## sex=2 90 90 90 53 460.6473 34.68985 426 348 550
d <- data.frame(time = fit$time,
n.risk = fit$n.risk,
n.event = fit$n.event,
n.censor = fit$n.censor,
surv = fit$surv,
upper = fit$upper,
lower = fit$lower
)
head(d)
## time n.risk n.event n.censor surv upper lower
## 1 11 138 3 0 0.9782609 1.0000000 0.9542301
## 2 12 135 1 0 0.9710145 0.9994124 0.9434235
## 3 13 134 2 0 0.9565217 0.9911586 0.9230952
## 4 15 132 1 0 0.9492754 0.9866017 0.9133612
## 5 26 131 1 0 0.9420290 0.9818365 0.9038355
## 6 30 130 1 0 0.9347826 0.9768989 0.8944820
ggsurvplot(fit,
pval = TRUE, conf.int = TRUE,
risk.table = TRUE, # Agregamos la tabla de riesgo
risk.table.col = "strata", # Cambiamos el color de la tabla por grupos
linetype = "strata", # Cambiamos el tipo de línea por grupos
surv.median.line = "hv", # Líneas para la mediana
ggtheme = theme_bw(), # Elegimos el tema de ggplot2
palette = c("#E7B800", "#2E9FDF"))
surv_diff <- survdiff(Surv(time, status) ~ sex, data = lung)
surv_diff
## Call:
## survdiff(formula = Surv(time, status) ~ sex, data = lung)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 138 112 91.6 4.55 10.3
## sex=2 90 53 73.4 5.68 10.3
##
## Chisq= 10.3 on 1 degrees of freedom, p= 0.001
alpha <- 0.05
qchisq(p = 1-alpha, df = 1)
## [1] 3.841459
ggsurvplot(fit,
conf.int = TRUE,
risk.table.col = "strata", # Colores por grupo
ggtheme = theme_bw(), # Tema de ggplot2
palette = c("#E7B800", "#2E9FDF"),
fun = "event") # Seleccionamos ’event’
fit2 <- survfit( Surv(time, status) ~ sex + ph.ecog, data = lung )
ggsurv <- ggsurvplot(fit2, fun = "event", conf.int = TRUE,
ggtheme = theme_bw())
ggsurv$plot +theme_bw() +
theme (legend.position = "right")+
facet_grid(ph.ecog ~ 1)
# MODELO DE COX PARA ANÁLISIS DE SUPERVIVENCIA.
res.cox<-coxph(Surv(time,status)~sex,data=lung)
res.cox
## Call:
## coxph(formula = Surv(time, status) ~ sex, data = lung)
##
## coef exp(coef) se(coef) z p
## sex -0.5310 0.5880 0.1672 -3.176 0.00149
##
## Likelihood ratio test=10.63 on 1 df, p=0.001111
## n= 228, number of events= 165
summary(res.cox)
## Call:
## coxph(formula = Surv(time, status) ~ sex, data = lung)
##
## n= 228, number of events= 165
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sex -0.5310 0.5880 0.1672 -3.176 0.00149 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sex 0.588 1.701 0.4237 0.816
##
## Concordance= 0.579 (se = 0.021 )
## Likelihood ratio test= 10.63 on 1 df, p=0.001
## Wald test = 10.09 on 1 df, p=0.001
## Score (logrank) test = 10.33 on 1 df, p=0.001
dhyper(2,7,25,5)
## [1] 0.2398498
380/1771
## [1] 0.214568
#Binomial{
dbinom(3,5,1/7)
## [1] 0.02141965
360/16807
## [1] 0.02141965