## Stimulus Intensity Trial Yes
## 1 1 10 49 2
## 2 2 20 48 3
## 3 3 30 48 13
## 4 4 40 50 31
## 5 5 50 47 38
## 6 6 60 49 48
## Stimulus Intensity Trial Yes Prop
## 1 1 10 49 2 0.04081633
## 2 2 20 48 3 0.06250000
## 3 3 30 48 13 0.27083333
## 4 4 40 50 31 0.62000000
## 5 5 50 47 38 0.80851064
## 6 6 60 49 48 0.97959184
# use glm to fit a psychophysical function
summary(m0 <- glm(cbind(Yes, Trial - Yes) ~ Intensity, data = dta,
family = binomial(link = "probit")))##
## Call:
## glm(formula = cbind(Yes, Trial - Yes) ~ Intensity, family = binomial(link = "probit"),
## data = dta)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## 1.0388 -0.7613 -0.3124 0.4486 -0.5979 0.7050
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.874139 0.286787 -10.02 <2e-16 ***
## Intensity 0.077472 0.007367 10.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 184.5899 on 5 degrees of freedom
## Residual deviance: 2.8119 on 4 degrees of freedom
## AIC: 26.549
##
## Number of Fisher Scoring iterations: 4
ot <- theme_set(theme_bw())
## fitted psychophysical curve
ggplot() +
geom_line(aes(x=newxval[,1], y = phat)) +
geom_point(aes(x = dta$Intensity, y = dta$Prop)) +
geom_vline(xintercept = 35, lty = 2, col = "gray") +
geom_hline(yintercept = .5, lty = 2, col = "gray") +
labs(x = "Intensity", y = "Probability of correnct responses")##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## sex ghq c nc
## 1 men 0 0 18
## 2 men 1 0 8
## 3 men 2 1 1
## 4 men 3 0 0
## 5 men 4 1 0
## 6 men 5 3 0
# rename
dta21 <- dta2 %>%
mutate(Total = c + nc) %>%
dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
head(dta21)## Gender Score Case Noncase Total
## 1 men 0 0 18 18
## 2 men 1 0 8 8
## 3 men 2 1 1 2
## 4 men 3 0 0 0
## 5 men 4 1 0 1
## 6 men 5 3 0 3
## Gender Case Noncase
## 1 men 8 27
## 2 women 22 63
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is D:/sheu
ggplot(dta21, aes(x=Score, y=Case/Total, color=Gender)) +
geom_point(pch=1, size=rel(2))+
geom_smooth(method="bayesglm",
formula=y ~ x,
method.args=list(family=binomial),
glm.control= 100,
aes(weight=Total),
se=FALSE) +
scale_color_manual(values=c("black", "red3"))+
scale_x_continuous(breaks=seq(0, 10, by=1))+
scale_y_continuous(breaks=seq(0, 1, by=.1))+
labs(x="Score on GHQ",
y="Proportion of cases") +
theme_minimal()+
theme(legend.position=c(.1, .9))## Warning: Ignoring unknown parameters: glm.control
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).
full model
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score + Gender + Score:Gender,
## family = binomial, data = dta21)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.94972 0.00000 0.00000 0.06319 0.48718
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -43.38 21986.72 -0.002 0.998
## Score 21.69 10993.36 0.002 0.998
## Genderwomen 40.38 21986.72 0.002 0.999
## Score:Genderwomen -20.45 10993.36 -0.002 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.1763 on 16 degrees of freedom
## Residual deviance: 1.5422 on 13 degrees of freedom
## AIC: 22.018
##
## Number of Fisher Scoring iterations: 23
decrease interaction
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score + Gender, family = binomial,
## data = dta21)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.25768 0.00000 0.02623 0.23648 0.82922
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.0721 0.9758 -4.173 3.01e-05 ***
## Score 1.4328 0.2907 4.929 8.26e-07 ***
## Genderwomen 0.7938 0.9289 0.855 0.393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.1763 on 16 degrees of freedom
## Residual deviance: 4.9419 on 14 degrees of freedom
## AIC: 23.418
##
## Number of Fisher Scoring iterations: 6
decrease gender
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score, family = binomial,
## data = dta21)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.41618 0.00000 0.04739 0.33149 0.53195
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4536 0.5633 -6.131 8.73e-10 ***
## Score 1.4402 0.2979 4.834 1.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.176 on 16 degrees of freedom
## Residual deviance: 5.744 on 15 degrees of freedom
## AIC: 22.22
##
## Number of Fisher Scoring iterations: 6
## Analysis of Deviance Table
##
## Model 1: cbind(Case, Noncase) ~ Score
## Model 2: cbind(Case, Noncase) ~ Score + Gender
## Model 3: cbind(Case, Noncase) ~ Score + Gender + Score:Gender
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 15 5.7440
## 2 14 4.9419 1 0.8021 0.37047
## 3 13 1.5422 1 3.3997 0.06521 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is no significant differences between 3 models
The proportion of deviance accounted for.
## [1] 0.9405854
If the model fits well, residual deviance approximately follows the χ2n−p random variable with residual degrees of freedom.
## [1] 0.9866052
| cbind(Case, Noncase) | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Error | CI | p |
| (Intercept) | 0.02 | 0.98 | 0.00 – 0.08 | <0.001 |
| Score | 4.19 | 0.29 | 2.57 – 8.20 | <0.001 |
| Gender [women] | 2.21 | 0.93 | 0.42 – 17.73 | 0.393 |
The Gender factor doesn’t make statisical significant on patient.
Conditional on gender, one point increases in GHQ is associated with the increase of 157% to 720% in the odds for being a case.
dta2_p <- dta21 %>%
mutate(Prop = Case/Total,
phat = predict(a1, newdata = dta21[, c(1,2)], type="response"),
elgit = log((Case+1/120)/(Noncase+(1/120))))
dta2_m1 <- broom::augment(a1, newdata = dta2_p)
ggplot(dta2_m1,
aes(Score, .fitted,
color=Gender)) +
geom_line(alpha=.5) +
geom_point(data=dta2_p,
aes(Score, elgit))+
scale_color_manual(values=c("black", "red3"))+
labs(x="Score on GHQ",
y="Empirical and fitted log-odds")+
theme_minimal()+
theme(legend.position = c(.1, .9))There is no interaction between gender and score.
##
## Attaching package: 'faraway'
## The following objects are masked from 'package:arm':
##
## fround, logit, pfround
beta <- coef(a1)
with(dta21, plot(Score, Case/Total,
col = Gender,
bty = 'n',
xlab = "GHQ score"))
grid()
curve(ilogit(beta[1] + beta[2]*x), add=T, col=1)
curve(ilogit(beta[1] + beta[2]*x + beta[3]), add=T, col=2)## Loading required package: stats4
##
## Attaching package: 'bbmle'
## The following object is masked from 'package:dplyr':
##
## slice
## 'data.frame': 1308 obs. of 2 variables:
## $ nV : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Race: Factor w/ 2 levels "Black","White": 2 2 2 2 2 2 2 2 2 2 ...
## nV Race
## 1 0 White
## 2 0 White
## 3 0 White
## 4 0 White
## 5 0 White
## 6 0 White
| NVictims | Race |
|---|---|
| 0 | 1189 |
| 1 | 76 |
| 2 | 26 |
| 3 | 11 |
| 4 | 3 |
| 5 | 2 |
| 6 | 1 |
想了很久不知道code怎麼再把race分兩類?
| Race | NVictims |
|---|---|
| Black | 0.5220126 |
| White | 0.0922541 |
| Race | NVictims |
|---|---|
| Black | 1.1498288 |
| White | 0.1552448 |
##
## Call: glm.nb(formula = NVictims ~ Race, data = dta3, init.theta = 0.2023119205,
## link = log)
##
## Coefficients:
## (Intercept) RaceWhite
## -0.6501 -1.7331
##
## Degrees of Freedom: 1307 Total (i.e. Null); 1306 Residual
## Null Deviance: 471.6
## Residual Deviance: 412.6 AIC: 1002
| NVictims | ||||
|---|---|---|---|---|
| Predictors | Incidence Rate Ratios | std. Error | CI | p |
| (Intercept) | 0.52 | 0.21 | 0.35 – 0.80 | 0.002 |
| Race [White] | 0.18 | 0.24 | 0.11 – 0.28 | <0.001 |
I see the lecture note but.. .
##
## Call:
## glm.nb(formula = NVictims ~ Race + 1, data = dta3, init.theta = 0.2023119205,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7184 -0.3899 -0.3899 -0.3899 3.5072
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6501 0.2077 -3.130 0.00175 **
## RaceWhite -1.7331 0.2385 -7.268 3.66e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.2023) family taken to be 1)
##
## Null deviance: 471.57 on 1307 degrees of freedom
## Residual deviance: 412.60 on 1306 degrees of freedom
## AIC: 1001.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.2023
## Std. Err.: 0.0409
##
## 2 x log-likelihood: -995.7980