Replicate the analysis reported in the brightness discrimination example.
Consider the outcome of a brightness discrimination experiment. On each trial, two patches of light were presented: one was a standatd with an intensity of 35 units, and another was a variable intensity comparison light. The subject was asked to indicate which one of the lights was brighter.
Source: Gegenfurtner, K. (1992). Praxis: Brent’s algorithm for function minimization. Behavior Research Methods, Instruments, and Computers. 24(4), 560-564.
Stimulu Intensity Trial “Yes” 1 10 49 2 2 20 48 3 3 30 48 13 4 40 50 31 5 50 47 38 6 60 49 48
Column 1: Stimulus condition Column 2: Intensity of the variable (comparison) light Column 3: Number of comparisons Column 4: Number of times comparison light was judged brighter
pacman::p_load(knitr, simstudy, tidyverse, nlme, rms, ggplot2, lme4)
# input data
dta <- read.table("data/brightness.txt", h=T)
#
head(dta)
Stimulus Intensity Trial Yes
1 1 10 49 2
2 2 20 48 3
3 3 30 48 13
4 4 40 50 31
5 5 50 47 38
6 6 60 49 48
# observed proportions
dta$Prop <- dta$Yes/dta$Trial
# use glm to fit a psychophysical function
summary(m0 <- glm(cbind(Yes, Trial - Yes) ~ Intensity, data = dta,
family = binomial(link = "probit")))
Call:
glm(formula = cbind(Yes, Trial - Yes) ~ Intensity, family = binomial(link = "probit"),
data = dta)
Deviance Residuals:
1 2 3 4 5 6
1.039 -0.761 -0.312 0.449 -0.598 0.705
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.87414 0.28679 -10.0 <2e-16
Intensity 0.07747 0.00737 10.5 <2e-16
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 184.5899 on 5 degrees of freedom
Residual deviance: 2.8119 on 4 degrees of freedom
AIC: 26.55
Number of Fisher Scoring iterations: 4
1 - pchisq(m0$deviance, m0$df.residual)
[1] 0.58978
# new values in the intensity range
newxval <- with(dta, data.frame(Intensity = seq(5,70, by = 0.5)))
# predicted probabilities
phat <- predict(m0, newdata = newxval, type = "response" )
ot <- theme_set(theme_bw())
# fitted psychophysical curve
ggplot() +
geom_line(aes(x=newxval[,1], y = phat)) +
geom_point(aes(x = dta$Intensity, y = dta$Prop)) +
geom_vline(xintercept = 35, lty = 2, col = "gray") +
geom_hline(yintercept = .5, lty = 2, col = "gray") +
labs(x = "Intensity", y = "Probability of correnct responses")
In my view, the model may be fit here, the stimulus intensity is positively associated with brightness discrimination.
Silvapulle (1981) reported a study on the relation between psychiatric diagnosis and the score on a 12-item General Health Questionnaire (GHQ) for 120 patients admitted to a hospital. Download the ghq.rda data file to your “data” folder and load it to your R session with the command
Column 1: Gender ID Column 2: GHQ score Column 3: Number of cases Column 4: Number of non-cases
The patient was classified by psychiatrists as either a ‘case’ or a ‘non-case’. The question of interest was whether GHQ score, together with patient’s gender, could be used to detect psychiatric cases. Perform an appropriate analysis.
load("data/ghq.rda")
dta <- ghq
head(dta)
sex ghq c nc
1 men 0 0 18
2 men 1 0 8
3 men 2 1 1
4 men 3 0 0
5 men 4 1 0
6 men 5 3 0
dta <- dta %>%
mutate(Total = c + nc) %>%
dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
aggregate(cbind(Case, Noncase) ~ Gender, data=dta, sum)
Gender Case Noncase
1 men 8 27
2 women 22 63
t(aggregate(cbind(Case, Noncase) ~ Score, data=dta, sum))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
Score 0 1 2 3 4 5 6 7 8 9 10
Case 2 2 5 3 3 6 1 3 3 1 1
Noncase 60 22 6 1 1 0 0 0 0 0 0
ggplot(dta, aes(x=Score, y=Case/Total, color=Gender)) +
geom_point(pch=1, size=rel(2))+
stat_smooth(method="glm",
formula=y ~ x,
method.args=list(family=binomial),
aes(weight=Total),
se=FALSE) +
scale_color_manual(values=c("black", "red3"))+
scale_x_continuous(breaks=seq(0, 10, by=1))+
scale_y_continuous(breaks=seq(0, 1, by=.1))+
labs(x="Score on GHQ",
y="Proportion of cases") +
theme_minimal()+
theme(legend.position=c(.1, .9))
m0 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta, family=binomial)
m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta, family=binomial)
m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta, family=binomial)
anova(m2, m1, m0, test="Chisq")
Analysis of Deviance Table
Model 1: cbind(Case, Noncase) ~ Score
Model 2: cbind(Case, Noncase) ~ Score + Gender
Model 3: cbind(Case, Noncase) ~ Score + Gender + Score:Gender
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 15 5.74
2 14 4.94 1 0.8 0.370
3 13 1.54 1 3.4 0.065
m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta, family=binomial(link=logit))
anova(m3, m1, test="Chisq")
Analysis of Deviance Table
Model 1: cbind(Case, Noncase) ~ Gender
Model 2: cbind(Case, Noncase) ~ Score + Gender
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 15 83.1
2 14 4.9 1 78.1 <2e-16
The results revealed that the GHQ score may be more predictable to psychiatric illness than gender, we can only stay score term in model here.
1 - pchisq(m2$deviance, m2$df.residual)
[1] 0.98374
sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)
| cbind(Case, Noncase) | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Error | CI | p |
| (Intercept) | 0.03 | 0.56 | 0.01 – 0.08 | <0.001 |
| Score | 4.22 | 0.30 | 2.56 – 8.37 | <0.001 |
dta_p <- dta %>%
mutate(Prop=Case/Total,
phat=predict(m2,
newdata=dta[, c(1,2)],
type="response"),
elgit=log((Case+1/120)/(Noncase+(1/120))))
dta_m1 <- broom::augment(m2, newdata = dta_p)
ggplot(dta_m1,
aes(Score, .fitted,
color=Gender)) +
geom_line(alpha=.5) +
geom_point(data=dta_p,
aes(Score, elgit))+
scale_color_manual(values=c("black", "red3"))+
labs(x="Score on GHQ",
y="Empirical and fitted log-odds")+
theme_minimal()+
theme(legend.position = c(.1, .9))
Reproduce the results of analysis (SAS output) reported in the victims example.
A recent General Social Survey asked subjects, “Within the past 12 months, how many people have you known personally that were victims of homicide?” The responses by race, for those who identified themselves as black or as white, were collected.
Source: Agresti, A. (2002). Categorical Data Analysis. p. 561.
Column 1: Number of victims personally known to the respondent Column 2: Race ID
dta <- read.csv("data/victims.csv", h=T)
head(dta)
nV Race
1 0 White
2 0 White
3 0 White
4 0 White
5 0 White
6 0 White
dta$nV_f <- factor(dta$nV)
aggregate(nV ~ nV_f*Race, data=dta, length)
nV_f Race nV
1 0 Black 119
2 1 Black 16
3 2 Black 12
4 3 Black 7
5 4 Black 3
6 5 Black 2
7 0 White 1070
8 1 White 60
9 2 White 14
10 3 White 4
11 6 White 1
dta_sum <- as.data.frame(aggregate(nV ~ nV_f*Race, data=dta, length))
aggregate(nV ~ Race, data=dta, mean)
Race nV
1 Black 0.522013
2 White 0.092254
aggregate(nV ~ Race, data=dta, var)
Race nV
1 Black 1.14983
2 White 0.15524
dta_sum <- dta_sum %>%
mutate(Total = sum(nV))
ggplot(dta_sum,
aes(x=nV_f,
y=nV/Total,
color=Race)) +
geom_point(pch=1, size=rel(2))+
scale_color_manual(values=c("black", "red3"))+
#scale_x_continuous(breaks=seq(0, 10, by=1))+
#scale_y_continuous(breaks=seq(0, 1, by=.1))+
labs(x="Number of victims",
y="Proportion of cases") +
theme_minimal()+
theme(legend.position=c(.9, .9))
library(MASS)
MASS::stepAIC(glm.nb(nV ~ Race, data=dta),
scope=list(upper = ~ Race, lower= ~ 1), trace=FALSE)
Call: glm.nb(formula = nV ~ Race, data = dta, init.theta = 0.2023119205,
link = log)
Coefficients:
(Intercept) RaceWhite
-0.65 -1.73
Degrees of Freedom: 1307 Total (i.e. Null); 1306 Residual
Null Deviance: 472
Residual Deviance: 413 AIC: 1000
I’m not sure why the intercept is different from the SAS output. Probably, it’s caused by that I didn’t do log-transoformation?
sjPlot::tab_model(m1 <- glm.nb(nV ~ Race, data=dta), show.se=T, show.r2=F, show.obs=F, transform=NULL)
| nV | ||||
|---|---|---|---|---|
| Predictors | Log-Mean | std. Error | CI | p |
| (Intercept) | -0.65 | 0.21 | -1.05 – -0.23 | 0.002 |
| Race [White] | -1.73 | 0.24 | -2.21 – -1.27 | <0.001 |
pchisq(deviance(m1), df=m1$df.residual, lower.tail=FALSE)
[1] 1