This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(Rcpp)
library(htmltools)
library(knitr)
library(skimr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggplot2)
library(patchwork)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
library(PKNCA)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(broom)
library(glue)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
raw dataset
rm(list = ls())
# file.choose()
"/global/pkms2/data/Test/Test.PC/PoissonERM/PoissonERM-Example-main/obsdata.csv"%>%
read_csv() %>%
clean_names() %>%
filter(flag==2)->raw
## Rows: 3000 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (13): PROT, ID, SEX, RACE, LOCATION, AGE, BWT, TIME, CAVE1, CAVE2, FLAG,...
## lgl (1): C
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
raw %>%
count('dv')
## dv freq
## 1 0 482
## 2 1 504
## 3 NA 14
cleaned_raw <- raw %>% filter(!is.na(dv))
summary(raw$cave2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 33.58 48.58 53.92 70.05 194.39
summary(cleaned_raw$cave2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 33.57 48.62 53.99 70.01 194.39
#Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 33.58 48.58 53.92 70.05 194.39
raw$cave2squart<-ifelse(raw$cave2<=33.58,"1Q",
ifelse(raw$cave2<=48.58,"2Q",
ifelse(raw$cave2<=70.05,"3Q","4Q")))
cleaned_raw <- raw %>% filter(!is.na(dv)) ####mutate(time=300)
# Fit the logistic regression model
POI <-glm(dv ~ cave2+offset(log(time)), family = poisson(link = "log"), data = cleaned_raw)
LOG <- glm(formula = dv ~ cave2, family = binomial(link = "logit"), data = cleaned_raw)
summary(LOG)
##
## Call:
## glm(formula = dv ~ cave2, family = binomial(link = "logit"),
## data = cleaned_raw)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8751 -1.1382 0.6841 1.1664 1.4926
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.715971 0.135863 -5.27 1.37e-07 ***
## cave2 0.014233 0.002266 6.28 3.38e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1366.4 on 985 degrees of freedom
## Residual deviance: 1323.1 on 984 degrees of freedom
## AIC: 1327.1
##
## Number of Fisher Scoring iterations: 4
summary(POI)
##
## Call:
## glm(formula = dv ~ cave2 + offset(log(time)), family = poisson(link = "log"),
## data = cleaned_raw)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6357 -0.9383 -0.1656 0.5523 2.0911
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.302518 0.090915 -69.323 < 2e-16 ***
## cave2 0.006055 0.001316 4.601 4.21e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 826.12 on 985 degrees of freedom
## Residual deviance: 806.16 on 984 degrees of freedom
## AIC: 1818.2
##
## Number of Fisher Scoring iterations: 5
cleaned_raw <- cleaned_raw %>%
mutate(
pred_poi = predict(POI, type = "response"),
pred_log = predict(LOG, type = "response")
)
logistic_eq <- paste0("Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * cave2, AIC=", round(AIC(LOG), 2))
poisson_eq <- paste0("Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * cave2 + log(time), AIC=", round(AIC(POI),2))
sm_cave <- ddply(cleaned_raw, c("cave2squart"), summarise,
mc = median(cave2),
n = length(dv),
rsp = sum(dv),
rate = round(rsp/n, dig = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], dig = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], dig = 4),
NA)
)
ggplot(cleaned_raw, aes(x = cave2, y = dv)) +
geom_point(color = "blue")+
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = "Comparison of Logistic and Poisson Fits",
color = "Model", linetype = "Model") + # Add labels for the legend
theme(legend.position = "top") +
annotate("text", x = 0, y = 0.2, label = logistic_eq, size = 4, hjust = 0) +
annotate("text", x = 0, y = 0.1, label = poisson_eq, size = 4, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
ggplot(cleaned_raw, aes(x = cave2, y = dv)) +
geom_point(color = "blue", alpha = 0.6) + # Raw data points
geom_line(aes(y = pred_poi, color = "Poisson", linetype = "Poisson"), size = 1) + # Poisson fit
geom_line(aes(y = pred_log, color = "Logistic", linetype = "Logistic"), size = 1) + # Logistic fit
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(
title = "Comparison of Logistic and Poisson Fits",
color = "Model", linetype = "Model"
) + # Add labels for the legend
theme_bw() +
theme(legend.position = "top") +
annotate("text", x = 0, y = 0.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 0.1, label = poisson_eq, size = 3, hjust = 0)
########more cut, 10
cutoff_10 <- quantile(cleaned_raw$cave2, probs = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), na.rm = TRUE)
raww <- cleaned_raw %>%
mutate(cave2_10 = cut(
cave2,
breaks = c(-Inf, cutoff_10, Inf),
labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
))
p <- raww %>% summarise(mean_dv = mean(dv, na.rm = TRUE)) %>% pull(mean_dv)
n <- nrow(raww)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
POI_summary <- tidy(POI) %>% mutate(model = "Poisson")
LOG_summary <- tidy(LOG) %>% mutate(model = "Logistic")
logistic_p_value <- LOG_summary %>% filter(term == "cave2") %>% pull(p.value)
poisson_p_value <- POI_summary %>% filter(term == "cave2") %>% pull(p.value)
logistic_p_value_int <- LOG_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
poisson_p_value_int <- POI_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
combined_summary <- bind_rows(POI_summary, LOG_summary)
sm_cave_10 <- raww %>%
group_by(cave2_10) %>%
dplyr::summarise(
mc = median(cave2),
n = n(),
rsp = sum(dv),
rate = round(rsp / n, 2),
lo = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[1], 4), NA),
hi = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[2], 4), NA)
)
ggplot(raww, aes(x = cave2, y = dv)) +
geom_point(color = "blue")+
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave_10, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave_10, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = "Comparison of Logistic and Poisson Fits",
color = "Model", linetype = "Model") + # Add labels for the legend
theme(legend.position = "top") +
annotate("text", x = 0, y = 0.2, label = logistic_eq, size = 5, hjust = 0) +
annotate("text", x = 0, y = 0.1, label = poisson_eq, size = 5, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#######cut 20
########more cut
cutoff_20 <- quantile(cleaned_raw$cave2, probs = c(0.2, 0.4, 0.6, 0.8, 0.9, 0.93, 0.96, 0.99, 0.995 ), na.rm = TRUE)
raww <- cleaned_raw %>%
mutate(cave2_20 = cut(
cave2,
breaks = c(-Inf, cutoff_20, Inf),
labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
))
p <- raww %>% summarise(mean_dv = mean(dv, na.rm = TRUE)) %>% pull(mean_dv)
n <- nrow(raww)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
POI_summary <- tidy(POI) %>% mutate(model = "Poisson")
LOG_summary <- tidy(LOG) %>% mutate(model = "Logistic")
logistic_p_value <- LOG_summary %>% filter(term == "cave2") %>% pull(p.value)
poisson_p_value <- POI_summary %>% filter(term == "cave2") %>% pull(p.value)
logistic_p_value_int <- LOG_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
poisson_p_value_int <- POI_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
combined_summary <- bind_rows(POI_summary, LOG_summary)
sm_cave_20 <- raww %>%
group_by(cave2_20) %>%
dplyr::summarise(
mc = median(cave2),
n = n(),
rsp = sum(dv),
rate = round(rsp / n, 2),
lo = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[1], 4), NA),
hi = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[2], 4), NA)
)
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
ggplot(raww, aes(x = cave2, y = dv)) +
geom_point(color = "blue")+
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave_20, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave_20, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = "Comparison of Logistic and Poisson Fits",
color = "Model", linetype = "Model") + # Add labels for the legend
theme(legend.position = "top") +
annotate("text", x = 0, y = 0.2, label = logistic_eq, size = 5, hjust = 0) +
annotate("text", x = 0, y = 0.1, label = poisson_eq, size = 5, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
pull from the dataset, from both positive and negative
n<-20
id0 <- raw %>% pull(id) %>% unique %>% sample(size=n)
raw %>%
filter(id%in%id0) %>%
ggplot(aes(x = id %>% as.factor ,y = time, fill = dv %>% as.factor)) +
geom_col(alpha=.2) +
theme(legend.position="top")+
coord_flip()
ID <- raw %>% pull(id) %>% unique
N<-50
data1 <- tibble(n=seq(5 %>% log, 500 %>% log, length.out=N) %>% exp %>% round(0)) %>%
mutate(
var=map(n, .progress = T, ~{
.x -> n
id0 <- sample(ID, n)
raww <- raw %>% filter(id %in%id0)
p <- raww %>%
summarise(mean(dv, na.rm=T)) %>%
clean_names() %>%
pull(mean_dv_na_rm_t)
POI <- glm(dv ~ cave2 + offset(log(time)),
family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
summary(POI)
POI %>%
tidy %>%
mutate(flag="poisson") %>%
full_join(LOG %>% tidy %>% mutate(flag="logistic"))%>%
mutate(p=p) %>%
suppressMessages()
}
)
) %>%
suppressMessages()
## Warning: glm.fit: fitted rates numerically 0 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
nn<-0.5
data1 %>%
unnest() %>% filter(term=="cave2") %>%
clean_names() %>%
ggplot(aes(x = n ,y = p_value, col = flag %>% as.factor())) +
geom_point(position = position_dodge(nn),aes(alpha=p)) +
geom_line(position = position_dodge(nn)) +
theme(legend.position="top")+
geom_hline(yintercept = 0.05, linetype="dotted")->p
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(var)`
p %>% ggplotly
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
data1 %>%
unnest() %>% filter(term=="cave2") %>%
clean_names() %>%
ggplot(aes(x = n ,y = p_value, col = flag %>% as.factor())) +
geom_point(position = position_dodge(nn),aes(alpha=p)) +
geom_line(position = position_dodge(nn)) +
theme(legend.position="top")+
geom_hline(yintercept = 0.05, linetype="dotted")->p1
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(var)`
p %>% ggplotly
dataset 1 time sensitivity
#####when event rate is 50%
N <- 50
data1_1 <- tibble(t = seq(log(1), log(500), length.out = N) %>% exp() %>% round(0)) %>%
mutate(
var = map(t, ~{
raww <- raw %>% mutate(time = .x)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
poisson_results <- tidy(POI) %>% mutate(model = "poisson")
logistic_results <- tidy(LOG) %>% mutate(model = "logistic")
bind_rows(poisson_results, logistic_results) %>%
mutate(time = .x)
})
) %>%
unnest(cols = c(var))
nn <- 0.5
# Plot for p-values (cave2)
p_1 <- data1_1 %>%
filter(term == "cave2") %>%
ggplot(aes(x = time, y = p.value, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
geom_hline(yintercept = 0.05, linetype = "dotted", color = "red") +
labs(title = "p-values (cave2) across Time", x = "Time", y = "p-value") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for slope estimates (sigma)
p1_1 <- data1_1 %>%
filter(term == "cave2") %>%
ggplot(aes(x = time, y = estimate, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
labs(title = "Slope Estimates (cave2) across Time", x = "Time", y = "Slope Estimate (Sigma)") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for intercept estimates
p_intercept_1 <- data1_1 %>%
filter(term == "(Intercept)") %>%
ggplot(aes(x = time, y = estimate, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
labs(title = "Intercept Estimates across Time", x = "Time", y = "Intercept Estimate") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for p-values of intercept
p_intercept_pval_1 <- data1_1 %>%
filter(term == "(Intercept)") %>%
ggplot(aes(x = time, y = p.value, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
geom_hline(yintercept = 0.05, linetype = "dotted", color = "red") +
labs(title = "Intercept p-values across Time", x = "Time", y = "p-value") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
p_1_interactive <- ggplotly(p_1)
p1_1_interactive <- ggplotly(p1_1)
p_intercept_1_interactive <- ggplotly(p_intercept_1)
p_intercept_pval_1_interactive <- ggplotly(p_intercept_pval_1)
p_1_interactive
p1_1_interactive
p_intercept_1_interactive
p_intercept_pval_1_interactive
# Event Rate: 0.8
id_p_0 <- raw %>% filter(dv == 1) %>% pull(id) %>% unique ### positive id 504
id_n_0 <- raw %>% filter(dv == 0) %>% pull(id) %>% unique ### negative id 482
id_p <- sample(id_p_0, length(id_p_0) * 1)
id_n <- sample(id_n_0, length(id_n_0) * 0.25)
raw0.8 <- raw %>%
filter(id %in% c(id_p, id_n)) %>% suppressMessages()
data1_2 <- tibble(t = seq(log(1), log(500), length.out = N) %>% exp() %>% round(0)) %>%
mutate(
var = map(t, ~{
raww <- raw0.8 %>% mutate(time = .x)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
poisson_results <- tidy(POI) %>% mutate(model = "poisson")
logistic_results <- tidy(LOG) %>% mutate(model = "logistic")
bind_rows(poisson_results, logistic_results) %>%
mutate(time = .x)
})
) %>%
unnest(cols = c(var))
# Plot for p-values (cave2)
p_2 <- data1_2 %>%
filter(term == "cave2") %>%
ggplot(aes(x = time, y = p.value, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
geom_hline(yintercept = 0.05, linetype = "dotted", color = "red") +
labs(title = "p-values (cave2) across Time", x = "Time", y = "p-value") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for slope estimates (sigma)
p1_2 <- data1_2 %>%
filter(term == "cave2") %>%
ggplot(aes(x = time, y = estimate, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
labs(title = "Slope Estimates (cave2) across Time", x = "Time", y = "Slope Estimate (Sigma)") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for intercept estimates
p_intercept <- data1_2 %>%
filter(term == "(Intercept)") %>%
ggplot(aes(x = time, y = estimate, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
labs(title = "Intercept Estimates across Time", x = "Time", y = "Intercept Estimate") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
# Plot for p-values of intercept
p_intercept_pval <- data1_2 %>%
filter(term == "(Intercept)") %>%
ggplot(aes(x = time, y = p.value, color = model)) +
geom_point(position = position_dodge(nn)) +
geom_line(position = position_dodge(nn)) +
geom_hline(yintercept = 0.05, linetype = "dotted", color = "red") +
labs(title = "Intercept p-values across Time", x = "Time", y = "p-value") +
theme(legend.position = "top") +
scale_color_manual(values = c("poisson" = "blue", "logistic" = "green"))
p_2_interactive <- ggplotly(p_2)
p1_2_interactive <- ggplotly(p1_2)
p_intercept_interactive <- ggplotly(p_intercept)
p_intercept_pval_interactive <- ggplotly(p_intercept_pval)
p_2_interactive
p1_2_interactive
p_intercept_interactive
p_intercept_pval_interactive
N <- 50
time_values <- seq(log(1), log(500), length.out = N) %>% exp() %>% round(0)
data_fits <- map_dfr(time_values, function(time_point) {
raw %>%
mutate(time = time_point) %>%
mutate(fit_time = paste("Time =", time_point))
})
logistic_fits_plot <- data_fits %>%
ggplot(aes(x = cave2, y = dv, color = fit_time, linetype = fit_time)) +
stat_smooth(method = glm, method.args = list(family = "binomial"), se = FALSE) +
labs(title = "Logistic Regression Fits Across Different Time",
x = "Cave2", y = "Predicted Probability", color = "Time", linetype = "Time") +
theme_minimal() +
theme(legend.position = "right")
poisson_fits_plot <- data_fits %>%
ggplot(aes(x = cave2, y = dv, color = fit_time, linetype = fit_time)) +
stat_smooth(method = glm, method.args = list(family = "poisson"), se = FALSE) +
labs(title = "Poisson Regression Fits Across Different Time",
x = "Cave2", y = "Predicted Count", color = "Time", linetype = "Time") +
theme_minimal() +
theme(legend.position = "right")
logistic_fits_interactive <- ggplotly(logistic_fits_plot)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 700 rows containing non-finite values (stat_smooth).
poisson_fits_interactive <- ggplotly(poisson_fits_plot)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 700 rows containing non-finite values (stat_smooth).
logistic_fits_interactive
poisson_fits_interactive
pull different numbers of id from positive and negative
id_p_0 <- raw %>%filter(dv==1) %>% pull(id) %>% unique ###positive id 504
id_n_0 <- raw %>%filter(dv==0) %>% pull(id) %>% unique ### negative id 482
per <- c(seq(0.05, 1, .1),1)
# NN<-200
data2 <- tibble(crossing(var1=per, var2=per)) %>%
mutate(varr=map2(var1, var2, .progress = T, ~{
.x->per1
.y->per2
id_p <- sample(id_p_0, id_p_0 %>% length*per1)
id_n <- sample(id_n_0, id_n_0 %>% length*per2)
raww <- raw %>%
filter(id%in%c(id_p, id_n)) %>% suppressMessages()
p <- raww %>%
summarise(mean(dv, na.rm=T)) %>%
clean_names() %>% pull(mean_dv_na_rm_t) %>%
suppressMessages()
n <- raww %>% nrow()
POI <- glm(dv ~ cave2 + offset(log(time)),
family = poisson(link = "log"), data = raww)
# Fit the logistic regression model
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
summary(POI)
POI %>% tidy %>% mutate(flag="poisson") %>% full_join(
LOG %>% tidy %>% mutate(flag="logistic")
)%>% mutate(p=p, n=n) %>% suppressMessages()
}))
#####################
data2
## # A tibble: 121 × 3
## var1 var2 varr
## <dbl> <dbl> <list>
## 1 0.05 0.05 <tibble [4 × 8]>
## 2 0.05 0.15 <tibble [4 × 8]>
## 3 0.05 0.25 <tibble [4 × 8]>
## 4 0.05 0.35 <tibble [4 × 8]>
## 5 0.05 0.45 <tibble [4 × 8]>
## 6 0.05 0.55 <tibble [4 × 8]>
## 7 0.05 0.65 <tibble [4 × 8]>
## 8 0.05 0.75 <tibble [4 × 8]>
## 9 0.05 0.85 <tibble [4 × 8]>
## 10 0.05 0.95 <tibble [4 × 8]>
## # … with 111 more rows
data2 %>%unnest()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## # A tibble: 484 × 10
## var1 var2 term estimate std.error stati…¹ p.value flag p n
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <int>
## 1 0.05 0.05 (Intercept) -7.26 0.629 -11.5 8.82e-31 pois… 0.510 49
## 2 0.05 0.05 cave2 0.0193 0.00877 2.21 2.75e- 2 pois… 0.510 49
## 3 0.05 0.05 (Intercept) -2.32 0.991 -2.34 1.90e- 2 logi… 0.510 49
## 4 0.05 0.05 cave2 0.0399 0.0159 2.50 1.23e- 2 logi… 0.510 49
## 5 0.05 0.15 (Intercept) -7.29 0.426 -17.1 8.80e-66 pois… 0.258 97
## 6 0.05 0.15 cave2 0.0125 0.00566 2.22 2.65e- 2 pois… 0.258 97
## 7 0.05 0.15 (Intercept) -2.08 0.539 -3.86 1.15e- 4 logi… 0.258 97
## 8 0.05 0.15 cave2 0.0176 0.00804 2.19 2.83e- 2 logi… 0.258 97
## 9 0.05 0.25 (Intercept) -7.64 0.434 -17.6 2.06e-69 pois… 0.172 145
## 10 0.05 0.25 cave2 0.0123 0.00668 1.85 6.49e- 2 pois… 0.172 145
## # … with 474 more rows, and abbreviated variable name ¹statistic
a<-0.1
forplot <- data2 %>% unnest()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
p1 <- forplot %>%
ggplot(aes(x = p ,y = estimate, col = flag %>% as.factor)) +
geom_point(alpha=a) +
geom_line(alpha=a) +
#geom_smooth(method = "lm", se = T) +
#geom_col(position = position_dodge(width = .25), alpha = .5) +
#scale_y_log10()+
facet_wrap(~ term, scales = "free") +
theme(legend.position="top")+
geom_smooth()+
labs(x="event rate(%)")+
geom_vline(xintercept =raw %>% summarise(m=dv %>% mean(na.rm=T)) %>% pull(m), linetype="dotted")
forplot <-data2 %>%
unnest() %>%
clean_names()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
p2 <- forplot%>%
ggplot(aes(x = p ,y = p_value, col = flag %>% as.factor)) +
geom_point(alpha=a) +
geom_line(alpha=a) +
#geom_smooth(method = "lm", se = T) +
#geom_col(position = position_dodge(width = .25), alpha = .5) +
#scale_y_log10()+
facet_wrap(~ term, scales = "free") +
theme(legend.position="top")+
geom_smooth()+
labs(x="event rate(%)")+
geom_vline(xintercept =raw %>% summarise(m=dv %>% mean(na.rm=T)) %>% pull(m), linetype="dotted")+
geom_hline(yintercept = 0.05, col="blue", linetype="dotted")
p3 <- forplot%>%
mutate(rse=std_error/estimate) %>%
ggplot(aes(x = p ,y = rse, col = flag %>% as.factor)) +
geom_point(alpha=a) +
geom_line(alpha=a) +
#geom_smooth(method = "lm", se = T) +
#geom_col(position = position_dodge(width = .25), alpha = .5) +
#scale_y_log10()+
facet_wrap(~ term, scales = "free") +
theme(legend.position="top")+
geom_smooth()+
labs(x="event rate(%)")+
geom_vline(xintercept =raw %>% summarise(m=dv %>% mean(na.rm=T)) %>% pull(m), linetype="dotted")
p1 %>% ggplotly
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p2 %>% ggplotly
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p3 %>% ggplotly
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
pull different ratio
id_p_0 <- raw %>%filter(dv==1) %>% pull(id) %>% unique ###positive id 504
id_n_0 <- raw %>%filter(dv==0) %>% pull(id) %>% unique ### negative id 482
per6 <- c(seq(0.05, 1, 0.2), 1)
data3 <- tibble(crossing(var1 = per6, var2 = per6)) %>%
mutate(results = map2(var1, var2, ~{
per1 <- .x
per2 <- .y
id_p <- sample(id_p_0, length(id_p_0) * per1)
id_n <- sample(id_n_0, length(id_n_0) * per2)
raww <- cleaned_raw %>%
filter(id %in% c(id_p, id_n)) %>%
mutate(cave2 = as.numeric(cave2))
quantile_cutoffs <- quantile(raww$cave2, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
raww <- raww %>%
mutate(cave2squart = cut(
cave2,
breaks = c(-Inf, quantile_cutoffs, Inf),
labels = c("1Q", "2Q", "3Q", "4Q")
))
p <- raww %>% summarise(mean_dv = mean(dv, na.rm = TRUE)) %>% pull(mean_dv)
n <- nrow(raww)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
POI_summary <- tidy(POI) %>% mutate(model = "Poisson", dataset_ratio = paste0("p:", round(per1, 2), ", n:", round(per2, 2)))
LOG_summary <- tidy(LOG) %>% mutate(model = "Logistic", dataset_ratio = paste0("p:", round(per1, 2), ", n:", round(per2, 2)))
logistic_p_value <- LOG_summary %>% filter(term == "cave2") %>% pull(p.value)
poisson_p_value <- POI_summary %>% filter(term == "cave2") %>% pull(p.value)
logistic_p_value_int <- LOG_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
poisson_p_value_int <- POI_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
combined_summary <- bind_rows(POI_summary, LOG_summary)
sm_cave <- raww %>%
group_by(cave2squart) %>%
dplyr::summarise(
mc = median(cave2),
n = n(),
rsp = sum(dv),
rate = round(rsp / n, 2),
lo = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[1], 4), NA),
hi = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[2], 4), NA)
)
logistic_eq <- paste0("Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * cave2, p_slope = ", format(signif(logistic_p_value, 3), scientific = TRUE), ", p_intercept = ", format(signif(logistic_p_value_int, 3), scientific = TRUE), ", AIC=", round(AIC(LOG),2))
poisson_eq <- paste0("Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * cave2 + log(time), p_slope = ", format(signif(poisson_p_value, 3), scientific = TRUE), ", p_intercept = ", format(signif(poisson_p_value_int, 3), scientific = TRUE),", AIC=", round(AIC(POI),2))
plot <- ggplot(raww, aes(x = cave2, y = dv)) +
scale_x_continuous() +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = paste0("%positive=", round(per1, 2), ", %negative=", round(per2, 2), ", event rate (%)=", round(p, 3)),
color = "Model", linetype = "Model") +
theme(legend.position = "top") +
annotate("text", x = 0.1, y = 0.15, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0.1, y = 0.08, label = poisson_eq, size = 3, hjust = 0)
list(raww = raww, plot = plot, summary = combined_summary)
}))
plots <- map(data3$results, "plot") %>% discard(is.null)
for (i in seq_along(plots)) {
print(plots[[i]])
}
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
pull different ratio, more cutoff
id_p_0 <- raw %>%filter(dv==1) %>% pull(id) %>% unique ###positive id 504
id_n_0 <- raw %>%filter(dv==0) %>% pull(id) %>% unique ### negative id 482
per6 <- c(seq(0.05, 1, 0.2), 1)
data3 <- tibble(crossing(var1 = per6, var2 = per6)) %>%
mutate(results = map2(var1, var2, ~{
per1 <- .x
per2 <- .y
id_p <- sample(id_p_0, length(id_p_0) * per1)
id_n <- sample(id_n_0, length(id_n_0) * per2)
raww <- cleaned_raw %>%
filter(id %in% c(id_p, id_n)) %>%
mutate(cave2 = as.numeric(cave2))
quantile_cutoffs <- quantile(raww$cave2, probs = c(0.2, 0.4, 0.6, 0.8, 0.9, 0.93, 0.96, 0.99, 0.995), na.rm = TRUE)
raww <- raww %>%
mutate(cave2squart = cut(
cave2,
breaks = c(-Inf, quantile_cutoffs, Inf),
labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
))
p <- raww %>% summarise(mean_dv = mean(dv, na.rm = TRUE)) %>% pull(mean_dv)
n <- nrow(raww)
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
POI_summary <- tidy(POI) %>% mutate(model = "Poisson", dataset_ratio = paste0("p:", round(per1, 2), ", n:", round(per2, 2)))
LOG_summary <- tidy(LOG) %>% mutate(model = "Logistic", dataset_ratio = paste0("p:", round(per1, 2), ", n:", round(per2, 2)))
logistic_p_value <- LOG_summary %>% filter(term == "cave2") %>% pull(p.value)
poisson_p_value <- POI_summary %>% filter(term == "cave2") %>% pull(p.value)
logistic_p_value_int <- LOG_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
poisson_p_value_int <- POI_summary %>% filter(term == "(Intercept)") %>% pull(p.value)
combined_summary <- bind_rows(POI_summary, LOG_summary)
sm_cave <- raww %>%
group_by(cave2squart) %>%
dplyr::summarise(
mc = median(cave2),
n = n(),
rsp = sum(dv),
rate = round(rsp / n, 2),
lo = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[1], 4), NA),
hi = ifelse(rsp > 0 & rsp < n, round(prop.test(rsp, n)$conf.int[2], 4), NA)
)
logistic_eq <- paste0("Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * cave2, p_slope = ", format(signif(logistic_p_value, 3), scientific = TRUE), ", p_intercept = ", format(signif(logistic_p_value_int, 3), scientific = TRUE), ", AIC=", round(AIC(LOG),2))
poisson_eq <- paste0("Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * cave2 + log(time), p_slope = ", format(signif(poisson_p_value, 3), scientific = TRUE), ", p_intercept = ", format(signif(poisson_p_value_int, 3), scientific = TRUE), ", AIC=", round(AIC(POI),2))
plot <- ggplot(raww, aes(x = cave2, y = dv)) +
scale_x_continuous() +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = paste0("%positive=", round(per1, 2), ", %negative=", round(per2, 2), ", event rate (%)=", round(p, 3)),
color = "Model", linetype = "Model") +
theme(legend.position = "top") +
annotate("text", x = 0.1, y = 0.15, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0.1, y = 0.08, label = poisson_eq, size = 3, hjust = 0)
list(raww = raww, plot = plot, summary = combined_summary)
}))
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
## Warning in prop.test(rsp, n): Chi-squared approximation may be incorrect
plots <- map(data3$results, "plot") %>% discard(is.null)
for (i in seq_along(plots)) {
print(plots[[i]])
}
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
per6 <- c(seq(0.05, 1, 0.2), 1)
data4 <- tibble(crossing(var1 = per6, var2 = per6)) %>%
mutate(results = map2(var1, var2, ~{
per1 <- .x
per2 <- .y
id_p <- sample(id_p_0, length(id_p_0) * per1)
id_n <- sample(id_n_0, length(id_n_0) * per2)
raww <- cleaned_raw %>%
filter(id %in% c(id_p, id_n)) %>%
mutate(cave2 = as.numeric(cave2)) # Ensure cave2 is numeric
if (nrow(raww) < 10 || !("dv" %in% colnames(raww))) {
return(list(raww = NULL, plot = NULL, summary = NULL))
}
quantile_cutoffs <- quantile(raww$cave2, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
raww <- raww %>%
mutate(cave2squart = cut(
cave2,
breaks = c(-Inf, quantile_cutoffs, Inf),
labels = c("1Q", "2Q", "3Q", "4Q")
))
POI <- glm(dv ~ cave2 + offset(log(time)), family = poisson(link = "log"), data = raww)
LOG <- glm(dv ~ cave2, family = binomial(link = "logit"), data = raww)
list(raww = raww, plot = NULL, summary = NULL)
}))
plot_logistic <- function(results) {
combined_data <- map_dfr(seq_along(results), function(i) {
data <- results[[i]]
if (!is.null(data) && is.data.frame(data)) {
logistic_model <- glm(dv ~ cave2, family = binomial, data = data)
logistic_p_value <- summary(logistic_model)$coefficients[2, 4]
event_rate <- mean(data$dv, na.rm = TRUE)
logistic_label <- paste0("Logistic (Rate: ", round(event_rate, 2), ", p: ", formatC(logistic_p_value, format = "e", digits = 2), ")")
data %>%
mutate(label = logistic_label)
} else {
NULL
}
})
ggplot(combined_data, aes(x = cave2, y = dv)) +
theme_bw() +
stat_smooth(aes(color = label, linetype = label),
method = glm, method.args = list(family = "binomial"), se = FALSE) +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = "Logistic Model Fits", color = "Model", linetype = "Model") +
theme(legend.position = "top")
}
plot_poisson <- function(results) {
combined_data <- map_dfr(seq_along(results), function(i) {
data <- results[[i]]
poisson_model <- glm(dv ~ cave2 + offset(log(time)), family = poisson, data = data)
poisson_p_value <- summary(poisson_model)$coefficients[2, 4]
event_rate <- mean(data$dv, na.rm = TRUE)
poisson_label <- paste0("Poisson (Rate: ", round(event_rate, 2), ", p: ", formatC(poisson_p_value, format = "e", digits = 2), ")")
data %>%
mutate(label = poisson_label)
})
ggplot(combined_data, aes(x = cave2, y = dv)) +
theme_bw() +
stat_smooth(aes(color = label, linetype = label),
method = glm, method.args = list(family = "poisson"), se = FALSE) +
xlab("cave2") + ylab("Rate of DV (AE2)") +
labs(title = "Poisson Model Fits", color = "Model", linetype = "Model") +
theme(legend.position = "top")
}
logistic_results <- map(data4$results, ~ if (!is.null(.x$raww)) .x$raww)
poisson_results <- map(data4$results, ~ if (!is.null(.x$raww)) .x$raww)
print(plot_logistic(logistic_results))
## `geom_smooth()` using formula 'y ~ x'
print(plot_poisson(poisson_results))
## `geom_smooth()` using formula 'y ~ x'
logistic_summaries <- data3 %>%
mutate(model_results = map(results, "summary")) %>%
unnest(cols = c(model_results)) %>%
filter(model == "Logistic" & term %in% c("(Intercept)", "cave2"))
poisson_summaries <- data3 %>%
mutate(model_results = map(results, "summary")) %>%
unnest(cols = c(model_results)) %>%
filter(model == "Poisson" & term %in% c("(Intercept)", "cave2"))
LOG_slope <- logistic_summaries %>%
filter(term == "cave2") %>%
mutate(slope = estimate, slope_p_value = p.value) %>%
select(var1, var2, results, slope_p_value, slope, model, dataset_ratio)
LOG_intercept <- logistic_summaries %>%
filter(term == "(Intercept)") %>%
mutate(intercept = estimate) %>%
select(var1, var2, results, intercept, model, dataset_ratio)
logistic_combined <- inner_join(LOG_slope, LOG_intercept,
by = c("var1", "var2", "results", "model", "dataset_ratio"))
time <- 300 # Replace with the appropriate offset value for your context
# Generate the dataset for POI_combined (Poisson model)
POI_slope <- poisson_summaries %>%
filter(term == "cave2") %>%
mutate(slope = estimate, slope_p_value = p.value) %>%
select(var1, var2, results, slope_p_value, slope, model, dataset_ratio)
POI_intercept <- poisson_summaries %>%
filter(term == "(Intercept)") %>%
mutate(intercept = estimate) %>%
select(var1, var2, results, intercept, model, dataset_ratio)
# Use inner_join to combine slope and intercept data
POI_combined <- inner_join(POI_slope, POI_intercept,
by = c("var1", "var2", "results", "model", "dataset_ratio"))
# Check the final structure of POI_combined
print(head(POI_combined))
## # A tibble: 6 × 8
## var1 var2 results slope_p_value slope model dataset_r…¹ inter…²
## <dbl> <dbl> <list> <dbl> <dbl> <chr> <chr> <dbl>
## 1 0.05 0.05 <named list [3]> 0.0898 0.00767 Poisson p:0.05, n:… -6.33
## 2 0.05 0.25 <named list [3]> 0.0462 0.0131 Poisson p:0.05, n:… -7.69
## 3 0.05 0.45 <named list [3]> 0.0527 0.0111 Poisson p:0.05, n:… -8.15
## 4 0.05 0.65 <named list [3]> 0.324 0.00695 Poisson p:0.05, n:… -8.18
## 5 0.05 0.85 <named list [3]> 0.00883 0.0163 Poisson p:0.05, n:… -8.99
## 6 0.05 1 <named list [3]> 0.0242 0.0141 Poisson p:0.05, n:1 -9.01
## # … with abbreviated variable names ¹dataset_ratio, ²intercept
# Create a sequence of cave2 values for prediction
cave2_seq <- seq(min(cleaned_raw$cave2, na.rm = TRUE), max(cleaned_raw$cave2, na.rm = TRUE), length.out = 100)
# Generate predictions for the logistic model
logistic_predictions <- logistic_combined %>%
mutate(
cave2_values = list(cave2_seq),
predicted = map2(slope, intercept, ~ {
if (is.na(.x) | is.na(.y)) return(rep(NA, length(cave2_seq)))
plogis(.y + .x * cave2_seq)
})
) %>%
unnest(c(cave2_values, predicted))
# Generate predictions for the Poisson model with the offset (log(time))
poisson_predictions <- POI_combined %>%
mutate(
cave2_values = list(cave2_seq),
predicted = map2(slope, intercept, ~ {
if (is.na(.x) | is.na(.y)) return(rep(NA, length(cave2_seq)))
exp(.y + .x * cave2_seq + log(time)) # Include the offset for time
})
) %>%
unnest(c(cave2_values, predicted))
# Plot for Logistic Model
logistic_plot <- ggplot(logistic_predictions, aes(x = cave2_values, y = predicted, color = dataset_ratio)) +
geom_line() +
labs(
title = "Logistic Model: Fitting Lines Across Different Datasets",
x = "cave2",
y = "Predicted Probability",
color = "Dataset Ratio"
) +
theme_minimal()
# Plot for Poisson Model
poisson_plot <- ggplot(poisson_predictions, aes(x = cave2_values, y = predicted, color = dataset_ratio)) +
geom_line() +
labs(
title = "Poisson Model: Fitting Lines Across Different Datasets (With Time Offset)",
x = "cave2",
y = "Predicted Rate",
color = "Dataset Ratio"
) +
theme_minimal()