This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(Rcpp)
library(htmltools)
library(knitr)
library(skimr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggplot2)
library(patchwork)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
library(PKNCA)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(broom)
library(glue)
2
n<-100
n1<-50 %>% log
n2<-4000 %>% log
tibble(n=seq(n1, n2, length.out=5) %>% exp %>% round(0)) %>%
mutate(varr2=map(n,
.progress = T,
~{
.x->n
a0 <- -6.54
b0 <- 0.00195
conc0 <- 50
eta <- 0.6
b1<-b0/10
b2<-b0*25
b<-seq(b1, b2 , length.out=10)
tibble(b=b) %>%
mutate(varr=map(b,
# .progress = T,
~{
.x->b0
tibble(id = 1:n,
time = runif(n, 1, 400)) %>%
mutate(conc = rlnorm(n, conc0 %>% log, eta)) %>% suppressMessages()->temp
temp%>%
mutate(logna = a0 + log(time) + b0 * conc) %>%
mutate(na = logna %>% exp) %>%
mutate(dv = map_int(na,
~ {
.x -> na
as.integer(rpois(1, lambda = na))
})) %>%
mutate(dvv = ifelse(dv >= 1, 1, dv)) %>% suppressMessages() -> raw
raw %>%
transmute(id, time, conc, dv, dvv) %>% suppressMessages() -> raw0
raw0 %>%
summarise(p = dvv %>% mean) %>% pull(p) %>% suppressMessages()->prob
POI <- glm(dvv ~ conc + offset(log(time)),
family = poisson(link = "log"),
data = raw0)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
POI %>% tidy() %>%
clean_names() %>%
mutate( rse=std_error/estimate*100, rse=rse %>% abs() %>% round(2)) %>%
mutate(flag="poisson") %>% full_join(
LOG %>% tidy() %>%
clean_names() %>%
mutate( rse=std_error/estimate*100, rse=rse %>% round(2)) %>%
mutate(flag="logistic")
) %>%
mutate(prob=prob) %>% suppressMessages()
}))->data4
data4 %>%
unnest() %>%
transmute(b, term, val=estimate, flag, prob) %>% suppressMessages() ->forplot
forplot %>%
filter(term=="conc") %>% suppressMessages() %>%
ggplot(aes(x = b ,y = val, col = flag %>% as.factor(), size=prob )) +
geom_point() +
labs(x="simulated", y="fitted", title="slope of drug effect", subtitle = glue("N={n}"))+
theme(legend.position="top")+
geom_abline()->p1
data4 %>%
unnest() %>%
transmute(b, term, val=p_value, flag, prob) %>% suppressMessages() ->forplot
forplot %>%
filter(term=="(Intercept)") %>% suppressMessages() %>%
ggplot(aes(x = b ,y = val, col = flag %>% as.factor(), size=prob )) +
geom_point() +
labs(x="simulated", y="fitted", title="slope of drug effect", subtitle = glue("N={n}"))+
theme(legend.position="top")+
geom_abline()->p4
forplot %>%
# filter(term=="conc") %>%
ggplot(aes(x = prob ,y = val, col = flag %>% as.factor())) +
geom_point(alpha=.5) +
geom_line()+
labs(x="event ocurrence rate", y="p_values", title="p_values", subtitle = glue("N={n}"))+
theme(legend.position="top")+
facet_wrap(~term, scales = "free")+
geom_hline(yintercept=0.05, linetype="dotted")->p2
data4 %>%
unnest() %>%
transmute(b, term, val=rse %>% abs, flag, prob) %>% suppressMessages() ->forplot
forplot %>%
# filter(term=="conc") %>%
ggplot(aes(x = prob ,y = val, col = flag %>% as.factor())) +
geom_point(alpha=.5) +
geom_line()+
labs(x="event ocurrence rate", y="RSE%", title="RSE%", subtitle = glue("N={n}"))+
theme(legend.position="top")+
facet_wrap(~term, scales = "free")+
scale_y_log10()->p3
tibble(data=list(data4), fig1=list(p1), fig2=list(p2), fig3=list(p3), fig4=list(p4))
})) %>% suppressMessages()->result
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## Warning in .f(.x[[i]], ...): NAs introduced by coercion to integer range
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
result %>% saveRDS("result.rds")
result<-readRDS("result.rds")
result %>%
unnest()->out4
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr2)`
out4 %>%
mutate(
fig2=map(fig2, ~{
.x+
labs(x="event ocurrence rate", y="p_values", title="p_values", subtitle = glue("N={n}"))
})) %>%
mutate(
fig3=map(fig3, ~{
.x+
labs(x="event ocurrence rate", y="RSE%", title="RSE%", subtitle = glue("N={n}"))
})
)%>%
mutate(
fig4=map(fig4, ~{
.x+
labs(x="simulated sigma", y="fitted intercept", title="intercept", subtitle = glue("N={n}"))
})
) ->out44
out44%>% pull(fig1) %>% walk(print) #slope
## Warning: Removed 2 rows containing missing values (geom_point).
out44 %>% pull(fig2) %>% walk(print) #p value
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
out44%>% pull(fig3) %>% walk(print) #RSE
## Warning: Removed 4 rows containing missing values (geom_point).
## Removed 2 row(s) containing missing values (geom_path).
out44%>% pull(fig4) %>% walk(print)
## Warning: Removed 2 rows containing missing values (geom_point).
b=0.05
n <- 1000
a0 <- -6.54
b0 <- 0.00195
b1<-b0/10
b2<-b0*25
b<-b0*25 #seq(b1, b2 , length.out=10)
conc0 <- 50
eta <- 0.6
set.seed(123)
raw0 <- tibble(
id = 1:n,
time = runif(n, 1, 400)
) %>%
mutate(conc = rlnorm(n, conc0 %>% log, eta))%>%
mutate(
logna = a0 + log(time) + b * conc,
na = exp(logna),
dv = map_int(na, ~ rpois(1, lambda = .)),
dvv = ifelse(dv >= 1, 1, dv)
)
prob <- raw0 %>%
summarise(p = mean(dvv)) %>%
pull(p)
POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
POI_summary <- summary(POI)
LOG_summary <- summary(LOG)
logistic_eq <- paste0(
"Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
"Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)
raw0 <- raw0 %>%
mutate(conc_quart = cut(
conc,
breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
labels = c("1Q", "2Q", "3Q", "4Q"),
include.lowest = TRUE
))
sm_cave <- ddply(raw0, c("conc_quart"), summarise,
mc = median(conc),
n = length(dvv),
rsp = sum(dvv),
rate = round(rsp / n, digits = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], digits = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], digits = 4),
NA)
)
ggplot(raw0, aes(x = conc, y = dvv)) +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
labs(
title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"),
color = "Model", linetype = "Model"
) +
theme(legend.position = "top") +
ylim(0, 1.2) + # Set y-axis limits
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 42 rows containing missing values (geom_smooth).
b=0.05 and mutate time accordingly
n <- 1000
a0 <- -6.54
b0 <- 0.00195
b1<-b0/10
b2<-b0*25
b<-b0*25 #seq(b1, b2 , length.out=10)
conc0 <- 50
eta <- 0.6
set.seed(123)
raw1 <- tibble(
id = 1:n,
time = runif(n, 1, 400)
) %>%
mutate(conc = rlnorm(n, log(conc0), eta)) %>%
mutate(
logna = a0 + log(time) + b * conc,
na = exp(logna),
dv = map_int(na, ~ rpois(1, lambda = .)), # Generate Poisson-distributed counts
dvv = ifelse(dv >= 1, 1, 0) # Convert to binary outcome
) %>%
mutate(
na_back_calculated = ifelse(dvv == 1, map_dbl(dvv, ~ ifelse(. > 0, ., NA)), NA),
# Back-calculate time using the relationship: logna = a0 + log(time) + b * conc
time_adjusted = ifelse(
dvv == 1,
exp((log(na_back_calculated) - (b * conc) - a0)), # Calculate time for dvv = 1
time # Keep the original time for dvv = 0
)
)
prob <- raw1 %>%
summarise(p = mean(dvv)) %>%
pull(p)
POI <- glm(dvv ~ conc + offset(log(time_adjusted)), family = poisson(link = "log"), data = raw1)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw1)
logistic_eq <- paste0(
"Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
"Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)
# Generate predicted values
raw1 <- raw1 %>%
mutate(
pred_poi = predict(POI, type = "response"),
pred_log = predict(LOG, type = "response")
)
raw1 <- raw1 %>%
mutate(conc_quart = cut(
conc,
breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
labels = c("1Q", "2Q", "3Q", "4Q"),
include.lowest = TRUE
))
sm_cave <- ddply(raw1, c("conc_quart"), summarise,
mc = median(conc),
n = length(dvv),
rsp = sum(dvv),
rate = round(rsp / n, digits = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], digits = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], digits = 4),
NA)
)
ggplot(raw1, aes(x = conc)) +
geom_point(aes(y = dvv), color = "blue", alpha = 0.5) + # Raw data points
geom_line(aes(y = pred_poi), color = "red", linetype = "dashed") + # Poisson fit line
geom_line(aes(y = pred_log), color = "green", linetype = "solid") + # Logistic fit line
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
labs(
y = "Probability p",
x = "conc",
title = "Fitted Lines from Poisson and Logistic Regression Models"
) +
theme_minimal()+
ylim(0, 1.2) +
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
## Warning: Removed 15 row(s) containing missing values (geom_path).
ggplot(raw1, aes(x = conc, y = dvv)) +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
labs(
title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"),
color = "Model", linetype = "Model"
) +
theme(legend.position = "top") +
ylim(0, 1.2) +
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 42 rows containing missing values (geom_smooth).
new b=0.05 and mutate time accordingly
n <- 1000
a0 <- -6.54
b0 <- 0.00195
b1<-b0/10
b2<-b0*25
b<-b0*25 #seq(b1, b2 , length.out=10)
conc0 <- 50
eta <- 0.6
set.seed(123)
raw1 <- tibble(
id = 1:n,
time = runif(n, 1, 400)
) %>%
mutate(conc = rlnorm(n, log(conc0), eta)) %>%
mutate(
logna = a0 + log(time) + b * conc,
na = exp(logna),
lambda = na, # Lambda is the expected rate from the Poisson model
p = 1 - exp(-lambda), # Calculate the probability of the event
dvv = rbinom(n, size = 1, prob = p) # Simulate binary outcome
)
prob <- raw1 %>%
summarise(p = mean(dvv)) %>%
pull(p)
POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw1)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw1)
logistic_eq <- paste0(
"Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
"Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)
# Generate predicted values
raw1 <- raw1 %>%
mutate(
pred_poi = predict(POI, type = "response"),
pred_log = predict(LOG, type = "response")
)
raw1 <- raw1 %>%
mutate(conc_quart = cut(
conc,
breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
labels = c("1Q", "2Q", "3Q", "4Q"),
include.lowest = TRUE
))
sm_cave <- ddply(raw1, c("conc_quart"), summarise,
mc = median(conc),
n = length(dvv),
rsp = sum(dvv),
rate = round(rsp / n, digits = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], digits = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], digits = 4),
NA)
)
ggplot(raw1, aes(x = conc)) +
geom_point(aes(y = dvv), color = "blue", alpha = 0.5) + # Raw data points
geom_line(aes(y = pred_poi), color = "red", linetype = "dashed") + # Poisson fit line
geom_line(aes(y = pred_log), color = "green", linetype = "solid") + # Logistic fit line
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
labs(
y = "Probability p",
x = "conc",
title = "Fitted Lines from Poisson and Logistic Regression Models"
) +
theme_minimal()+
ylim(0, 1.2) +
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
## Warning: Removed 2 row(s) containing missing values (geom_path).
ggplot(raw1, aes(x = conc, y = dvv)) +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
labs(
title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"),
color = "Model", linetype = "Model"
) +
theme(legend.position = "top") +
ylim(0, 1.2) +
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 44 rows containing missing values (geom_smooth).
n <- 1000
a0 <- -6.54
b0 <- 0.00195
b1 <- b0 / 10
b2 <- b0 * 25
b_values <- seq(b1, b2, length.out = 10)
conc0 <- 50
eta <- 0.6
generate_plot_for_b <- function(b) {
set.seed(123)
raw0 <- tibble(
id = 1:n,
time = runif(n, 1, 400)
) %>%
mutate(conc = rlnorm(n, log(conc0), eta)) %>%
mutate(
logna = a0 + log(time) + b * conc,
na = exp(logna),
dv = map_int(na, ~ rpois(1, lambda = .)),
dvv = ifelse(dv >= 1, 1, dv)
)
prob <- raw0 %>%
summarise(p = mean(dvv)) %>%
pull(p)
POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
logistic_eq <- paste0(
"Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
"Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)
raw0 <- raw0 %>%
mutate(conc_quart = cut(
conc,
breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
labels = c("1Q", "2Q", "3Q", "4Q"),
include.lowest = TRUE
))
sm_cave <- ddply(raw0, c("conc_quart"), summarise,
mc = median(conc),
n = length(dvv),
rsp = sum(dvv),
rate = round(rsp / n, digits = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], digits = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], digits = 4),
NA)
)
plot <- ggplot(raw0, aes(x = conc, y = dvv)) +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
labs(
title = "Comparison of Logistic and Poisson Fits",
subtitle = glue("b = {round(b, 6)}, Event Occurrence Probability = {round(prob, 3)}"),
color = "Model", linetype = "Model"
) +
theme(legend.position = "top") +
ylim(0, 1.2) + # Set y-axis limits
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
return(plot)
}
plots <- map(b_values, generate_plot_for_b)
walk(plots, print)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 38 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 40 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 42 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 43 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 42 rows containing missing values (geom_smooth).
if the same time
n <- 1000
a0 <- -6.54
b0 <- 0.00195
b1 <- b0 / 10
b2 <- b0 * 25
b_values <- seq(b1, b2, length.out = 10) # Sequence of 10 different b values
conc0 <- 50
eta <- 0.6
# Function to generate data, fit models, and create plots
generate_plot_for_b <- function(b) {
set.seed(123) # Ensure reproducibility for each b value
# Simulate the dataset
raw0 <- tibble(
id = 1:n,
time = 100
) %>%
mutate(conc = rlnorm(n, log(conc0), eta)) %>%
mutate(
logna = a0 + log(time) + b * conc,
na = exp(logna),
dv = map_int(na, ~ rpois(1, lambda = .)),
dvv = ifelse(dv >= 1, 1, dv)
) %>%
mutate(time=1)
# Calculate the event occurrence probability
prob <- raw0 %>%
summarise(p = mean(dvv)) %>%
pull(p)
# Fit Poisson and Logistic models
POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
# Generate model summaries
logistic_eq <- paste0(
"Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
"Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)
# Define quartiles for conc
raw0 <- raw0 %>%
mutate(conc_quart = cut(
conc,
breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
labels = c("1Q", "2Q", "3Q", "4Q"),
include.lowest = TRUE
))
sm_cave <- ddply(raw0, c("conc_quart"), summarise,
mc = median(conc),
n = length(dvv),
rsp = sum(dvv),
rate = round(rsp / n, digits = 2),
lo = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[1], digits = 4),
NA),
hi = ifelse(rsp > 0 & rsp < n,
round(prop.test(rsp, n)$conf.int[2], digits = 4),
NA)
)
plot <- ggplot(raw0, aes(x = conc, y = dvv)) +
geom_point(color = "blue") +
theme_bw() +
stat_smooth(method = glm, method.args = list(family = "binomial"),
aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +
stat_smooth(method = glm, method.args = list(family = "poisson"),
aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +
geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi),
width = 0.2, position = position_dodge(0.05)) +
geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
labs(
title = glue("Comparison of Logistic and Poisson Fits (b = {round(b, 6)}, Event Occurrence Probability = {round(prob, 3)})"),
color = "Model", linetype = "Model"
) +
theme(legend.position = "top") +
ylim(0, 1.2) + # Set y-axis limits
annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 4, hjust = 0) +
annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 4, hjust = 0)
return(plot)
}
plots <- map(b_values, generate_plot_for_b)
walk(plots, print)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 19 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 29 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 37 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 39 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 42 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 43 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 44 rows containing missing values (geom_smooth).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 44 rows containing missing values (geom_smooth).