Poisson ER

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(Rcpp)
library(htmltools)
library(knitr)
library(skimr)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggplot2)
library(ggplot2)
library(patchwork)
library(plyr)

## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:purrr':
## 
##     compact

library(PKNCA)
library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(broom)
library(glue)

n<-100

n1<-50 %>% log
n2<-4000 %>% log
tibble(n=seq(n1, n2, length.out=5) %>% exp %>% round(0)) %>% 
  mutate(varr2=map(n, 
                   .progress = T,
                   ~{
    .x->n
                     
a0 <- -6.54
b0 <- 0.00195

conc0 <- 50
eta <- 0.6

b1<-b0/10
b2<-b0*25
b<-seq(b1, b2 , length.out=10) 

tibble(b=b) %>%
  mutate(varr=map(b, 
                  # .progress = T,
                  ~{
    .x->b0
    
    
tibble(id = 1:n,
       time = runif(n, 1, 400)) %>%
  mutate(conc = rlnorm(n, conc0 %>% log, eta)) %>% suppressMessages()->temp

    
temp%>%
  mutate(logna = a0 + log(time) + b0 * conc) %>%
  mutate(na = logna %>% exp) %>%
  mutate(dv = map_int(na, 
                      ~ {
    .x -> na
   as.integer(rpois(1, lambda = na))
  })) %>%
  mutate(dvv = ifelse(dv >= 1, 1, dv)) %>% suppressMessages() -> raw

raw %>%
  transmute(id, time, conc, dv, dvv) %>% suppressMessages() -> raw0

raw0 %>%
  summarise(p = dvv %>% mean) %>% pull(p) %>% suppressMessages()->prob

POI <- glm(dvv ~ conc + offset(log(time)),
            family = poisson(link = "log"),
            data = raw0)

LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)

POI %>% tidy() %>% 
  clean_names() %>% 
  mutate( rse=std_error/estimate*100, rse=rse %>% abs() %>% round(2)) %>% 
  mutate(flag="poisson") %>% full_join(
    
    LOG %>% tidy() %>% 
  clean_names() %>% 
  mutate( rse=std_error/estimate*100, rse=rse %>% round(2)) %>% 
  mutate(flag="logistic")
  ) %>% 
  mutate(prob=prob) %>% suppressMessages()

    
  }))->data4



data4 %>% 
  unnest() %>% 
  transmute(b, term, val=estimate, flag, prob) %>% suppressMessages() ->forplot


forplot %>% 
  filter(term=="conc") %>% suppressMessages() %>% 
        ggplot(aes(x = b ,y = val, col = flag %>% as.factor(), size=prob )) +
    geom_point() +
    labs(x="simulated", y="fitted", title="slope of drug effect", subtitle = glue("N={n}"))+
        theme(legend.position="top")+
  geom_abline()->p1
    
    
 

data4 %>% 
  unnest() %>% 
  transmute(b, term, val=p_value, flag, prob) %>% suppressMessages() ->forplot

forplot %>% 
  filter(term=="(Intercept)") %>% suppressMessages() %>% 
        ggplot(aes(x = b ,y = val, col = flag %>% as.factor(), size=prob )) +
    geom_point() +
    labs(x="simulated", y="fitted", title="slope of drug effect", subtitle = glue("N={n}"))+
        theme(legend.position="top")+
  geom_abline()->p4


forplot %>% 
  # filter(term=="conc") %>% 
        ggplot(aes(x = prob ,y = val, col = flag %>% as.factor())) +
    geom_point(alpha=.5) +
  geom_line()+
    labs(x="event ocurrence rate", y="p_values", title="p_values", subtitle = glue("N={n}"))+
        theme(legend.position="top")+
  facet_wrap(~term, scales = "free")+
  geom_hline(yintercept=0.05, linetype="dotted")->p2
    
 

data4 %>% 
  unnest() %>% 
  transmute(b, term, val=rse %>% abs, flag, prob) %>% suppressMessages() ->forplot


forplot %>% 
  # filter(term=="conc") %>% 
        ggplot(aes(x = prob ,y = val, col = flag %>% as.factor())) +
    geom_point(alpha=.5) +
  geom_line()+
    labs(x="event ocurrence rate", y="RSE%", title="RSE%", subtitle = glue("N={n}"))+
        theme(legend.position="top")+
  facet_wrap(~term, scales = "free")+
  scale_y_log10()->p3
    

tibble(data=list(data4), fig1=list(p1), fig2=list(p2), fig3=list(p3), fig4=list(p4))

    
  })) %>% suppressMessages()->result

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`

## Warning in .f(.x[[i]], ...): NAs introduced by coercion to integer range

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`
## `cols` is now required when using unnest().
## Please use `cols = c(varr)`

 result %>% saveRDS("result.rds")
 
 result<-readRDS("result.rds")

result %>% 
  unnest()->out4

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(varr2)`

out4 %>% 
  mutate(
    fig2=map(fig2, ~{
    .x+
        labs(x="event ocurrence rate", y="p_values", title="p_values", subtitle = glue("N={n}"))

  })) %>% 
  mutate(
    fig3=map(fig3, ~{
    .x+
    labs(x="event ocurrence rate", y="RSE%", title="RSE%", subtitle = glue("N={n}"))

  })
  )%>% 
  mutate(
    fig4=map(fig4, ~{
    .x+
    labs(x="simulated sigma", y="fitted intercept", title="intercept", subtitle = glue("N={n}"))

  })
  ) ->out44



out44%>% pull(fig1) %>% walk(print)   #slope

## Warning: Removed 2 rows containing missing values (geom_point).

out44 %>% pull(fig2) %>% walk(print)  #p value

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 2 row(s) containing missing values (geom_path).

out44%>% pull(fig3) %>% walk(print)    #RSE

## Warning: Removed 4 rows containing missing values (geom_point).
## Removed 2 row(s) containing missing values (geom_path).

out44%>% pull(fig4) %>% walk(print)

## Warning: Removed 2 rows containing missing values (geom_point).

b=0.05

n <- 1000
a0 <- -6.54
b0 <- 0.00195 
b1<-b0/10
b2<-b0*25
b<-b0*25    #seq(b1, b2 , length.out=10) 
conc0 <- 50
eta <- 0.6

set.seed(123) 

raw0 <- tibble(
  id = 1:n,
  time = runif(n, 1, 400)
) %>%
  mutate(conc = rlnorm(n, conc0 %>% log, eta))%>%
  mutate(
    logna = a0 + log(time) + b * conc,
    na = exp(logna),
    dv = map_int(na, ~ rpois(1, lambda = .)),
    dvv = ifelse(dv >= 1, 1, dv)
  )

prob <- raw0 %>%
  summarise(p = mean(dvv)) %>%
  pull(p)

POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)

POI_summary <- summary(POI)
LOG_summary <- summary(LOG)

logistic_eq <- paste0(
  "Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
  "Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)

raw0 <- raw0 %>%
  mutate(conc_quart = cut(
    conc,
    breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
    labels = c("1Q", "2Q", "3Q", "4Q"),
    include.lowest = TRUE
  ))

sm_cave <- ddply(raw0, c("conc_quart"), summarise,
                 mc = median(conc),
                 n = length(dvv),
                 rsp = sum(dvv),
                 rate = round(rsp / n, digits = 2),
                 lo = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[1], digits = 4), 
                             NA),
                 hi = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[2], digits = 4), 
                             NA)
)

ggplot(raw0, aes(x = conc, y = dvv)) + 
  geom_point(color = "blue") + 
  theme_bw() +
  stat_smooth(method = glm, method.args = list(family = "binomial"), 
              aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +  
  stat_smooth(method = glm, method.args = list(family = "poisson"), 
              aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +  
  geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                width = 0.2, position = position_dodge(0.05)) +
  geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
  xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
  labs(
    title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"), 
    color = "Model", linetype = "Model"
  ) +  
  theme(legend.position = "top") +
  ylim(0, 1.2) +  # Set y-axis limits
  annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
  annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 42 rows containing missing values (geom_smooth).

b=0.05 and mutate time accordingly

n <- 1000
a0 <- -6.54
b0 <- 0.00195 
b1<-b0/10
b2<-b0*25
b<-b0*25    #seq(b1, b2 , length.out=10) 
conc0 <- 50
eta <- 0.6

set.seed(123) 

raw1 <- tibble(
  id = 1:n,
  time = runif(n, 1, 400)
) %>%
  mutate(conc = rlnorm(n, log(conc0), eta)) %>%
  mutate(
    logna = a0 + log(time) + b * conc,
    na = exp(logna),
    dv = map_int(na, ~ rpois(1, lambda = .)),  # Generate Poisson-distributed counts
    dvv = ifelse(dv >= 1, 1, 0)  # Convert to binary outcome
  ) %>%
  mutate(
    na_back_calculated = ifelse(dvv == 1, map_dbl(dvv, ~ ifelse(. > 0, ., NA)), NA),
    # Back-calculate time using the relationship: logna = a0 + log(time) + b * conc
     time_adjusted = ifelse(
        dvv == 1,
        exp((log(na_back_calculated) - (b * conc) - a0)),  # Calculate time for dvv = 1
        time  # Keep the original time for dvv = 0
      )
  )




prob <- raw1 %>%
  summarise(p = mean(dvv)) %>%
  pull(p)


POI <- glm(dvv ~ conc + offset(log(time_adjusted)), family = poisson(link = "log"), data = raw1)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw1)

logistic_eq <- paste0(
  "Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
  "Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)


# Generate predicted values
raw1 <- raw1 %>%
  mutate(
    pred_poi = predict(POI, type = "response"),
    pred_log = predict(LOG, type = "response")
  )


raw1 <- raw1 %>%
  mutate(conc_quart = cut(
    conc,
    breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
    labels = c("1Q", "2Q", "3Q", "4Q"),
    include.lowest = TRUE
  ))

sm_cave <- ddply(raw1, c("conc_quart"), summarise,
                 mc = median(conc),
                 n = length(dvv),
                 rsp = sum(dvv),
                 rate = round(rsp / n, digits = 2),
                 lo = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[1], digits = 4), 
                             NA),
                 hi = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[2], digits = 4), 
                             NA)
)



ggplot(raw1, aes(x = conc)) +
  geom_point(aes(y = dvv), color = "blue", alpha = 0.5) +  # Raw data points
  geom_line(aes(y = pred_poi), color = "red", linetype = "dashed") +  # Poisson fit line
  geom_line(aes(y = pred_log), color = "green", linetype = "solid") +  # Logistic fit line
  geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                width = 0.2, position = position_dodge(0.05)) +
  geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
  labs(
    y = "Probability p",
    x = "conc",
    title = "Fitted Lines from Poisson and Logistic Regression Models"
  ) +
  theme_minimal()+
  ylim(0, 1.2) + 
  annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
  annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)

## Warning: Removed 15 row(s) containing missing values (geom_path).

ggplot(raw1, aes(x = conc, y = dvv)) + 
  geom_point(color = "blue") + 
  theme_bw() +
  stat_smooth(method = glm, method.args = list(family = "binomial"), 
              aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +  
  stat_smooth(method = glm, method.args = list(family = "poisson"), 
              aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +  
  geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                width = 0.2, position = position_dodge(0.05)) +
  geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
  xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
  labs(
    title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"), 
    color = "Model", linetype = "Model"
  ) +  
  theme(legend.position = "top") +
  ylim(0, 1.2) + 
  annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
  annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 42 rows containing missing values (geom_smooth).

new b=0.05 and mutate time accordingly

n <- 1000
a0 <- -6.54
b0 <- 0.00195 
b1<-b0/10
b2<-b0*25
b<-b0*25    #seq(b1, b2 , length.out=10) 
conc0 <- 50
eta <- 0.6

set.seed(123) 

raw1 <- tibble(
  id = 1:n,
  time = runif(n, 1, 400)
) %>%
  mutate(conc = rlnorm(n, log(conc0), eta)) %>%
  mutate(
    logna = a0 + log(time) + b * conc,
    na = exp(logna),
    lambda = na,  # Lambda is the expected rate from the Poisson model
    p = 1 - exp(-lambda),  # Calculate the probability of the event
    dvv = rbinom(n, size = 1, prob = p)  # Simulate binary outcome
  )





prob <- raw1 %>%
  summarise(p = mean(dvv)) %>%
  pull(p)


POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw1)
LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw1)

logistic_eq <- paste0(
  "Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
)
poisson_eq <- paste0(
  "Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
)


# Generate predicted values
raw1 <- raw1 %>%
  mutate(
    pred_poi = predict(POI, type = "response"),
    pred_log = predict(LOG, type = "response")
  )


raw1 <- raw1 %>%
  mutate(conc_quart = cut(
    conc,
    breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
    labels = c("1Q", "2Q", "3Q", "4Q"),
    include.lowest = TRUE
  ))

sm_cave <- ddply(raw1, c("conc_quart"), summarise,
                 mc = median(conc),
                 n = length(dvv),
                 rsp = sum(dvv),
                 rate = round(rsp / n, digits = 2),
                 lo = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[1], digits = 4), 
                             NA),
                 hi = ifelse(rsp > 0 & rsp < n, 
                             round(prop.test(rsp, n)$conf.int[2], digits = 4), 
                             NA)
)



ggplot(raw1, aes(x = conc)) +
  geom_point(aes(y = dvv), color = "blue", alpha = 0.5) +  # Raw data points
  geom_line(aes(y = pred_poi), color = "red", linetype = "dashed") +  # Poisson fit line
  geom_line(aes(y = pred_log), color = "green", linetype = "solid") +  # Logistic fit line
  geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                width = 0.2, position = position_dodge(0.05)) +
  geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
  labs(
    y = "Probability p",
    x = "conc",
    title = "Fitted Lines from Poisson and Logistic Regression Models"
  ) +
  theme_minimal()+
  ylim(0, 1.2) + 
  annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
  annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)

## Warning: Removed 2 row(s) containing missing values (geom_path).

ggplot(raw1, aes(x = conc, y = dvv)) + 
  geom_point(color = "blue") + 
  theme_bw() +
  stat_smooth(method = glm, method.args = list(family = "binomial"), 
              aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +  
  stat_smooth(method = glm, method.args = list(family = "poisson"), 
              aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +  
  geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                width = 0.2, position = position_dodge(0.05)) +
  geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
  xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
  labs(
    title = glue("Comparison of Logistic and Poisson Fits (Event Occurrence Probability = {round(prob, 3)})"), 
    color = "Model", linetype = "Model"
  ) +  
  theme(legend.position = "top") +
  ylim(0, 1.2) + 
  annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
  annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 44 rows containing missing values (geom_smooth).

n <- 1000
a0 <- -6.54
b0 <- 0.00195 
b1 <- b0 / 10
b2 <- b0 * 25
b_values <- seq(b1, b2, length.out = 10) 
conc0 <- 50
eta <- 0.6


generate_plot_for_b <- function(b) {
  set.seed(123) 
  
  raw0 <- tibble(
    id = 1:n,
    time = runif(n, 1, 400)
  ) %>%
    mutate(conc = rlnorm(n, log(conc0), eta)) %>%
    mutate(
      logna = a0 + log(time) + b * conc,
      na = exp(logna),
      dv = map_int(na, ~ rpois(1, lambda = .)),
      dvv = ifelse(dv >= 1, 1, dv)
    )
  
  prob <- raw0 %>%
    summarise(p = mean(dvv)) %>%
    pull(p)
  
  POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
  LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
  
  logistic_eq <- paste0(
    "Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
  )
  poisson_eq <- paste0(
    "Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
  )
  
  raw0 <- raw0 %>%
    mutate(conc_quart = cut(
      conc,
      breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
      labels = c("1Q", "2Q", "3Q", "4Q"),
      include.lowest = TRUE
    ))
  
  sm_cave <- ddply(raw0, c("conc_quart"), summarise,
                   mc = median(conc),
                   n = length(dvv),
                   rsp = sum(dvv),
                   rate = round(rsp / n, digits = 2),
                   lo = ifelse(rsp > 0 & rsp < n, 
                               round(prop.test(rsp, n)$conf.int[1], digits = 4), 
                               NA),
                   hi = ifelse(rsp > 0 & rsp < n, 
                               round(prop.test(rsp, n)$conf.int[2], digits = 4), 
                               NA)
  )
  
  plot <- ggplot(raw0, aes(x = conc, y = dvv)) + 
    geom_point(color = "blue") + 
    theme_bw() +
    stat_smooth(method = glm, method.args = list(family = "binomial"), 
                aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +  
    stat_smooth(method = glm, method.args = list(family = "poisson"), 
                aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +  
    geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                  width = 0.2, position = position_dodge(0.05)) +
    geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
    xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
    labs(
    title = "Comparison of Logistic and Poisson Fits",
    subtitle = glue("b = {round(b, 6)}, Event Occurrence Probability = {round(prob, 3)}"),
    color = "Model", linetype = "Model"
  ) + 
    theme(legend.position = "top") +
    ylim(0, 1.2) +  # Set y-axis limits
    annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 3, hjust = 0) +
    annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 3, hjust = 0)
  
  return(plot)
}

plots <- map(b_values, generate_plot_for_b)

walk(plots, print)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 21 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 31 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 38 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 40 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 41 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 42 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 43 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 42 rows containing missing values (geom_smooth).

if the same time

n <- 1000
a0 <- -6.54
b0 <- 0.00195 
b1 <- b0 / 10
b2 <- b0 * 25
b_values <- seq(b1, b2, length.out = 10)  # Sequence of 10 different b values
conc0 <- 50
eta <- 0.6

# Function to generate data, fit models, and create plots
generate_plot_for_b <- function(b) {
  set.seed(123) # Ensure reproducibility for each b value
  
  # Simulate the dataset
  raw0 <- tibble(
    id = 1:n,
    time = 100
  ) %>%
    mutate(conc = rlnorm(n, log(conc0), eta)) %>%
    mutate(
      logna = a0 + log(time) + b * conc,
      na = exp(logna),
      dv = map_int(na, ~ rpois(1, lambda = .)),
      dvv = ifelse(dv >= 1, 1, dv)
    ) %>%
    mutate(time=1)
  
  # Calculate the event occurrence probability
  prob <- raw0 %>%
    summarise(p = mean(dvv)) %>%
    pull(p)
  
  # Fit Poisson and Logistic models
  POI <- glm(dvv ~ conc + offset(log(time)), family = poisson(link = "log"), data = raw0)
  LOG <- glm(dvv ~ conc, family = binomial(link = "logit"), data = raw0)
  
  # Generate model summaries
  logistic_eq <- paste0(
    "Logistic: logit(p) = ", round(coef(LOG)[1], 3), " + ", round(coef(LOG)[2], 3), " * conc, AIC=", round(AIC(LOG), 2)
  )
  poisson_eq <- paste0(
    "Poisson: log(λ) = ", round(coef(POI)[1], 3), " + ", round(coef(POI)[2], 3), " * conc + log(time), AIC=", round(AIC(POI), 2)
  )
  
  # Define quartiles for conc
  raw0 <- raw0 %>%
    mutate(conc_quart = cut(
      conc,
      breaks = quantile(conc, probs = c(0, 0.25, 0.5, 0.75, 1)),
      labels = c("1Q", "2Q", "3Q", "4Q"),
      include.lowest = TRUE
    ))
  
  sm_cave <- ddply(raw0, c("conc_quart"), summarise,
                   mc = median(conc),
                   n = length(dvv),
                   rsp = sum(dvv),
                   rate = round(rsp / n, digits = 2),
                   lo = ifelse(rsp > 0 & rsp < n, 
                               round(prop.test(rsp, n)$conf.int[1], digits = 4), 
                               NA),
                   hi = ifelse(rsp > 0 & rsp < n, 
                               round(prop.test(rsp, n)$conf.int[2], digits = 4), 
                               NA)
  )
  
  plot <- ggplot(raw0, aes(x = conc, y = dvv)) + 
    geom_point(color = "blue") + 
    theme_bw() +
    stat_smooth(method = glm, method.args = list(family = "binomial"), 
                aes(color = "Logistic", linetype = "Logistic"), se = FALSE) +  
    stat_smooth(method = glm, method.args = list(family = "poisson"), 
                aes(color = "Poisson", linetype = "Poisson"), se = FALSE) +  
    geom_errorbar(data = sm_cave, aes(x = mc, y = rate, ymin = lo, ymax = hi), 
                  width = 0.2, position = position_dodge(0.05)) +
    geom_point(data = sm_cave, aes(x = mc, y = rate), color = "black") +
    xlab("Concentration (conc)") + ylab("Rate of DV (AE2)") +
    labs(
      title = glue("Comparison of Logistic and Poisson Fits (b = {round(b, 6)}, Event Occurrence Probability = {round(prob, 3)})"), 
      color = "Model", linetype = "Model"
    ) +  
    theme(legend.position = "top") +
    ylim(0, 1.2) +  # Set y-axis limits
    annotate("text", x = 0, y = 1.2, label = logistic_eq, size = 4, hjust = 0) +
    annotate("text", x = 0, y = 1.1, label = poisson_eq, size = 4, hjust = 0)
  
  return(plot)
}

plots <- map(b_values, generate_plot_for_b)

walk(plots, print)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 2 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 19 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 29 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 37 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 39 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 42 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 43 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 44 rows containing missing values (geom_smooth).

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 44 rows containing missing values (geom_smooth).

Poisson ER

Ping Chen

2024-08-12

R Markdown