finer-retention.R

# PrEP retention and scale intervention for uptake = 5K

rm(list=ls())

#options(tibble.print_max = 50, tibble.print_min = 50, dplyr.width=Inf)

# Libraries ----

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Data ----

full_res <- read.csv("../data/finer-retention-final_results.csv", header=FALSE) #final results
loc <- "../../finer-retention"

in1 <- read.table(paste0(loc,"/instance_1/output/parameters.txt"))
counts_dt <- read.csv(paste0(loc, "/instance_51/output/counts.csv"))

param_list <- read.csv("../../../data/prep-finer-retention-gradual-scale-interv.csv")
param_list_txt <- read.table("../../../data/prep-finer-retention-gradual-scale-interv.txt")

# Generate Response Dataframe ----

all_responses <- full_res[,c(1, 2, 4, 6, seq(17, 97, by=10), 98:109)]
names(all_responses) <- c("instance", 
                          "prevalence", "prev20to30", "prevge30",
                          "inc_transm_full", "inc_transm_20to30", "inc_transm_ge30",
                          "inc_transm_ext_full", "inc_transm_ext_20to30", "inc_transm_ext_ge30",
                          "inc_transm_ext_ent_full", "inc_transm_ext_ent_20to30", "inc_transm_ext_ent_ge30",
                          "newinf_transm_full", "newinf_transm_20to30", "newinf_transm_ge30",
                          "newinf_transm_ext_full", "newinf_transm_ext_20to30", "newinf_transm_ext_ge30",
                          "newinf_transm_ext_ent_full", "newinf_transm_ext_ent_20to30", "newinf_transm_ext_ent_ge30",
                          "popsize_full", "popsize_20to30", "popsize_ge30"
                          )
all_responses$param_set <- rep(1:42, each=10)
all_responses$seed <- rep(1:10, 42)
dim(all_responses)

## [1] 420  27

# Compute means by seeds ----
by_param_set <- group_by(all_responses, param_set)
responses <- summarise(by_param_set,
  prevalence = mean(prevalence),
  prev_20to30 = mean(prev20to30),
  prev_ge30 = mean(prevge30),
  inc_transm_full = mean(inc_transm_full), #transm. olny
  inc_transm_20to30 = mean(inc_transm_20to30),
  inc_transm_ge30 = mean(inc_transm_ge30),
  inc_transm_ext_full = mean(inc_transm_ext_full), #transm+ext.
  inc_transm_ext_20to30 = mean(inc_transm_ext_20to30), 
  inc_transm_ext_ge30 = mean(inc_transm_ext_ge30),
  inc_transm_ext_ent_full = mean(inc_transm_ext_ent_full),#transm+ext+entry
  inc_transm_ext_ent_20to30 = mean(inc_transm_ext_ent_20to30),
  inc_transm_ext_ent_ge30 = mean(inc_transm_ext_ent_ge30),
  newinf_transm_full = mean(newinf_transm_full), #transm. olny
  newinf_transm_20to30 = mean(newinf_transm_20to30),
  newinf_transm_ge30 = mean(newinf_transm_ge30),
  newinf_transm_ext_full = mean(newinf_transm_ext_full), #transm+ext.
  newinf_transm_ext_20to30 = mean(newinf_transm_ext_20to30), 
  newinf_transm_ext_ge30 = mean(newinf_transm_ext_ge30),
  newinf_transm_ext_ent_full = mean(newinf_transm_ext_ent_full),#transm+ext+entry
  newinf_transm_ext_ent_20to30 = mean(newinf_transm_ext_ent_20to30),
  newinf_transm_ext_ent_ge30 = mean(newinf_transm_ext_ent_ge30),
  popsize_full = mean(popsize_full),
  popsize_20to30 = mean(popsize_20to30),
  popsize_ge30 = mean(popsize_ge30)
  )

dim(responses)

## [1] 42 25

# All Responses ----

responses %>% 
  select(prevalence, prev_20to30, prev_ge30,
         inc_transm_ext_ent_full, inc_transm_ext_20to30, inc_transm_ext_ent_ge30,
         newinf_transm_full, newinf_transm_20to30, newinf_transm_ge30
         )

## # A tibble: 42 x 9
##    prevalence prev_20to30 prev_ge30 inc_transm_ext_ent… inc_transm_ext_20…
##         <dbl>       <dbl>     <dbl>               <dbl>              <dbl>
##  1       30.5        27.3      54.0                4.86               5.23
##  2       29.7        26.4      53.4                4.21               4.38
##  3       28.8        25.4      52.3                4.00               4.14
##  4       28.3        24.8      51.7                3.42               3.58
##  5       27.8        24.3      51.1                3.00               3.05
##  6       26.8        23.2      49.8                2.84               2.78
##  7       30.3        27.1      54.0                4.81               5.20
##  8       29.8        26.6      53.3                4.22               4.43
##  9       29.2        25.8      52.8                3.88               4.12
## 10       28.2        24.8      51.5                3.39               3.50
## # ... with 32 more rows, and 4 more variables:
## #   inc_transm_ext_ent_ge30 <dbl>, newinf_transm_full <dbl>,
## #   newinf_transm_20to30 <dbl>, newinf_transm_ge30 <dbl>

write.csv(responses, file="out/prep-finer-retention-scale-interv.csv")

# Select Unique Parameters From List ----

seq_of_ones <- seq(1, nrow(param_list), by=10)
View(param_list[seq_of_ones,]) #unique input parameters (without seed repetitions)
write.csv(param_list[seq_of_ones,], file="out/prep-finer-retention-scale-interv-param-list.csv")

# Summary of Results Regions ----

apply(responses, 2, summary)

##         param_set prevalence prev_20to30 prev_ge30 inc_transm_full
## Min.         1.00   26.80081    23.19095  49.78126        1.918300
## 1st Qu.     11.25   28.02762    24.54003  51.27093        2.388075
## Median      21.50   28.77824    25.35297  52.30400        3.034150
## Mean        21.50   28.91622    25.53390  52.32018        2.966969
## 3rd Qu.     31.75   29.80344    26.53536  53.35769        3.481950
## Max.        42.00   30.53796    27.39686  54.40194        4.110100
##         inc_transm_20to30 inc_transm_ge30 inc_transm_ext_full
## Min.             1.986700        2.445700            2.566300
## 1st Qu.          2.543475        2.965825            3.079550
## Median           3.251800        3.633150            3.691400
## Mean             3.204050        3.589964            3.612371
## 3rd Qu.          3.763925        4.150325            4.131550
## Max.             4.578800        5.007800            4.762800
##         inc_transm_ext_20to30 inc_transm_ext_ge30 inc_transm_ext_ent_full
## Min.                 2.627700            3.068600                2.658500
## 1st Qu.              3.259775            3.593100                3.151850
## Median               3.922300            4.258900                3.777400
## Mean                 3.850707            4.229569                3.700921
## 3rd Qu.              4.419150            4.869425                4.218200
## Max.                 5.226500            5.592200                4.860800
##         inc_transm_ext_ent_20to30 inc_transm_ext_ent_ge30
## Min.                     2.627700                3.068600
## 1st Qu.                  3.259775                3.593100
## Median                   3.922300                4.258900
## Mean                     3.850707                4.229569
## 3rd Qu.                  4.419150                4.869425
## Max.                     5.226500                5.592200
##         newinf_transm_full newinf_transm_20to30 newinf_transm_ge30
## Min.              122.1300              85.9600           27.47000
## 1st Qu.           137.5475              97.6050           29.73500
## Median            152.7250             107.8850           32.86000
## Mean              153.1438             108.7095           32.50857
## 3rd Qu.           167.9050             119.9350           35.25000
## Max.              179.6700             128.2200           37.52000
##         newinf_transm_ext_full newinf_transm_ext_20to30
## Min.                  147.9600                 103.5900
## 1st Qu.               163.8450                 115.0175
## Median                177.9150                 125.0800
## Mean                  179.0376                 125.9590
## 3rd Qu.               194.0050                 137.0475
## Max.                  206.2300                 146.0400
##         newinf_transm_ext_ge30 newinf_transm_ext_ent_full
## Min.                   31.5900                   151.8300
## 1st Qu.                33.9275                   167.3875
## Median                 36.8850                   181.5950
## Mean                   36.6069                   182.7093
## 3rd Qu.                39.1575                   197.7400
## Max.                   41.2700                   209.9500
##         newinf_transm_ext_ent_20to30 newinf_transm_ext_ent_ge30
## Min.                        103.5900                    31.5900
## 1st Qu.                     115.0175                    33.9275
## Median                      125.0800                    36.8850
## Mean                        125.9590                    36.6069
## 3rd Qu.                     137.0475                    39.1575
## Max.                        146.0400                    41.2700
##         popsize_full popsize_20to30 popsize_ge30
## Min.        5540.600       3534.200     1287.800
## 1st Qu.     5577.925       3558.025     1291.725
## Median      5583.450       3566.850     1292.700
## Mean        5587.155       3566.745     1292.643
## 3rd Qu.     5598.025       3576.175     1294.150
## Max.        5617.600       3594.400     1296.400

# Annual prevalence and incidence ----

# source function
source("../../../R/ph_summarize_functions.R")

## Loading required package: data.table

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

# set input
ranges = c(20, 29, 30, 34)

# call function
dt <- summarize_directory(loc, ranges=ranges)

# obtain tenth year prevalence 
prev.dt <- dt[["prevs"]]
prev.dt[, param_set := rep(1:42, each=100)]
prev.dt[, seed := rep(rep(1:10, each=10), 42)]
prev.dt.10 <- prev.dt[ycat == 10]
avg.prev.10yr <- prev.dt.10[, .(avg = mean(mean), 
                                avg_1=mean(mean_1), 
                                avg_2=mean(mean_2)), 
                            by=param_set]

write.csv(avg.prev.10yr, file="out/prep-finer-retention-scale-interv-tenyearprev.csv")

# obtain tenth year incidence
inc.dt <- dt[["incs"]]
inc.dt[, param_set := rep(1:42, each=100)]
inc.dt[, seed := rep(rep(1:10, each=10), 42)]
inc.dt.10 <- inc.dt[ycat == 10]
avg.inc.10yr <- inc.dt.10[, .(avg = mean(full_inc_mean), 
                              avg_1=mean(full_inc_mean_1), 
                              avg_2=mean(full_inc_mean_2)), 
                          by=param_set]

write.csv(avg.inc.10yr, file="out/prep-finer-reten-scale-interv-tenyearinc.csv")


# PrEP summary  ----

qplot(data=counts_dt[-1,], x=tick/365, y=on_prep/uninfected, geom="line")

# pull out data with constant retention, increasing uptake 

## example: retention = 180 days
instances <- seq(1, 51, by=10)

avg.inc.10yr[1:6,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         1 2.868012 3.166568 3.266411
## 2:         2 2.321143 2.378943 3.159001
## 3:         3 3.075368 3.311157 3.777251
## 4:         4 3.737690 4.173212 4.330921
## 5:         5 3.381977 3.776589 3.776993
## 6:         6 3.152826 3.354837 3.926295

avg.prev.10yr[1:6,]

##    param_set      avg    avg_1    avg_2
## 1:         1 23.72121 20.76198 43.80632
## 2:         2 21.81230 18.54677 41.66018
## 3:         3 24.89352 21.76652 45.96542
## 4:         4 27.48894 24.65054 49.15310
## 5:         5 26.49006 23.48681 47.84990
## 6:         6 24.77244 21.31195 46.76215

ret180 <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
  )
ret180 <- lapply(1:length(ret180), function (x) ret180[[x]][-1,])
names(ret180) <- c("13.7%-control", "20%", "30%", "40%", "50%", "60%")

ggplot(bind_rows(ret180, .id="f.prep.uptake"), 
       aes(x=tick/365, y=on_prep/uninfected, color=f.prep.uptake))+
  geom_line()+
  ggtitle("Retention 180 days")

## example: retention = 270 days
instances <- seq(61, 111, by=10)

avg.inc.10yr[7:12,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         7 2.838835 2.964749 3.848206
## 2:         8 2.480252 2.710139 2.858314
## 3:         9 2.115836 2.233536 2.694991
## 4:        10 3.791003 4.049026 4.988467
## 5:        11 3.467630 3.811276 3.974183
## 6:        12 3.528198 3.877755 4.113722

avg.prev.10yr[7:12,]

##    param_set      avg    avg_1    avg_2
## 1:         7 24.35914 21.03661 45.58584
## 2:         8 22.94836 19.67766 43.19039
## 3:         9 21.62423 18.28240 41.48552
## 4:        10 27.35984 24.49869 48.94803
## 5:        11 26.55763 23.64988 47.72785
## 6:        12 26.41819 23.55650 47.68384

ret270 <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
ret270 <- lapply(1:length(ret270), function (x) ret270[[x]][-1,])
names(ret270) <- c("13.7%-control", "20%", "30%", "40%", "50%", "60%")

ggplot(bind_rows(ret270, .id="f.prep.uptake"), 
       aes(x=tick/365, y=on_prep/uninfected, color=f.prep.uptake))+
  geom_line()+
  ggtitle("Retention 270 days")

## example: retention = 730 days
instances <- seq(361, 411, by=10)

avg.inc.10yr[37:42,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:        37 2.373204 2.496359 2.949707
## 2:        38 2.654437 2.830773 3.430470
## 3:        39 3.986814 4.458230 4.387616
## 4:        40 3.570478 3.870909 4.376573
## 5:        41 3.257816 3.570205 3.812559
## 6:        42 2.722475 2.922475 3.414911

avg.prev.10yr[37:42,]

##    param_set      avg    avg_1    avg_2
## 1:        37 22.42440 18.91546 43.12910
## 2:        38 22.63613 19.21860 43.11169
## 3:        39 28.09150 25.36003 50.00266
## 4:        40 26.88262 24.03765 48.25697
## 5:        41 25.74052 22.74634 46.86203
## 6:        42 24.00362 21.03039 44.28225

ret730 <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
ret730 <- lapply(1:length(ret730), function (x) ret730[[x]][-1,])
names(ret730) <- c("13.7%-control", "20%", "30%", "40%", "50%", "60%")

ggplot(bind_rows(ret730, .id="f.prep.uptake"), 
       aes(x=tick/365, y=on_prep/uninfected, color=f.prep.uptake))+
  geom_line()+
  ggtitle("Retention 730 days")

## uptake = control level, vary retention 
instances <- c(1, seq(61, 361, by=60))
avg.inc.10yr[(instances %/% 10)+1,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         1 2.868012 3.166568 3.266411
## 2:         7 2.838835 2.964749 3.848206
## 3:        13 2.892210 3.022447 3.908703
## 4:        19 3.196018 3.454373 3.847417
## 5:        25 3.352046 3.747394 3.821006
## 6:        31 3.474668 3.722400 4.227193
## 7:        37 2.373204 2.496359 2.949707

avg.prev.10yr[(instances %/% 10)+1,]

##    param_set      avg    avg_1    avg_2
## 1:         1 23.72121 20.76198 43.80632
## 2:         7 24.35914 21.03661 45.58584
## 3:        13 24.90015 21.64884 45.93196
## 4:        19 25.51873 22.49629 46.82067
## 5:        25 26.52239 23.60077 47.47805
## 6:        31 26.60324 23.71054 47.72980
## 7:        37 22.42440 18.91546 43.12910

uptake_control <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
uptake_control <- lapply(1:length(uptake_control), function (x) uptake_control[[x]][-1,])
names(uptake_control) <- c("180d-control", "270d", "365d", "450d", "540d", "630d", "730d")

ggplot(bind_rows(uptake_control, .id="retention"), 
       aes(x=tick/365, y=on_prep/uninfected, color=retention))+
  geom_line()+
  ggtitle("Uptake: Control=13.7%")

## uptake = final 20%, vary retention 
instances <- seq(11, 420, by=60)
avg.inc.10yr[(instances %/% 10)+1,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         2 2.321143 2.378943 3.159001
## 2:         8 2.480252 2.710139 2.858314
## 3:        14 2.638735 2.840854 3.178921
## 4:        20 2.963729 3.213561 3.440814
## 5:        26 2.870124 3.035489 3.555898
## 6:        32 3.054656 3.256618 3.726022
## 7:        38 2.654437 2.830773 3.430470

avg.prev.10yr[(instances %/% 10)+1,]

##    param_set      avg    avg_1    avg_2
## 1:         2 21.81230 18.54677 41.66018
## 2:         8 22.94836 19.67766 43.19039
## 3:        14 22.97174 19.67594 43.55902
## 4:        20 24.37223 21.22759 45.05139
## 5:        26 25.18480 21.98235 46.22817
## 6:        32 25.72602 22.50236 47.27235
## 7:        38 22.63613 19.21860 43.11169

uptake_f_20pct <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
uptake_f_20pct <- lapply(1:length(uptake_f_20pct), function (x) uptake_f_20pct[[x]][-1,])
names(uptake_f_20pct) <- c("180d-control", "270d", "365d", "450d", "540d", "630d", "730d")

ggplot(bind_rows(uptake_f_20pct, .id="retention"), 
       aes(x=tick/365, y=on_prep/uninfected, color=retention))+
  geom_line()+
  ggtitle("Uptake: Final=20%")

## uptake = final 40%, vary retention 
instances <- seq(31, 420, by=60)
avg.inc.10yr[(instances %/% 10)+1,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         4 3.737690 4.173212 4.330921
## 2:        10 3.791003 4.049026 4.988467
## 3:        16 2.619029 2.794495 2.969008
## 4:        22 2.255701 2.411604 2.740489
## 5:        28 2.423151 2.523228 3.094448
## 6:        34 2.534219 2.759427 2.974676
## 7:        40 3.570478 3.870909 4.376573

avg.prev.10yr[(instances %/% 10)+1,]

##    param_set      avg    avg_1    avg_2
## 1:         4 27.48894 24.65054 49.15310
## 2:        10 27.35984 24.49869 48.94803
## 3:        16 23.11834 19.76032 43.77446
## 4:        22 22.14073 18.74705 42.56083
## 5:        28 22.93975 19.71362 43.21561
## 6:        34 23.88630 20.78061 44.23056
## 7:        40 26.88262 24.03765 48.25697

uptake_f_40pct <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
uptake_f_40pct <- lapply(1:length(uptake_f_40pct), function (x) uptake_f_40pct[[x]][-1,])
names(uptake_f_40pct) <- c("180d-control", "270d", "365d", "450d", "540d", "630d", "730d")

ggplot(bind_rows(uptake_f_40pct, .id="retention"), 
       aes(x=tick/365, y=on_prep/uninfected, color=retention))+
  geom_line()+
  ggtitle("Uptake: Final=40%")

## uptake = final 60%, vary retention 
instances <- seq(51, 420, by=60)
avg.inc.10yr[(instances %/% 10)+1,] #outcomes

##    param_set      avg    avg_1    avg_2
## 1:         6 3.152826 3.354837 3.926295
## 2:        12 3.528198 3.877755 4.113722
## 3:        18 3.303426 3.512174 4.069867
## 4:        24 3.714033 4.013202 4.449341
## 5:        30 3.612080 4.014186 4.157187
## 6:        36 2.124714 2.246686 2.822669
## 7:        42 2.722475 2.922475 3.414911

avg.prev.10yr[(instances %/% 10)+1,]

##    param_set      avg    avg_1    avg_2
## 1:         6 24.77244 21.31195 46.76215
## 2:        12 26.41819 23.55650 47.68384
## 3:        18 26.48077 23.49133 47.76173
## 4:        24 27.01546 23.92697 48.97650
## 5:        30 27.64780 24.93791 49.19963
## 6:        36 21.79663 18.45338 41.96492
## 7:        42 24.00362 21.03039 44.28225

uptake_f_60pct <- lapply(instances, function(x)
  read.csv(paste0(loc, "/instance_", x, "/output/counts.csv"))
)
uptake_f_60pct <- lapply(1:length(uptake_f_60pct), function (x) uptake_f_60pct[[x]][-1,])
names(uptake_f_60pct) <- c("180d-control", "270d", "365d", "450d", "540d", "630d", "730d")

ggplot(bind_rows(uptake_f_60pct, .id="retention"), 
       aes(x=tick/365, y=on_prep/uninfected, color=retention))+
  geom_line()+
  ggtitle("Uptake: Final=60%")

# Population size summary ----

responses_popsize <- 
  responses %>% 
  select(popsize_full, popsize_20to30, popsize_ge30)

# Compute annual growth rate
starting.popsize <- counts_dt$vertex_count[2]
#starting.popsize.20to30 <- counts_dt$vertex_count_18[2]
#starting.popsize.ge30 <- counts_dt$vertex_count_B[2]  
  
n.ts <- nrow(counts_dt) - 1
n.yrs <- n.ts/365

growth.rate <- ((5600/5000)^(1/n.yrs) - 1)*100 #example

responses_popsize %>% 
  mutate(growthrate_allages = ((popsize_full/starting.popsize)^(1/n.yrs) - 1)*100
         #growthrate_20to30 = ((popsize_20to30/starting.popsize.20to30)^(1/n.yrs) - 1)*100,
         #growthrate_ge30 = ((popsize_ge30/starting.popsize.ge30)^(1/n.yrs) - 1)*100
         )

## # A tibble: 42 x 4
##    popsize_full popsize_20to30 popsize_ge30 growthrate_allages
##           <dbl>          <dbl>        <dbl>              <dbl>
##  1         5608           3584         1293            -0.108 
##  2         5583           3570         1293            -0.153 
##  3         5583           3565         1292            -0.153 
##  4         5583           3561         1294            -0.152 
##  5         5582           3564         1293            -0.155 
##  6         5580           3566         1292            -0.158 
##  7         5617           3576         1292            -0.0914
##  8         5585           3574         1290            -0.150 
##  9         5609           3582         1294            -0.107 
## 10         5598           3567         1296            -0.125 
## # ... with 32 more rows

#   # Network data ----
# 
# main_net <- readRDS(paste0(loc, "main_network_50000.RDS"))
# cas_net <- readRDS(paste0(loc, "casual_network_50000.RDS"))
# main_net
# cas_net
# 
# # Check simulated vs target statistics ----
# 
# ## mean degree
# network.edgecount(main_net)*2/network.size(main_net); 
# network.edgecount(cas_net)*2/network.size(cas_net); 687*2/3000
# 
# ## degree distribution
# degreedist(main_net)/network.size(main_net); c(1698, 1236, 54)/3000
# degreedist(cas_net)/network.size(cas_net); c(1767, 966, 204)/3000
# 
# ## partnership duration
# ## see `compute-partnership-durations.R` in 
# ##`/project/khanna7/Projects/midway2/BARS/transmission_model/r/testing` for example
# # pt_data <- fread(paste0(loc, "partnership_events.csv"))
# # pt_data_main <- pt_data[network_type == 0] #main network
# # pt_data_casual <- pt_data[network_type == 1] #casual network
# # 
# # pt_data_main[,.(counts = .N),by=.(p1,p2),][,unique(counts)] # 1 2 4 3
# # pt_data_main[,.(counts = .N),by=.(p1,p2),][,.N,by=counts]
# # pt_data_casual[,.(counts = .N),by=.(p1,p2),][,unique(counts)] # 1 2 3 4 6
# # pt_data_casual[,.(counts = .N),by=.(p1,p2),][,.N,by=counts]
# # 
# # res_main_2 <- pt_data_main[,.(tick, counts = .N),by=.(p1,p2),][counts == 2,.(dur = abs(diff(
# #   tick))), by=.(p1,p2)][,dur]
# # res_main_4 <- pt_data_main[,.(tick, counts = .N),by=.(p1,p2),][counts == 4,.(dur = abs(diff(
# #   tick))), by=.(p1,p2)][,dur]
# # res_casual_2 <- pt_data_casual[,.(tick, counts = .N),by=.(p1,p2),][counts == 2,.(dur = abs(diff(
# #   tick))), by=.(p1,p2)][,dur]
# # res_casual_4 <- pt_data_casual[,.(tick, counts = .N),by=.(p1,p2),][counts == 4,.(dur = abs(diff(
# #   tick))), by=.(p1,p2)][,dur]
# # res_casual_6 <- pt_data_casual[,.(tick, counts = .N),by=.(p1,p2),][counts == 6,.(dur = abs(diff(
# #   tick))), by=.(p1,p2)][,dur]
# # 
# # summary(c(res_main_2, res_main_4))
# # summary(c(res_casual_2, res_casual_4, res_casual_6))
# # 
# # 
# # ## mean age diff
# # main_el <- as.matrix(as.edgelist(main_net))
# # cas_el <- as.matrix(as.edgelist(cas_net))
# # mean(abs((main_net%v%"age")[main_el[,1]] - (main_net%v%"age")[main_el[,2]])); 2.91
# # mean(abs((cas_net%v%"age")[cas_el[,1]] - (cas_net%v%"age")[cas_el[,2]])); 3.14
# 
# ## population distribution
# length(which(main_net%v%"age"<26))/network.size(main_net)
# length(which(main_net%v%"age" >= 26))/network.size(main_net)
# 
# ## for one instance, add other forms of incidence
# summarize_inc <- function(filename="counts.csv"){
#   
#   counts <- read.csv(filename)
#   counts <- counts[-1,]
#   counts$total_inc <- counts$infected_via_transmission+counts$infected_at_entry+counts$infected_externally
#   counts_yr_chunks <- split(counts,
#                             ceiling(seq_along(counts$tick)/365))
#   
#   mean_inc_total <- lapply(counts_yr_chunks, function (x)
#     mean(x[2:nrow(x), "total_inc"] / x[1:(nrow(x) - 1), "uninfected"])
#   )
#   mean_inc_total_ten_yrs <- tail(mean_inc_total, 10)
#   mean_inc_total_ten_yrs <- unlist(mean_inc_total_ten_yrs)*365*100 
#   
#   mean_inc <- lapply(counts_yr_chunks, function (x)
#     mean(x[2:nrow(x), "infected_via_transmission"] / x[1:(nrow(x) - 1), "uninfected"])
#   )
#   mean_inc_ten_yrs <- tail(mean_inc, 10)
#   mean_inc_ten_yrs <- unlist(mean_inc_ten_yrs)*365*100 
#   full_result <- c(round(mean_inc_ten_yrs[10], 3), round(mean_inc_total_ten_yrs[10], 3))
#   
#   gte26_result <- round(mean_inc_ten_yrs, 3)
#   
#   return(c(full_result))
# }
# 
# summarize_inc(paste0(loc, "counts.csv"))
#   
# # Save data ----
# 
# save.image(file="age-20-30-34-w-newinf.RData")
#

finer-retention.R

khanna7

Tue Feb 20 14:38:11 2018