setwd("F:/AAA TESIS 2017/Resultados/CalculoR2/taller")
cundinamarca.raw<-read.csv("cundinamarca.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(cundinamarca.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
cundinamarca.raw$date_report<-as.Date(cundinamarca.raw$date_report_char, '%m/%d/%Y')
cundinamarca.raw$date_clinic<-as.Date(cundinamarca.raw$date_clinic_char, '%m/%d/%Y')
cundinamarca.raw$date_onset<-as.Date(cundinamarca.raw$date_onset_char, '%m/%d/%Y')
cundinamarca.raw$date_birth<-as.Date(cundinamarca.raw$date_birth_char, '%m/%d/%Y')
cundinamarca.raw$date_report_char<-cundinamarca.raw$date_clinic_char<-cundinamarca.raw$date_onset_char<-cundinamarca.raw$date_birth_char<-NULL
cundinamarca.raw$gender <- as.numeric(cundinamarca.raw$sex=='F')
cundinamarca.raw$age_grp <- 1
cundinamarca.raw$age_grp[cundinamarca.raw$age>20 & cundinamarca.raw$age<=50] <- 2
cundinamarca.raw$age_grp[cundinamarca.raw$age>50] <- 3
cundinamarca.raw$child <- as.numeric(cundinamarca.raw$age_grp==1)
cundinamarca.raw$senior <- as.numeric(cundinamarca.raw$age_grp==3)
cundinamarca.raw$day <- as.numeric(cundinamarca.raw$date_onset - min(cundinamarca.raw$date_onset) + 1)
cundinamarca.raw <- cundinamarca.raw[order(cundinamarca.raw$date_onset, cundinamarca.raw$gender, cundinamarca.raw$age_grp),]
cundinamarca.raw$count <- 1
cundinamarca <- aggregate(count~date_onset, data=cundinamarca.raw, FUN='sum')
cundinamarca$day <- as.numeric(cundinamarca$date_onset - min(cundinamarca$date_onset) + 1)
cundinamarca.raw$age_grp <- factor(cundinamarca.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
cundinamarca.raw$sex <- factor(cundinamarca.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
)
#Descriptive
#Epidemic curve
counts <- table(cundinamarca.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Cundinamarca",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(cundinamarca.raw$sex)
sex.table
##
## F M
## 3065 2021
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 3065.0000000 2021.0000000
## a 0.6026347 0.3973653
#Plot
counts <- table(cundinamarca.raw$sex)
barplot(counts, main="Casos de Zika por genero, Cundinamarca",
xlab="Genero")

#Grupo edad
counts <- table(cundinamarca.raw$age)
barplot(counts, main="Casos de Zika por Edad, Cundinamarca",
xlab="Edad", col=c("red"))

summary(cundinamarca.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 20.00 32.00 33.45 46.00 94.00
counts <- table(cundinamarca.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Cundinamarca",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(cundinamarca.raw$sex, cundinamarca.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Cundinamarca",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(cundinamarca, 'Cundinamarca, Colombia')
cundinamarca.strata <- aggregate(count~day+gender+child+senior, data=cundinamarca.raw, FUN='sum')
demo.cundinamarca.raw<-read.csv('pop_cundinamarca.csv', sep=';', header=TRUE, as.is=TRUE)
demo.cundinamarca.raw$child<-as.numeric(demo.cundinamarca.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.cundinamarca.raw$senior<-as.numeric(demo.cundinamarca.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.cundinamarca.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.cundinamarca.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.cundinamarca <- rbind(demo.male, demo.female)
save(cundinamarca, cundinamarca.strata, demo.cundinamarca, file='cundinamarca.RData')
load('cundinamarca.RData')
######GIRARDOT##########
girardot.raw<-read.csv('Girardot_update.csv', sep=';', header=TRUE, as.is=TRUE)
colnames(girardot.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
girardot.raw$date_report<-as.Date(girardot.raw$date_report_char, '%m/%d/%Y')
girardot.raw$date_clinic<-as.Date(girardot.raw$date_clinic_char, '%m/%d/%Y')
girardot.raw$date_onset<-as.Date(girardot.raw$date_onset_char, '%m/%d/%Y')
girardot.raw$date_birth<-as.Date(girardot.raw$date_birth_char, '%m/%d/%Y')
girardot.raw$date_report_char<-girardot.raw$date_clinic_char<-girardot.raw$date_onset_char<-girardot.raw$date_birth_char<-NULL
girardot.raw$gender <- as.numeric(girardot.raw$sex=='F')
girardot.raw$age_grp <- 1
girardot.raw$age_grp[girardot.raw$age>20 & girardot.raw$age<=50] <- 2
girardot.raw$age_grp[girardot.raw$age>50] <- 3
girardot.raw$child <- as.numeric(girardot.raw$age_grp==1)
girardot.raw$senior <- as.numeric(girardot.raw$age_grp==3)
girardot.raw$day <- as.numeric(girardot.raw$date_onset - min(girardot.raw$date_onset) + 1)
girardot.raw <- girardot.raw[order(girardot.raw$date_onset, girardot.raw$gender, girardot.raw$age_grp),]
girardot.raw$count <- 1
girardot <- aggregate(count~date_onset, data=girardot.raw, FUN='sum')
girardot$day <- as.numeric(girardot$date_onset - min(girardot$date_onset) + 1)
girardot.raw$age_grp <- factor(girardot.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#girardot.raw$sex <- factor(girardot.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(girardot.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Girardot",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(girardot.raw$sex)
sex.table
##
## F M
## 1157 779
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 1157.000000 779.000000
## a 0.597624 0.402376
#Plot
counts <- table(girardot.raw$sex)
barplot(counts, main="Casos de Zika por genero, Girardot",
xlab="Genero")

#Grupo edad
counts <- table(girardot.raw$age)
barplot(counts, main="Casos de Zika por Edad, Girardot",
xlab="Edad", col=c("red"))

summary(girardot.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 24.00 34.00 34.81 46.00 89.00
counts <- table(girardot.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Girardot",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(girardot.raw$sex, girardot.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Girardot",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(girardot, 'Girardot, Colombia')
girardot.strata <- aggregate(count~day+gender+child+senior, data=girardot.raw, FUN='sum')
demo.girardot.raw<-read.csv('pop_Girardot.csv', sep=';', header=TRUE, as.is=TRUE)
demo.girardot.raw$child<-as.numeric(demo.girardot.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.girardot.raw$senior<-as.numeric(demo.girardot.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.girardot.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.girardot.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.girardot <- rbind(demo.male, demo.female)
save(girardot, girardot.strata, demo.girardot, file='girardot.RData')
load('girardot.RData')
##################la mesa##########
lamesa.raw<-read.csv("lamesa.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(lamesa.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
lamesa.raw$date_report<-as.Date(lamesa.raw$date_report_char, '%m/%d/%Y')
lamesa.raw$date_clinic<-as.Date(lamesa.raw$date_clinic_char, '%m/%d/%Y')
lamesa.raw$date_onset<-as.Date(lamesa.raw$date_onset_char, '%m/%d/%Y')
lamesa.raw$date_birth<-as.Date(lamesa.raw$date_birth_char, '%m/%d/%Y')
lamesa.raw$date_report_char<-lamesa.raw$date_clinic_char<-lamesa.raw$date_onset_char<-lamesa.raw$date_birth_char<-NULL
lamesa.raw$gender <- as.numeric(lamesa.raw$sex=='F')
lamesa.raw$age_grp <- 1
lamesa.raw$age_grp[lamesa.raw$age>20 & lamesa.raw$age<=50] <- 2
lamesa.raw$age_grp[lamesa.raw$age>50] <- 3
lamesa.raw$child <- as.numeric(lamesa.raw$age_grp==1)
lamesa.raw$senior <- as.numeric(lamesa.raw$age_grp==3)
lamesa.raw$day <- as.numeric(lamesa.raw$date_onset - min(lamesa.raw$date_onset) + 1)
lamesa.raw <- lamesa.raw[order(lamesa.raw$date_onset, lamesa.raw$gender, lamesa.raw$age_grp),]
lamesa.raw$count <- 1
lamesa <- aggregate(count~date_onset, data=lamesa.raw, FUN='sum')
lamesa$day <- as.numeric(lamesa$date_onset - min(lamesa$date_onset) + 1)
lamesa.raw$age_grp <- factor(lamesa.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#lamesa.raw$sex <- factor(lamesa.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(lamesa.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, La Mesa",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(lamesa.raw$sex)
sex.table
##
## F M
## 264 193
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 264.0000000 193.0000000
## a 0.5776805 0.4223195
#Plot
counts <- table(lamesa.raw$sex)
barplot(counts, main="Casos de Zika por genero, La Mesa",
xlab="Genero")

#Grupo edad
counts <- table(lamesa.raw$age)
barplot(counts, main="Casos de Zika por Edad, La Mesa",
xlab="Edad", col=c("red"))

summary(lamesa.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 19.00 32.00 33.03 46.00 85.00
counts <- table(lamesa.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, La Mesa",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(lamesa.raw$sex, lamesa.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, La Mesa",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(lamesa, 'LaMesa, Colombia')
lamesa.strata <- aggregate(count~day+gender+child+senior, data=lamesa.raw, FUN='sum')
demo.lamesa.raw<-read.csv('pop_lamesa.csv', sep=';', header=TRUE, as.is=TRUE)
demo.lamesa.raw$child<-as.numeric(demo.lamesa.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.lamesa.raw$senior<-as.numeric(demo.lamesa.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.lamesa.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.lamesa.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.lamesa <- rbind(demo.male, demo.female)
save(lamesa, lamesa.strata, demo.lamesa, file='lamesa.RData')
load('lamesa.RData')
################EL COLEGIO##################################################
elcolegio.raw<-read.csv("elcolegio.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(elcolegio.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
elcolegio.raw$date_report<-as.Date(elcolegio.raw$date_report_char, '%m/%d/%Y')
elcolegio.raw$date_clinic<-as.Date(elcolegio.raw$date_clinic_char, '%m/%d/%Y')
elcolegio.raw$date_onset<-as.Date(elcolegio.raw$date_onset_char, '%m/%d/%Y')
elcolegio.raw$date_birth<-as.Date(elcolegio.raw$date_birth_char, '%m/%d/%Y')
elcolegio.raw$date_report_char<-elcolegio.raw$date_clinic_char<-elcolegio.raw$date_onset_char<-elcolegio.raw$date_birth_char<-NULL
elcolegio.raw$gender <- as.numeric(elcolegio.raw$sex=='F')
elcolegio.raw$age_grp <- 1
elcolegio.raw$age_grp[elcolegio.raw$age>20 & elcolegio.raw$age<=50] <- 2
elcolegio.raw$age_grp[elcolegio.raw$age>50] <- 3
elcolegio.raw$child <- as.numeric(elcolegio.raw$age_grp==1)
elcolegio.raw$senior <- as.numeric(elcolegio.raw$age_grp==3)
elcolegio.raw$day <- as.numeric(elcolegio.raw$date_onset - min(elcolegio.raw$date_onset) + 1)
elcolegio.raw <- elcolegio.raw[order(elcolegio.raw$date_onset, elcolegio.raw$gender, elcolegio.raw$age_grp),]
elcolegio.raw$count <- 1
elcolegio <- aggregate(count~date_onset, data=elcolegio.raw, FUN='sum')
elcolegio$day <- as.numeric(elcolegio$date_onset - min(elcolegio$date_onset) + 1)
elcolegio.raw$age_grp <- factor(elcolegio.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#elcolegio.raw$sex <- factor(elcolegio.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(elcolegio.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, El Colegio",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(elcolegio.raw$sex)
sex.table
##
## F M
## 401 243
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 401.0000000 243.0000000
## a 0.6226708 0.3773292
#Plot
counts <- table(elcolegio.raw$sex)
barplot(counts, main="Casos de Zika por genero, El Colegio",
xlab="Genero")

#Grupo edad
counts <- table(elcolegio.raw$age)
barplot(counts, main="Casos de Zika por Edad, El Colegio",
xlab="Edad", col=c("red"))

summary(elcolegio.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.00 29.00 31.44 47.00 94.00
counts <- table(elcolegio.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, El Colegio",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(elcolegio.raw$sex, elcolegio.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, El Colegio",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(elcolegio, 'Elcolegio, Colombia')
elcolegio.strata <- aggregate(count~day+gender+child+senior, data=elcolegio.raw, FUN='sum')
demo.elcolegio.raw<-read.csv('pop_elcolegio.csv', sep=';', header=TRUE, as.is=TRUE)
demo.elcolegio.raw$child<-as.numeric(demo.elcolegio.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.elcolegio.raw$senior<-as.numeric(demo.elcolegio.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.elcolegio.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.elcolegio.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.elcolegio <- rbind(demo.male, demo.female)
save(elcolegio, elcolegio.strata, demo.elcolegio, file='elcolegio.RData')
load('elcolegio.RData')
#############fusagasuga###############################
fusagasuga.raw<-read.csv("fusagasuga.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(fusagasuga.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
fusagasuga.raw$date_report<-as.Date(fusagasuga.raw$date_report_char, '%m/%d/%Y')
fusagasuga.raw$date_clinic<-as.Date(fusagasuga.raw$date_clinic_char, '%m/%d/%Y')
fusagasuga.raw$date_onset<-as.Date(fusagasuga.raw$date_onset_char, '%m/%d/%Y')
fusagasuga.raw$date_birth<-as.Date(fusagasuga.raw$date_birth_char, '%m/%d/%Y')
fusagasuga.raw$date_report_char<-fusagasuga.raw$date_clinic_char<-fusagasuga.raw$date_onset_char<-fusagasuga.raw$date_birth_char<-NULL
fusagasuga.raw$gender <- as.numeric(fusagasuga.raw$sex=='F')
fusagasuga.raw$age_grp <- 1
fusagasuga.raw$age_grp[fusagasuga.raw$age>20 & fusagasuga.raw$age<=50] <- 2
fusagasuga.raw$age_grp[fusagasuga.raw$age>50] <- 3
fusagasuga.raw$child <- as.numeric(fusagasuga.raw$age_grp==1)
fusagasuga.raw$senior <- as.numeric(fusagasuga.raw$age_grp==3)
fusagasuga.raw$day <- as.numeric(fusagasuga.raw$date_onset - min(fusagasuga.raw$date_onset) + 1)
fusagasuga.raw <- fusagasuga.raw[order(fusagasuga.raw$date_onset, fusagasuga.raw$gender, fusagasuga.raw$age_grp),]
fusagasuga.raw$count <- 1
fusagasuga <- aggregate(count~date_onset, data=fusagasuga.raw, FUN='sum')
fusagasuga$day <- as.numeric(fusagasuga$date_onset - min(fusagasuga$date_onset) + 1)
fusagasuga.raw$age_grp <- factor(fusagasuga.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#fusagasuga.raw$sex <- factor(fusagasuga.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(fusagasuga.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Fusagasugá",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(fusagasuga.raw$sex)
sex.table
##
## F M
## 114 75
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 114.0000000 75.0000000
## a 0.6031746 0.3968254
#Plot
counts <- table(fusagasuga.raw$sex)
barplot(counts, main="Casos de Zika por genero, Fusagasuga",
xlab="Genero")

#Grupo edad
counts <- table(fusagasuga.raw$age)
barplot(counts, main="Casos de Zika por Edad, Fusagasuga",
xlab="Edad", col=c("red"))

summary(fusagasuga.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 18.00 29.00 33.41 49.00 72.00
counts <- table(fusagasuga.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Fusagasuga",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(fusagasuga.raw$sex, fusagasuga.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Fusagasuga",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(fusagasuga, 'Fusagasuga, Colombia')
fusagasuga.strata <- aggregate(count~day+gender+child+senior, data=fusagasuga.raw, FUN='sum')
demo.fusagasuga.raw<-read.csv('pop_fusagasuga.csv', sep=';', header=TRUE, as.is=TRUE)
demo.fusagasuga.raw$child<-as.numeric(demo.fusagasuga.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.fusagasuga.raw$senior<-as.numeric(demo.fusagasuga.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.fusagasuga.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.fusagasuga.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.fusagasuga <- rbind(demo.male, demo.female)
save(fusagasuga, fusagasuga.strata, demo.fusagasuga, file='fusagasuga.RData')
load('fusagasuga.RData')
###############################ANAPOIMA######################################################
anapoima.raw<-read.csv("anapoima.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(anapoima.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
anapoima.raw$date_report<-as.Date(anapoima.raw$date_report_char, '%m/%d/%Y')
anapoima.raw$date_clinic<-as.Date(anapoima.raw$date_clinic_char, '%m/%d/%Y')
anapoima.raw$date_onset<-as.Date(anapoima.raw$date_onset_char, '%m/%d/%Y')
anapoima.raw$date_birth<-as.Date(anapoima.raw$date_birth_char, '%m/%d/%Y')
anapoima.raw$date_report_char<-anapoima.raw$date_clinic_char<-anapoima.raw$date_onset_char<-anapoima.raw$date_birth_char<-NULL
anapoima.raw$gender <- as.numeric(anapoima.raw$sex=='F')
anapoima.raw$age_grp <- 1
anapoima.raw$age_grp[anapoima.raw$age>20 & anapoima.raw$age<=50] <- 2
anapoima.raw$age_grp[anapoima.raw$age>50] <- 3
anapoima.raw$child <- as.numeric(anapoima.raw$age_grp==1)
anapoima.raw$senior <- as.numeric(anapoima.raw$age_grp==3)
anapoima.raw$day <- as.numeric(anapoima.raw$date_onset - min(anapoima.raw$date_onset) + 1)
anapoima.raw <- anapoima.raw[order(anapoima.raw$date_onset, anapoima.raw$gender, anapoima.raw$age_grp),]
anapoima.raw$count <- 1
anapoima <- aggregate(count~date_onset, data=anapoima.raw, FUN='sum')
anapoima$day <- as.numeric(anapoima$date_onset - min(anapoima$date_onset) + 1)
anapoima.raw$age_grp <- factor(anapoima.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#anapoima.raw$sex <- factor(anapoima.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(anapoima.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Anapoima",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(anapoima.raw$sex)
sex.table
##
## F M
## 92 71
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 92.0000000 71.0000000
## a 0.5644172 0.4355828
#Plot
counts <- table(anapoima.raw$sex)
barplot(counts, main="Casos de Zika por genero, Anapoima",
xlab="Genero")

#Grupo edad
counts <- table(anapoima.raw$age)
barplot(counts, main="Casos de Zika por Edad, Anapoima",
xlab="Edad", col=c("red"))

summary(anapoima.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 22.50 39.00 38.52 52.00 89.00
counts <- table(anapoima.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Anapoima",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(anapoima.raw$sex, anapoima.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Anapoima",
xlab="Age group", col=c("red", "darkblue"),
legend = rownames(counts), beside=TRUE)

#plot.epicurve(anapoima, 'Anapoima, Colombia')
anapoima.strata <- aggregate(count~day+gender+child+senior, data=anapoima.raw, FUN='sum')
demo.anapoima.raw<-read.csv('pop_anapoima.csv', sep=';', header=TRUE, as.is=TRUE)
demo.anapoima.raw$child<-as.numeric(demo.anapoima.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.anapoima.raw$senior<-as.numeric(demo.anapoima.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.anapoima.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.anapoima.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.anapoima <- rbind(demo.male, demo.female)
save(anapoima, anapoima.strata, demo.anapoima, file='anapoima.RData')
load('anapoima.RData')
################LA PALMA######################
lapalma.raw<-read.csv("lapalma.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(lapalma.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant',
'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
lapalma.raw$date_report<-as.Date(lapalma.raw$date_report_char, '%m/%d/%Y')
lapalma.raw$date_clinic<-as.Date(lapalma.raw$date_clinic_char, '%m/%d/%Y')
lapalma.raw$date_onset<-as.Date(lapalma.raw$date_onset_char, '%m/%d/%Y')
lapalma.raw$date_birth<-as.Date(lapalma.raw$date_birth_char, '%m/%d/%Y')
lapalma.raw$date_report_char<-lapalma.raw$date_clinic_char<-lapalma.raw$date_onset_char<-lapalma.raw$date_birth_char<-NULL
lapalma.raw$gender <- as.numeric(lapalma.raw$sex=='F')
lapalma.raw$age_grp <- 1
lapalma.raw$age_grp[lapalma.raw$age>20 & lapalma.raw$age<=50] <- 2
lapalma.raw$age_grp[lapalma.raw$age>50] <- 3
lapalma.raw$child <- as.numeric(lapalma.raw$age_grp==1)
lapalma.raw$senior <- as.numeric(lapalma.raw$age_grp==3)
lapalma.raw$day <- as.numeric(lapalma.raw$date_onset - min(lapalma.raw$date_onset) + 1)
lapalma.raw <- lapalma.raw[order(lapalma.raw$date_onset, lapalma.raw$gender, lapalma.raw$age_grp),]
lapalma.raw$count <- 1
lapalma <- aggregate(count~date_onset, data=lapalma.raw, FUN='sum')
lapalma$day <- as.numeric(lapalma$date_onset - min(lapalma$date_onset) + 1)
lapalma.raw$age_grp <- factor(lapalma.raw$age_grp,
levels = c(1,2,3),
labels = c("Children", "Adults", "Senior"))
#lapalma.raw$sex <- factor(lapalma.raw$sex,
#levels = c(1,2),
#labels = c("M", "F"))
#Descriptive
#Epidemic curve
counts <- table(lapalma.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Lapalma",
xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(lapalma.raw$sex)
sex.table
##
## F M
## 99 60
a<-prop.table(sex.table)
rbind(sex.table, a)
## F M
## sex.table 99.0000000 60.0000000
## a 0.6226415 0.3773585
#Plot
counts <- table(lapalma.raw$sex)
barplot(counts, main="Casos de Zika por genero, La Palma",
xlab="Genero")

#Grupo edad
counts <- table(lapalma.raw$age)
barplot(counts, main="Casos de Zika por Edad, La Palma ",
xlab="Edad", col=c("red"))

summary(lapalma.raw$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 12.00 30.00 32.25 50.00 78.00
counts <- table(lapalma.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, La Palma ",
xlab="Grupo de Edad", col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(lapalma.raw$sex, lapalma.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, La Palma ", xlab="Age group", col=c("red", "darkblue"), legend = rownames(counts), beside=TRUE)

#plot.epicurve(lapalma, ' La Palma, Colombia')
lapalma.strata <- aggregate(count~day+gender+child+senior, data=lapalma.raw, FUN='sum')
demo.lapalma.raw<-read.csv('pop_lapalma.csv', sep=';', header=TRUE, as.is=TRUE)
demo.lapalma.raw$child<-as.numeric(demo.lapalma.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.lapalma.raw$senior<-as.numeric(demo.lapalma.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.lapalma.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.lapalma.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.lapalma <- rbind(demo.male, demo.female)
save(lapalma, lapalma.strata, demo.lapalma, file='lapalma.RData')
load('lapalma.RData')
#################EPIESTIM#####################################################################################################################
source('EstimationR.R')
fill.dates <- function(cases)
{
cases.sort <- cases[order(cases$date_onset), ]
tmp.date<-tmp.day<-NULL
for(i in 2:nrow(cases))
{
d <- as.numeric(cases.sort$date_onset[i]-cases.sort$date_onset[i-1])
if(d>1)
{
for(j in 1:(d-1))
{
tmp.date <- c(tmp.date, cases.sort$date_onset[i-1]+j)
tmp.day <- c(tmp.day, cases.sort$day[i-1]+j)
}
}
}
if(length(tmp.date) > 0)
{
data.tmp<-data.frame(tmp.date, rep(0,length(tmp.date)), tmp.day)
colnames(data.tmp) <- c('date_onset', 'count', 'day')
data.tmp$date_onset <- as.Date(data.tmp$date_onset, origin = "1970-01-01")
data.new <- rbind(cases, data.tmp)
data.new <- data.new[order(data.new$date_onset),]
return(data.new)
} else return(cases.sort)
}
fit.for.plot <- function(cases, start.time, stop.time, SI.mean, SI.sd, main.text)
{
#cases<-cundinamarca; start.time<-8:67; stop.time<-14:73; SI.mean<-c(10,15,20); SI.sd<-c(3,3,3); main.text<-'Cundinamarca, Colombia'
R.mean <- R.lower <- R.upper <- NULL
for(i in 1:length(SI.mean))
{
fit<-EstimateR(cases$count, T.Start=start.time, T.End=stop.time, method=c("ParametricSI"),
Mean.SI=SI.mean[i], Std.SI=SI.sd[i], plot=TRUE)
R.mean <- rbind(R.mean, fit$R[,3])
R.lower <- rbind(R.lower, fit$R[,5])
R.upper <- rbind(R.upper, fit$R[,11])
}
R.max <- max(R.upper, na.rm=TRUE)
#par(mfrow=c(2,1))
plot(stop.time, R.mean[1,], type='n', xlim=c(0,nrow(cases)), ylim=c(0, R.max), xaxt='n', xlab='', ylab='R', main=main.text)
k<-which(!is.na(R.upper[1,]) & !is.na(R.lower[1,]))
polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[1,k]), R.lower[1,k]), col = 'grey80', border = NA)
k<-which(!is.na(R.upper[2,]) & !is.na(R.lower[2,]))
polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[2,k]), R.lower[2,k]), col = 'grey80', border = NA)
k<-which(!is.na(R.upper[3,]) & !is.na(R.lower[3,]))
polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[3,k]), R.lower[3,k]), col = 'grey80', border = NA)
lines(stop.time, R.mean[1,], lwd=1.5, lty=1, col='green')
lines(stop.time, R.mean[2,], lwd=1.5, lty=2, col='red')
lines(stop.time, R.mean[3,], lwd=1.5, lty=3, col='blue')
legend("topright", legend=paste('Mean SI: ', SI.mean, ' days', sep=''),
lty=c(1,2,3), lwd=c(2,2,2), col=c('green', 'red', 'blue'), text.width=30)
x<-1:nrow(cases)
x<-c(x-0.5, x+0.5)
k<-order(x)
x<-x[k]
y<-rep(cases$count,each=2)
plot(x,y, ylim=c(0, max(y)), type='l', lty=1, lwd=1.5, xlab='Day', ylab='Case Number')
}
# fitting the model using EpiEstim
par(mfcol=c(2,2))
####### PARA HACER LAS CURVAS#####
tmp <- fill.dates(cundinamarca)
fit.for.plot(tmp, 8:284, 14:290, c(10,15,20), c(3,3,3), 'Cundinamarca, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


write.csv(cundinamarca, file="cundinamarcaR.csv") #guardamos en un archivo CSV###
tmp <- fill.dates(girardot)
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Girardot, Colombia')




tmp <- fill.dates(elcolegio)
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'El Colegio, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


tmp <- fill.dates(lamesa)
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'La Mesa, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


tmp <- fill.dates(fusagasuga)
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Fusagasuga, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


tmp <- fill.dates(anapoima)
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Anapoima, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


tmp <- fill.dates(lapalma)
fit.for.plot(tmp, 8:184, 14:190, c(10,15,20), c(3,3,3), 'La Palma, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.


######AHORA TRABAJO CON EL SCRIPT "EstimationR"########
#############################################
#############################################
# Functions #
#############################################
#############################################
#########################################################
# Discretized serial interval (assuming a shifted gamma #
# distribution (with shift 1) #
#########################################################
DiscrSI<-function(k,mu,sigma)
{
if(sigma<0)
{
stop("sigma must be >=0.")
}
a <- ((mu-1)/sigma)^2
b <- sigma^2/(mu-1)
CDFGamma<-function(k,a,b)
{
return(pgamma(k,shape=a,scale=b))
}
res <- k*CDFGamma(k,a,b)+(k-2)*CDFGamma(k-2,a,b)-2*(k-1)*CDFGamma(k-1,a,b)
res <- res+a*b*(2*CDFGamma(k-1,a+1,b)-CDFGamma(k-2,a+1,b)-CDFGamma(k,a+1,b))
res <- max(0,res)
return(res)
}
#########################################################
# Calculates Lambda_t = Sum_1^t I_{t-s} * w_s #
# with I incidence and w discrete SI distribution #
#########################################################
OverallInfectivity <-function (I,SI.Distr)
{
if(is.vector(I)==FALSE)
{
stop("I must be a vector.")
}
T<-length(I)
for(i in 1:T)
{
if(I[i]<0)
{
stop("I must be a positive vector.")
}
}
if(is.vector(SI.Distr)==FALSE)
{
stop("SI.Distr must be a vector.")
}
if(SI.Distr[1]!=0)
{
stop("SI.Distr[1] needs to be 0.")
}
if(length(SI.Distr)>1)
{
for(i in 2:length(SI.Distr))
{
if(SI.Distr[i]<0)
{
stop("SI.Distr must be a positive vector.")
}
}
}
if(abs(sum(SI.Distr)-1)>0.01)
{
stop("SI.Distr must sum to 1.")
}
lambda <- vector()
lambda[2:length(I)] <- sapply(2:length(I), function(t) sum(SI.Distr[1:t]*I[t:1],na.rm=TRUE))
return(lambda)
}
#########################################################
# Main functions #
#########################################################
EstimateR<-function(I,T.Start,T.End,method=c("NonParametricSI","ParametricSI","UncertainSI"),n1=NULL,n2=NULL,Mean.SI=NULL,Std.SI=NULL,Std.Mean.SI=NULL,Min.Mean.SI=NULL,Max.Mean.SI=NULL,Std.Std.SI=NULL,Min.Std.SI=NULL,Max.Std.SI=NULL,SI.Distr=NULL,Mean.Prior=5,Std.Prior=5,CV.Posterior=0.3,plot=FALSE,leg.pos="topright")
{
}
### Functions ###
#########################################################
# Calculates the cumulative incidence over time steps #
#########################################################
CalculIncidencePerTimeStep <-function (I,T.Start,T.End)
{
NbTimePeriods <- length(T.Start)
IncidencePerTimeStep <- sapply(1:NbTimePeriods, function(i) sum(I[T.Start[i]:T.End[i]]))
return(IncidencePerTimeStep)
}
#########################################################
# Calculates the parameters of the Gamma posterior #
# distribution from the discrete SI distribution #
#########################################################
PosteriorFromSIDistr <-function (I,SI.Distr,a.Prior,b.Prior,T.Start,T.End)
{
NbTimePeriods<-length(T.Start)
lambda <- OverallInfectivity(I,SI.Distr)
FinalMean.SI<-sum(SI.Distr*(0:(length(SI.Distr)-1)))
a.Posterior<-vector()
b.Posterior<-vector()
a.Posterior <- lapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){a.Prior + sum(I[T.Start[t]:T.End[t]])}else{NA})
b.Posterior <- lapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){1 / ( 1/b.Prior + sum(lambda[T.Start[t]:T.End[t]]) )}else{NA})
return(list(a.Posterior,b.Posterior))
}
#########################################################
# Samples from the Gamma posterior distribution for a #
# given mean SI and std SI #
#########################################################
SampleFromPosterior <-function (SampleSize,I,Mean.SI,Std.SI,a.Prior,b.Prior,T.Start,T.End)
{
NbTimePeriods<-length(T.Start)
SI.Distr <- sapply(1:T, function(t) DiscrSI(t-1,Mean.SI,Std.SI))
FinalMean.SI <- sum(SI.Distr*(0:(length(SI.Distr)-1)))
lambda <- OverallInfectivity(I,SI.Distr)
a.Posterior <- vector()
b.Posterior <- vector()
a.Posterior <- sapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){a.Prior + sum(I[T.Start[t]:T.End[t]])}else{NA})
b.Posterior <- sapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){1 / ( 1/b.Prior + sum(lambda[T.Start[t]:T.End[t]],na.rm=TRUE) )}else{NA})
SampleR.Posterior <- sapply(1:(NbTimePeriods), function(t) if(!is.na(a.Posterior[t])){rgamma(SampleSize,shape=unlist(a.Posterior[t]), scale = unlist(b.Posterior[t]))}else{rep(NA,SampleSize)})
return(list(SampleR.Posterior,SI.Distr))
}
