setwd("F:/AAA TESIS 2017/Resultados/CalculoR2/taller")
cundinamarca.raw<-read.csv("cundinamarca.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(cundinamarca.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
cundinamarca.raw$date_report<-as.Date(cundinamarca.raw$date_report_char, '%m/%d/%Y')
cundinamarca.raw$date_clinic<-as.Date(cundinamarca.raw$date_clinic_char, '%m/%d/%Y')
cundinamarca.raw$date_onset<-as.Date(cundinamarca.raw$date_onset_char, '%m/%d/%Y')
cundinamarca.raw$date_birth<-as.Date(cundinamarca.raw$date_birth_char, '%m/%d/%Y')
# cundinamarca.raw$date_report_char<-cundinamarca.raw$date_clinic_char<-cundinamarca.raw$date_onset_char<-cundinamarca.raw$date_birth_char<-NULL
cundinamarca.raw$gender <- as.numeric(cundinamarca.raw$sex=='F')
cundinamarca.raw$age_grp <- 1
cundinamarca.raw$age_grp[cundinamarca.raw$age>20 & cundinamarca.raw$age<=50] <- 2
cundinamarca.raw$age_grp[cundinamarca.raw$age>50] <- 3
cundinamarca.raw$child <- as.numeric(cundinamarca.raw$age_grp==1)
cundinamarca.raw$senior <- as.numeric(cundinamarca.raw$age_grp==3)
cundinamarca.raw$day <- as.numeric(cundinamarca.raw$date_onset - min(cundinamarca.raw$date_onset) + 1)
cundinamarca.raw <- cundinamarca.raw[order(cundinamarca.raw$date_onset, cundinamarca.raw$gender, cundinamarca.raw$age_grp),]
cundinamarca.raw$count <- 1
cundinamarca <- aggregate(count~date_onset, data=cundinamarca.raw, FUN='sum')
cundinamarca$day <- as.numeric(cundinamarca$date_onset - min(cundinamarca$date_onset) + 1)
cundinamarca.raw$age_grp <- factor(cundinamarca.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))
cundinamarca.raw$sex <- factor(cundinamarca.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F")) 
)
#Descriptive 
#Epidemic curve
counts <- table(cundinamarca.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Cundinamarca", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(cundinamarca.raw$sex)
sex.table
## 
##    F    M 
## 3065 2021
a<-prop.table(sex.table)
rbind(sex.table, a)
##                      F            M
## sex.table 3065.0000000 2021.0000000
## a            0.6026347    0.3973653
#Plot
counts <- table(cundinamarca.raw$sex)
barplot(counts, main="Casos de Zika por genero, Cundinamarca", 
        xlab="Genero")

#Grupo edad
counts <- table(cundinamarca.raw$age)
barplot(counts, main="Casos de Zika por Edad, Cundinamarca", 
        xlab="Edad", col=c("red"))

summary(cundinamarca.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   20.00   32.00   33.45   46.00   94.00
counts <- table(cundinamarca.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Cundinamarca", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(cundinamarca.raw$sex, cundinamarca.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Cundinamarca",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(cundinamarca, 'Cundinamarca, Colombia')
cundinamarca.strata <- aggregate(count~day+gender+child+senior, data=cundinamarca.raw, FUN='sum')
demo.cundinamarca.raw<-read.csv('pop_cundinamarca.csv', sep=';', header=TRUE, as.is=TRUE)
demo.cundinamarca.raw$child<-as.numeric(demo.cundinamarca.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.cundinamarca.raw$senior<-as.numeric(demo.cundinamarca.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.cundinamarca.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.cundinamarca.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.cundinamarca <- rbind(demo.male, demo.female)
save(cundinamarca, cundinamarca.strata, demo.cundinamarca, file='cundinamarca.RData')
load('cundinamarca.RData')

######GIRARDOT##########
girardot.raw<-read.csv('Girardot_update.csv', sep=';', header=TRUE, as.is=TRUE)
colnames(girardot.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
girardot.raw$date_report<-as.Date(girardot.raw$date_report_char, '%m/%d/%Y')
girardot.raw$date_clinic<-as.Date(girardot.raw$date_clinic_char, '%m/%d/%Y')
girardot.raw$date_onset<-as.Date(girardot.raw$date_onset_char, '%m/%d/%Y')
girardot.raw$date_birth<-as.Date(girardot.raw$date_birth_char, '%m/%d/%Y')
girardot.raw$date_report_char<-girardot.raw$date_clinic_char<-girardot.raw$date_onset_char<-girardot.raw$date_birth_char<-NULL
girardot.raw$gender <- as.numeric(girardot.raw$sex=='F')
girardot.raw$age_grp <- 1
girardot.raw$age_grp[girardot.raw$age>20 & girardot.raw$age<=50] <- 2
girardot.raw$age_grp[girardot.raw$age>50] <- 3
girardot.raw$child <- as.numeric(girardot.raw$age_grp==1)
girardot.raw$senior <- as.numeric(girardot.raw$age_grp==3)
girardot.raw$day <- as.numeric(girardot.raw$date_onset - min(girardot.raw$date_onset) + 1)
girardot.raw <- girardot.raw[order(girardot.raw$date_onset, girardot.raw$gender, girardot.raw$age_grp),]
girardot.raw$count <- 1
girardot <- aggregate(count~date_onset, data=girardot.raw, FUN='sum')
girardot$day <- as.numeric(girardot$date_onset - min(girardot$date_onset) + 1)
girardot.raw$age_grp <- factor(girardot.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))
#girardot.raw$sex <- factor(girardot.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))
#Descriptive 
#Epidemic curve

counts <- table(girardot.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Girardot", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(girardot.raw$sex)
sex.table
## 
##    F    M 
## 1157  779
a<-prop.table(sex.table)
rbind(sex.table, a)
##                     F          M
## sex.table 1157.000000 779.000000
## a            0.597624   0.402376
#Plot
counts <- table(girardot.raw$sex)
barplot(counts, main="Casos de Zika por genero, Girardot", 
        xlab="Genero")

#Grupo edad
counts <- table(girardot.raw$age)
barplot(counts, main="Casos de Zika por Edad, Girardot", 
        xlab="Edad", col=c("red"))

summary(girardot.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   24.00   34.00   34.81   46.00   89.00
counts <- table(girardot.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Girardot", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(girardot.raw$sex, girardot.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Girardot",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(girardot, 'Girardot, Colombia')
girardot.strata <- aggregate(count~day+gender+child+senior, data=girardot.raw, FUN='sum')
demo.girardot.raw<-read.csv('pop_Girardot.csv', sep=';', header=TRUE, as.is=TRUE)
demo.girardot.raw$child<-as.numeric(demo.girardot.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.girardot.raw$senior<-as.numeric(demo.girardot.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.girardot.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.girardot.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.girardot <- rbind(demo.male, demo.female)
save(girardot, girardot.strata, demo.girardot, file='girardot.RData')
load('girardot.RData')

##################la mesa##########
lamesa.raw<-read.csv("lamesa.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(lamesa.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
lamesa.raw$date_report<-as.Date(lamesa.raw$date_report_char, '%m/%d/%Y')
lamesa.raw$date_clinic<-as.Date(lamesa.raw$date_clinic_char, '%m/%d/%Y')
lamesa.raw$date_onset<-as.Date(lamesa.raw$date_onset_char, '%m/%d/%Y')
lamesa.raw$date_birth<-as.Date(lamesa.raw$date_birth_char, '%m/%d/%Y')
lamesa.raw$date_report_char<-lamesa.raw$date_clinic_char<-lamesa.raw$date_onset_char<-lamesa.raw$date_birth_char<-NULL
lamesa.raw$gender <- as.numeric(lamesa.raw$sex=='F')
lamesa.raw$age_grp <- 1
lamesa.raw$age_grp[lamesa.raw$age>20 & lamesa.raw$age<=50] <- 2
lamesa.raw$age_grp[lamesa.raw$age>50] <- 3
lamesa.raw$child <- as.numeric(lamesa.raw$age_grp==1)
lamesa.raw$senior <- as.numeric(lamesa.raw$age_grp==3)
lamesa.raw$day <- as.numeric(lamesa.raw$date_onset - min(lamesa.raw$date_onset) + 1)
lamesa.raw <- lamesa.raw[order(lamesa.raw$date_onset, lamesa.raw$gender, lamesa.raw$age_grp),]
lamesa.raw$count <- 1
lamesa <- aggregate(count~date_onset, data=lamesa.raw, FUN='sum')
lamesa$day <- as.numeric(lamesa$date_onset - min(lamesa$date_onset) + 1)
lamesa.raw$age_grp <- factor(lamesa.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))

#lamesa.raw$sex <- factor(lamesa.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))

#Descriptive 
#Epidemic curve
counts <- table(lamesa.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, La Mesa", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(lamesa.raw$sex)
sex.table
## 
##   F   M 
## 264 193
a<-prop.table(sex.table)
rbind(sex.table, a)
##                     F           M
## sex.table 264.0000000 193.0000000
## a           0.5776805   0.4223195
#Plot
counts <- table(lamesa.raw$sex)
barplot(counts, main="Casos de Zika por genero, La Mesa", 
        xlab="Genero")

#Grupo edad
counts <- table(lamesa.raw$age)
barplot(counts, main="Casos de Zika por Edad, La Mesa", 
        xlab="Edad", col=c("red"))

summary(lamesa.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   19.00   32.00   33.03   46.00   85.00
counts <- table(lamesa.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, La Mesa", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(lamesa.raw$sex, lamesa.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, La Mesa",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(lamesa, 'LaMesa, Colombia')
lamesa.strata <- aggregate(count~day+gender+child+senior, data=lamesa.raw, FUN='sum')
demo.lamesa.raw<-read.csv('pop_lamesa.csv', sep=';', header=TRUE, as.is=TRUE)
demo.lamesa.raw$child<-as.numeric(demo.lamesa.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.lamesa.raw$senior<-as.numeric(demo.lamesa.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.lamesa.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.lamesa.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.lamesa <- rbind(demo.male, demo.female)
save(lamesa, lamesa.strata, demo.lamesa, file='lamesa.RData')
load('lamesa.RData')


################EL COLEGIO##################################################
elcolegio.raw<-read.csv("elcolegio.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(elcolegio.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
elcolegio.raw$date_report<-as.Date(elcolegio.raw$date_report_char, '%m/%d/%Y')
elcolegio.raw$date_clinic<-as.Date(elcolegio.raw$date_clinic_char, '%m/%d/%Y')
elcolegio.raw$date_onset<-as.Date(elcolegio.raw$date_onset_char, '%m/%d/%Y')
elcolegio.raw$date_birth<-as.Date(elcolegio.raw$date_birth_char, '%m/%d/%Y')
elcolegio.raw$date_report_char<-elcolegio.raw$date_clinic_char<-elcolegio.raw$date_onset_char<-elcolegio.raw$date_birth_char<-NULL
elcolegio.raw$gender <- as.numeric(elcolegio.raw$sex=='F')
elcolegio.raw$age_grp <- 1
elcolegio.raw$age_grp[elcolegio.raw$age>20 & elcolegio.raw$age<=50] <- 2
elcolegio.raw$age_grp[elcolegio.raw$age>50] <- 3
elcolegio.raw$child <- as.numeric(elcolegio.raw$age_grp==1)
elcolegio.raw$senior <- as.numeric(elcolegio.raw$age_grp==3)
elcolegio.raw$day <- as.numeric(elcolegio.raw$date_onset - min(elcolegio.raw$date_onset) + 1)
elcolegio.raw <- elcolegio.raw[order(elcolegio.raw$date_onset, elcolegio.raw$gender, elcolegio.raw$age_grp),]
elcolegio.raw$count <- 1
elcolegio <- aggregate(count~date_onset, data=elcolegio.raw, FUN='sum')
elcolegio$day <- as.numeric(elcolegio$date_onset - min(elcolegio$date_onset) + 1)
elcolegio.raw$age_grp <- factor(elcolegio.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))

#elcolegio.raw$sex <- factor(elcolegio.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))

#Descriptive 
#Epidemic curve
counts <- table(elcolegio.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, El Colegio", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(elcolegio.raw$sex)
sex.table
## 
##   F   M 
## 401 243
a<-prop.table(sex.table)
rbind(sex.table, a)
##                     F           M
## sex.table 401.0000000 243.0000000
## a           0.6226708   0.3773292
#Plot
counts <- table(elcolegio.raw$sex)
barplot(counts, main="Casos de Zika por genero, El Colegio", 
        xlab="Genero")

#Grupo edad
counts <- table(elcolegio.raw$age)
barplot(counts, main="Casos de Zika por Edad, El Colegio", 
        xlab="Edad", col=c("red"))

summary(elcolegio.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   29.00   31.44   47.00   94.00
counts <- table(elcolegio.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, El Colegio", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(elcolegio.raw$sex, elcolegio.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, El Colegio",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(elcolegio, 'Elcolegio, Colombia')
elcolegio.strata <- aggregate(count~day+gender+child+senior, data=elcolegio.raw, FUN='sum')
demo.elcolegio.raw<-read.csv('pop_elcolegio.csv', sep=';', header=TRUE, as.is=TRUE)
demo.elcolegio.raw$child<-as.numeric(demo.elcolegio.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.elcolegio.raw$senior<-as.numeric(demo.elcolegio.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.elcolegio.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.elcolegio.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.elcolegio <- rbind(demo.male, demo.female)
save(elcolegio, elcolegio.strata, demo.elcolegio, file='elcolegio.RData')
load('elcolegio.RData')

#############fusagasuga###############################

fusagasuga.raw<-read.csv("fusagasuga.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(fusagasuga.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
fusagasuga.raw$date_report<-as.Date(fusagasuga.raw$date_report_char, '%m/%d/%Y')
fusagasuga.raw$date_clinic<-as.Date(fusagasuga.raw$date_clinic_char, '%m/%d/%Y')
fusagasuga.raw$date_onset<-as.Date(fusagasuga.raw$date_onset_char, '%m/%d/%Y')
fusagasuga.raw$date_birth<-as.Date(fusagasuga.raw$date_birth_char, '%m/%d/%Y')
fusagasuga.raw$date_report_char<-fusagasuga.raw$date_clinic_char<-fusagasuga.raw$date_onset_char<-fusagasuga.raw$date_birth_char<-NULL
fusagasuga.raw$gender <- as.numeric(fusagasuga.raw$sex=='F')
fusagasuga.raw$age_grp <- 1
fusagasuga.raw$age_grp[fusagasuga.raw$age>20 & fusagasuga.raw$age<=50] <- 2
fusagasuga.raw$age_grp[fusagasuga.raw$age>50] <- 3
fusagasuga.raw$child <- as.numeric(fusagasuga.raw$age_grp==1)
fusagasuga.raw$senior <- as.numeric(fusagasuga.raw$age_grp==3)
fusagasuga.raw$day <- as.numeric(fusagasuga.raw$date_onset - min(fusagasuga.raw$date_onset) + 1)
fusagasuga.raw <- fusagasuga.raw[order(fusagasuga.raw$date_onset, fusagasuga.raw$gender, fusagasuga.raw$age_grp),]
fusagasuga.raw$count <- 1
fusagasuga <- aggregate(count~date_onset, data=fusagasuga.raw, FUN='sum')
fusagasuga$day <- as.numeric(fusagasuga$date_onset - min(fusagasuga$date_onset) + 1)
fusagasuga.raw$age_grp <- factor(fusagasuga.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))

#fusagasuga.raw$sex <- factor(fusagasuga.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))

#Descriptive 
#Epidemic curve
counts <- table(fusagasuga.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Fusagasug?", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(fusagasuga.raw$sex)
sex.table
## 
##   F   M 
## 114  75
a<-prop.table(sex.table)
rbind(sex.table, a)
##                     F          M
## sex.table 114.0000000 75.0000000
## a           0.6031746  0.3968254
#Plot
counts <- table(fusagasuga.raw$sex)
barplot(counts, main="Casos de Zika por genero, Fusagasuga", 
        xlab="Genero")

#Grupo edad
counts <- table(fusagasuga.raw$age)
barplot(counts, main="Casos de Zika por Edad, Fusagasuga", 
        xlab="Edad", col=c("red"))

summary(fusagasuga.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   18.00   29.00   33.41   49.00   72.00
counts <- table(fusagasuga.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Fusagasuga", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(fusagasuga.raw$sex, fusagasuga.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Fusagasuga",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(fusagasuga, 'Fusagasuga, Colombia')
fusagasuga.strata <- aggregate(count~day+gender+child+senior, data=fusagasuga.raw, FUN='sum')
demo.fusagasuga.raw<-read.csv('pop_fusagasuga.csv', sep=';', header=TRUE, as.is=TRUE)
demo.fusagasuga.raw$child<-as.numeric(demo.fusagasuga.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.fusagasuga.raw$senior<-as.numeric(demo.fusagasuga.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.fusagasuga.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.fusagasuga.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.fusagasuga <- rbind(demo.male, demo.female)
save(fusagasuga, fusagasuga.strata, demo.fusagasuga, file='fusagasuga.RData')
load('fusagasuga.RData')
###############################ANAPOIMA######################################################

anapoima.raw<-read.csv("anapoima.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(anapoima.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
anapoima.raw$date_report<-as.Date(anapoima.raw$date_report_char, '%m/%d/%Y')
anapoima.raw$date_clinic<-as.Date(anapoima.raw$date_clinic_char, '%m/%d/%Y')
anapoima.raw$date_onset<-as.Date(anapoima.raw$date_onset_char, '%m/%d/%Y')
anapoima.raw$date_birth<-as.Date(anapoima.raw$date_birth_char, '%m/%d/%Y')
anapoima.raw$date_report_char<-anapoima.raw$date_clinic_char<-anapoima.raw$date_onset_char<-anapoima.raw$date_birth_char<-NULL
anapoima.raw$gender <- as.numeric(anapoima.raw$sex=='F')
anapoima.raw$age_grp <- 1
anapoima.raw$age_grp[anapoima.raw$age>20 & anapoima.raw$age<=50] <- 2
anapoima.raw$age_grp[anapoima.raw$age>50] <- 3
anapoima.raw$child <- as.numeric(anapoima.raw$age_grp==1)
anapoima.raw$senior <- as.numeric(anapoima.raw$age_grp==3)
anapoima.raw$day <- as.numeric(anapoima.raw$date_onset - min(anapoima.raw$date_onset) + 1)
anapoima.raw <- anapoima.raw[order(anapoima.raw$date_onset, anapoima.raw$gender, anapoima.raw$age_grp),]
anapoima.raw$count <- 1
anapoima <- aggregate(count~date_onset, data=anapoima.raw, FUN='sum')
anapoima$day <- as.numeric(anapoima$date_onset - min(anapoima$date_onset) + 1)
anapoima.raw$age_grp <- factor(anapoima.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))

#anapoima.raw$sex <- factor(anapoima.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))

#Descriptive 
#Epidemic curve
counts <- table(anapoima.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Anapoima", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(anapoima.raw$sex)
sex.table
## 
##  F  M 
## 92 71
a<-prop.table(sex.table)
rbind(sex.table, a)
##                    F          M
## sex.table 92.0000000 71.0000000
## a          0.5644172  0.4355828
#Plot
counts <- table(anapoima.raw$sex)
barplot(counts, main="Casos de Zika por genero, Anapoima", 
        xlab="Genero")

#Grupo edad
counts <- table(anapoima.raw$age)
barplot(counts, main="Casos de Zika por Edad, Anapoima", 
        xlab="Edad", col=c("red"))

summary(anapoima.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   22.50   39.00   38.52   52.00   89.00
counts <- table(anapoima.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, Anapoima", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(anapoima.raw$sex, anapoima.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, Anapoima",
        xlab="Age group", col=c("red", "darkblue"),
        legend = rownames(counts), beside=TRUE)

#plot.epicurve(anapoima, 'Anapoima, Colombia')
anapoima.strata <- aggregate(count~day+gender+child+senior, data=anapoima.raw, FUN='sum')
demo.anapoima.raw<-read.csv('pop_anapoima.csv', sep=';', header=TRUE, as.is=TRUE)
demo.anapoima.raw$child<-as.numeric(demo.anapoima.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.anapoima.raw$senior<-as.numeric(demo.anapoima.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.anapoima.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.anapoima.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.anapoima <- rbind(demo.male, demo.female)
save(anapoima, anapoima.strata, demo.anapoima, file='anapoima.RData')
load('anapoima.RData')


################LA PALMA######################

lapalma.raw<-read.csv("lapalma.csv", sep=";", header=TRUE, as.is=TRUE)
colnames(lapalma.raw)<-c('date_report_char', 'EW', 'year', 'age', 'uni_med', 'sex', 'pregnant', 
                         'date_clinic_char', 'date_onset_char', 'case_type', 'date_birth_char')
lapalma.raw$date_report<-as.Date(lapalma.raw$date_report_char, '%m/%d/%Y')
lapalma.raw$date_clinic<-as.Date(lapalma.raw$date_clinic_char, '%m/%d/%Y')
lapalma.raw$date_onset<-as.Date(lapalma.raw$date_onset_char, '%m/%d/%Y')
lapalma.raw$date_birth<-as.Date(lapalma.raw$date_birth_char, '%m/%d/%Y')
lapalma.raw$date_report_char<-lapalma.raw$date_clinic_char<-lapalma.raw$date_onset_char<-lapalma.raw$date_birth_char<-NULL
lapalma.raw$gender <- as.numeric(lapalma.raw$sex=='F')
lapalma.raw$age_grp <- 1
lapalma.raw$age_grp[lapalma.raw$age>20 & lapalma.raw$age<=50] <- 2
lapalma.raw$age_grp[lapalma.raw$age>50] <- 3
lapalma.raw$child <- as.numeric(lapalma.raw$age_grp==1)
lapalma.raw$senior <- as.numeric(lapalma.raw$age_grp==3)
lapalma.raw$day <- as.numeric(lapalma.raw$date_onset - min(lapalma.raw$date_onset) + 1)
lapalma.raw <- lapalma.raw[order(lapalma.raw$date_onset, lapalma.raw$gender, lapalma.raw$age_grp),]
lapalma.raw$count <- 1
lapalma <- aggregate(count~date_onset, data=lapalma.raw, FUN='sum')
lapalma$day <- as.numeric(lapalma$date_onset - min(lapalma$date_onset) + 1)
lapalma.raw$age_grp <- factor(lapalma.raw$age_grp,
                               levels = c(1,2,3),
                               labels = c("Children", "Adults", "Senior"))

#lapalma.raw$sex <- factor(lapalma.raw$sex,
                               #levels = c(1,2),
                               #labels = c("M", "F"))

#Descriptive 
#Epidemic curve
counts <- table(lapalma.raw$date_onset)
barplot(counts, main="Epidemic curve Zika, Lapalma", 
        xlab="Date of onset of Symptoms")

#Genero
sex.table <- table(lapalma.raw$sex)
sex.table
## 
##  F  M 
## 99 60
a<-prop.table(sex.table)
rbind(sex.table, a)
##                    F          M
## sex.table 99.0000000 60.0000000
## a          0.6226415  0.3773585
#Plot
counts <- table(lapalma.raw$sex)
barplot(counts, main="Casos de Zika por genero, La Palma", 
        xlab="Genero")

#Grupo edad
counts <- table(lapalma.raw$age)
barplot(counts, main="Casos de Zika por Edad, La Palma ", 
        xlab="Edad", col=c("red"))

summary(lapalma.raw$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   12.00   30.00   32.25   50.00   78.00
counts <- table(lapalma.raw$age_grp)
barplot(counts, main="Casos de Zika por Grupo edad, La Palma ", 
        xlab="Grupo de Edad",  col=c("orange"))

# Agrupado por grupo edad & genero
counts <- table(lapalma.raw$sex, lapalma.raw$age_grp)
barplot(counts, main="Casos de Zika por edad y genero, La Palma ", xlab="Age group", col=c("red", "darkblue"), legend = rownames(counts), beside=TRUE)

#plot.epicurve(lapalma, ' La Palma, Colombia')

lapalma.strata <- aggregate(count~day+gender+child+senior, data=lapalma.raw, FUN='sum')
demo.lapalma.raw<-read.csv('pop_lapalma.csv', sep=';', header=TRUE, as.is=TRUE)
demo.lapalma.raw$child<-as.numeric(demo.lapalma.raw$agegrp %in% c('0-4', '5-9', '10-14', '15-19'))
demo.lapalma.raw$senior<-as.numeric(demo.lapalma.raw$agegrp %in% c('50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '>80'))
demo.male <- aggregate(male~child+senior, data=demo.lapalma.raw, FUN='sum')
colnames(demo.male) <- c('child', 'senior', 'total')
demo.male$gender <- 0
demo.female <- aggregate(female~child+senior, data=demo.lapalma.raw, FUN='sum')
colnames(demo.female) <- c('child', 'senior', 'total')
demo.female$gender <- 1
demo.lapalma <- rbind(demo.male, demo.female)
save(lapalma, lapalma.strata, demo.lapalma, file='lapalma.RData')
load('lapalma.RData')

#################EPIESTIM#####################################################################################################################

source('EstimationR.R')
fill.dates <- function(cases)
{ 
   cases.sort <- cases[order(cases$date_onset), ]
   tmp.date<-tmp.day<-NULL
   for(i in 2:nrow(cases))
   {
      d <- as.numeric(cases.sort$date_onset[i]-cases.sort$date_onset[i-1])
      if(d>1)
      {
         for(j in 1:(d-1))
         {
            tmp.date <- c(tmp.date, cases.sort$date_onset[i-1]+j)
            tmp.day <- c(tmp.day, cases.sort$day[i-1]+j)
         }
      }
   }
   if(length(tmp.date) > 0)
   {
      data.tmp<-data.frame(tmp.date, rep(0,length(tmp.date)), tmp.day)
      colnames(data.tmp) <- c('date_onset', 'count', 'day')
      data.tmp$date_onset <- as.Date(data.tmp$date_onset, origin = "1970-01-01")
      data.new <- rbind(cases, data.tmp)
      data.new <- data.new[order(data.new$date_onset),]
      return(data.new)
   }  else  return(cases.sort)
}


fit.for.plot <- function(cases, start.time, stop.time, SI.mean, SI.sd, main.text)
{
#cases<-cundinamarca; start.time<-8:67; stop.time<-14:73; SI.mean<-c(10,15,20); SI.sd<-c(3,3,3); main.text<-'Cundinamarca, Colombia'
   R.mean <- R.lower <- R.upper <- NULL
   for(i in 1:length(SI.mean))
{
      fit<-EstimateR(cases$count, T.Start=start.time, T.End=stop.time, method=c("ParametricSI"),
                     Mean.SI=SI.mean[i], Std.SI=SI.sd[i], plot=TRUE)
      R.mean <- rbind(R.mean, fit$R[,3])
      R.lower <- rbind(R.lower, fit$R[,5])
      R.upper <- rbind(R.upper, fit$R[,11])
   }
   R.max <- max(R.upper, na.rm=TRUE)
   #par(mfrow=c(2,1))
   plot(stop.time, R.mean[1,], type='n', xlim=c(0,nrow(cases)), ylim=c(0, R.max), xaxt='n', xlab='', ylab='R', main=main.text)
   k<-which(!is.na(R.upper[1,]) & !is.na(R.lower[1,]))
   polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[1,k]), R.lower[1,k]), col = 'grey80', border = NA)
   k<-which(!is.na(R.upper[2,]) & !is.na(R.lower[2,]))
   polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[2,k]), R.lower[2,k]), col = 'grey80', border = NA)
   k<-which(!is.na(R.upper[3,]) & !is.na(R.lower[3,]))
   polygon(c(rev(stop.time[k]), stop.time[k]), c(rev(R.upper[3,k]), R.lower[3,k]), col = 'grey80', border = NA)
   lines(stop.time, R.mean[1,], lwd=1.5, lty=1, col='green')
   lines(stop.time, R.mean[2,], lwd=1.5, lty=2, col='red')
   lines(stop.time, R.mean[3,], lwd=1.5, lty=3, col='blue')
   legend("topright", legend=paste('Mean SI: ', SI.mean, ' days', sep=''),
          lty=c(1,2,3), lwd=c(2,2,2), col=c('green', 'red', 'blue'), text.width=30)
   x<-1:nrow(cases)
   x<-c(x-0.5, x+0.5)
   k<-order(x)
   x<-x[k]
   y<-rep(cases$count,each=2)
   plot(x,y, ylim=c(0, max(y)), type='l', lty=1, lwd=1.5, xlab='Day', ylab='Case Number')
}   
# fitting the model using EpiEstim
par(mfcol=c(2,2))  

####### PARA HACER LAS CURVAS#####
tmp <- fill.dates(cundinamarca) 
fit.for.plot(tmp, 8:284, 14:290, c(10,15,20), c(3,3,3), 'Cundinamarca, Colombia')  
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

write.csv(cundinamarca, file="cundinamarcaR.csv") #guardamos en un archivo CSV###

tmp <- fill.dates(girardot) 
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Girardot, Colombia')

tmp <- fill.dates(elcolegio) 
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'El Colegio, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

tmp <- fill.dates(lamesa) 
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'La Mesa, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

tmp <- fill.dates(fusagasuga) 
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Fusagasuga, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

tmp <- fill.dates(anapoima) 
fit.for.plot(tmp, 8:84, 14:90, c(10,15,20), c(3,3,3), 'Anapoima, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

tmp <- fill.dates(lapalma) 
fit.for.plot(tmp, 8:184, 14:190, c(10,15,20), c(3,3,3), 'La Palma, Colombia')
## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

## Warning in EstimateR(cases$count, T.Start = start.time, T.End =
## stop.time, : You're estimating R too early in the epidemic to get the
## desired posterior CV.

######AHORA TRABAJO CON EL SCRIPT "EstimationR"########

#############################################
#############################################
# Functions                                 #
#############################################
#############################################

#########################################################
# Discretized serial interval (assuming a shifted gamma #
# distribution (with shift 1)                           #
#########################################################



DiscrSI<-function(k,mu,sigma)
{
    if(sigma<0)
    {
        stop("sigma must be >=0.")
    }
    a <- ((mu-1)/sigma)^2
    b <- sigma^2/(mu-1)
    CDFGamma<-function(k,a,b)
    {
        return(pgamma(k,shape=a,scale=b))
    }
    res <- k*CDFGamma(k,a,b)+(k-2)*CDFGamma(k-2,a,b)-2*(k-1)*CDFGamma(k-1,a,b)
    res <- res+a*b*(2*CDFGamma(k-1,a+1,b)-CDFGamma(k-2,a+1,b)-CDFGamma(k,a+1,b))
    res <- max(0,res)
    return(res)
}

#########################################################
# Calculates Lambda_t = Sum_1^t I_{t-s} * w_s           #
# with I incidence and w discrete SI distribution       #                          
#########################################################

OverallInfectivity <-function (I,SI.Distr)
{
    if(is.vector(I)==FALSE)
    {
        stop("I must be a vector.")
    }
    T<-length(I)
    for(i in 1:T)
    {
        if(I[i]<0)
        {
            stop("I must be a positive vector.")
        }
    }
    if(is.vector(SI.Distr)==FALSE)
    {
        stop("SI.Distr must be a vector.")
    }
    if(SI.Distr[1]!=0)
    {
        stop("SI.Distr[1] needs to be 0.")
    }
    if(length(SI.Distr)>1)
    {
        for(i in 2:length(SI.Distr))
        {
            if(SI.Distr[i]<0)
            {
                stop("SI.Distr must be a positive vector.")
            }
        }
    }
    if(abs(sum(SI.Distr)-1)>0.01)
    {
        stop("SI.Distr must sum to 1.")
    }
    lambda <- vector() 
    lambda[2:length(I)] <- sapply(2:length(I), function(t) sum(SI.Distr[1:t]*I[t:1],na.rm=TRUE)) 
    return(lambda)
}

#########################################################
# Main functions                                        #
#########################################################

EstimateR<-function(I,T.Start,T.End,method=c("NonParametricSI","ParametricSI","UncertainSI"),n1=NULL,n2=NULL,Mean.SI=NULL,Std.SI=NULL,Std.Mean.SI=NULL,Min.Mean.SI=NULL,Max.Mean.SI=NULL,Std.Std.SI=NULL,Min.Std.SI=NULL,Max.Std.SI=NULL,SI.Distr=NULL,Mean.Prior=5,Std.Prior=5,CV.Posterior=0.3,plot=FALSE,leg.pos="topright")
{
}
    ### Functions ###
    
    #########################################################
    # Calculates the cumulative incidence over time steps   #
    #########################################################

    CalculIncidencePerTimeStep <-function (I,T.Start,T.End)
    {
        NbTimePeriods <- length(T.Start)
        IncidencePerTimeStep <- sapply(1:NbTimePeriods, function(i) sum(I[T.Start[i]:T.End[i]]))
        return(IncidencePerTimeStep)
    }
    
    #########################################################
    # Calculates the parameters of the Gamma posterior      #
    # distribution from the discrete SI distribution        #
    #########################################################

    PosteriorFromSIDistr <-function (I,SI.Distr,a.Prior,b.Prior,T.Start,T.End)
    {
        NbTimePeriods<-length(T.Start)
        lambda <- OverallInfectivity(I,SI.Distr)
        FinalMean.SI<-sum(SI.Distr*(0:(length(SI.Distr)-1)))

        a.Posterior<-vector()
        b.Posterior<-vector()
        a.Posterior <- lapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){a.Prior + sum(I[T.Start[t]:T.End[t]])}else{NA})
        b.Posterior <- lapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){1 / ( 1/b.Prior + sum(lambda[T.Start[t]:T.End[t]]) )}else{NA})
        
        return(list(a.Posterior,b.Posterior))
    }
    
    #########################################################
    # Samples from the Gamma posterior distribution for a   #
    # given mean SI and std SI                              #
    #########################################################
    
    SampleFromPosterior <-function (SampleSize,I,Mean.SI,Std.SI,a.Prior,b.Prior,T.Start,T.End)
    {
        NbTimePeriods<-length(T.Start)
        
        SI.Distr <- sapply(1:T, function(t) DiscrSI(t-1,Mean.SI,Std.SI))
        FinalMean.SI <- sum(SI.Distr*(0:(length(SI.Distr)-1)))
        lambda <- OverallInfectivity(I,SI.Distr)

        a.Posterior <- vector()
        b.Posterior <- vector()
        a.Posterior <- sapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){a.Prior + sum(I[T.Start[t]:T.End[t]])}else{NA})
        b.Posterior <- sapply(1:(NbTimePeriods), function(t) if(T.End[t]>FinalMean.SI){1 / ( 1/b.Prior + sum(lambda[T.Start[t]:T.End[t]],na.rm=TRUE) )}else{NA})

        SampleR.Posterior <- sapply(1:(NbTimePeriods), function(t) if(!is.na(a.Posterior[t])){rgamma(SampleSize,shape=unlist(a.Posterior[t]), scale = unlist(b.Posterior[t]))}else{rep(NA,SampleSize)})
        
        return(list(SampleR.Posterior,SI.Distr))
    }