setwd("~/Dropbox/MyR/consult/ISA/tese")
library(ggplot2)
library(reshape2)
library(hnp) # library(lme4)
## Loading required package: MASS
# Enlatado todas as variaveis
(dat <- read.table("enlatado_multi.txt", header = T, dec = ","))
##        iso exp  espor conidio germi incu lat incid
## 1  SP06210   1 59.933    0.75 56.22    2   2   100
## 2  SP05160   1 43.583    0.54 56.22    3   3   100
## 3  PR09638   1 28.333    0.35 84.78    2   2   100
## 4  SP09839   1 51.250    0.64 62.56    2   2   100
## 5  SP08345   1 92.750    1.16 54.45    2   2   100
## 6  SP09885   1  2.222    0.03 91.11    2   2   100
## 7  SP06210   2 39.370    0.49 64.55    2   2   100
## 8  SP05160   2 32.300    0.40 84.00    3   3   100
## 9  PR09638   2 34.481    0.43 71.33    2   2   100
## 10 SP09839   2 37.100    0.46 90.89    2   2   100
## 11 SP08345   2 53.633    0.67 50.22    2   2   100
## 12 SP09885   2  1.867    0.02 54.56    2   2   100
datlong <- melt(dat, id.vars = c("iso", "exp"))

# Enlatado » Expandido
datuni <- read.table("enlatado_uni.txt", header = T, dec = ",", na.strings = ".")
df <- datuni[,c(1,2,3,4)]
#names(df)[1] = "race"
#write.csv(df,"espor.csv",  na="NA", row.names = FALSE)
head(datlong); tail(datlong)
##       iso exp variable  value
## 1 SP06210   1    espor 59.933
## 2 SP05160   1    espor 43.583
## 3 PR09638   1    espor 28.333
## 4 SP09839   1    espor 51.250
## 5 SP08345   1    espor 92.750
## 6 SP09885   1    espor  2.222
##        iso exp variable value
## 67 SP06210   2    incid   100
## 68 SP05160   2    incid   100
## 69 PR09638   2    incid   100
## 70 SP09839   2    incid   100
## 71 SP08345   2    incid   100
## 72 SP09885   2    incid   100
ggplot(datlong, aes(x=iso, y=value, fill = factor(exp)))+
        geom_point()+
        facet_grid(variable~., scales = "free") + 
        labs(title="Enlatado - todas as variaveis",
        x ="Isolados", y = "Valor")

head(datuni)
##       iso exp rep espor.cont espor.conidio incu lat incid
## 1 SP06210   1   1   24.33333     0.3041667    2   2   100
## 2 SP06210   1   2   35.66667     0.4458333   NA  NA    NA
## 3 SP06210   1   3   56.00000     0.7000000   NA  NA    NA
## 4 SP06210   1   4   65.66667     0.8208333   NA  NA    NA
## 5 SP06210   1   5   79.66667     0.9958333   NA  NA    NA
## 6 SP06210   1   6   64.33333     0.8041667   NA  NA    NA
ggplot(aes(y = espor.cont, x = iso, fill = factor(exp)), 
       data=subset(datuni, !is.na(espor.cont)) ) + geom_boxplot()+
        labs(title="Enlatado - Esporulação",
        x ="Isolados", y = "Esporos.mL-1")

Germinação

# Variable distrib. 
hist(dat$germi, main = "Histograma de germinação de esporos")

# Quasipoisson distrib.
fit0.germi = glm(germi ~ iso +  factor(exp), 
           data = dat, family = quasipoisson(link=log)) 
anova(fit0.germi, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: germi
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                           11     37.354              
## iso          5   15.377         6     21.976 0.7091 0.6424
## factor(exp)  1    0.127         5     21.849 0.0293 0.8708
summary(fit0.germi)
## 
## Call:
## glm(formula = germi ~ iso + factor(exp), family = quasipoisson(link = log), 
##     data = dat)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7        8  
## -0.4464  -1.6178   0.8625  -1.5653   0.3814   2.1709   0.4326   1.5012  
##       9       10       11       12  
## -0.8804   1.4592  -0.3834  -2.3428  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.34490    0.18221  23.846 2.42e-06 ***
## isoSP05160   -0.10735    0.24231  -0.443    0.676    
## isoSP06210   -0.25667    0.25238  -1.017    0.356    
## isoSP08345   -0.39975    0.26309  -1.519    0.189    
## isoSP09839   -0.01719    0.23674  -0.073    0.945    
## isoSP09885   -0.06922    0.23991  -0.289    0.785    
## factor(exp)2  0.02488    0.14539   0.171    0.871    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 4.337084)
## 
##     Null deviance: 37.354  on 11  degrees of freedom
## Residual deviance: 21.849  on  5  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
# Model diagnostic
plot(hnp(fit0.germi, print.on=TRUE, plot=FALSE))
## Quasi-Poisson model

Incubação

# Variable distrib. 
boxplot(dat$incu~dat$iso)

# Quasipoisson distrib.
#fit0.incu = glm(incu ~ iso +  factor(exp), data = dat, family = quasipoisson(link=log)) 
#anova(fit0.incu, test="F")
#summary(fit0.incu)

# Model diagnostic
#plot(hnp(fit0.germi, print.on=TRUE, plot=FALSE))

Esporulação

# Variable distrib. 
hist(datuni$espor.cont, main = "Histograma de esporulação")

## GLM 

# Quasipoisson distrib.
fit1 = glm(espor.cont ~ iso +  factor(exp), 
           data = datuni, family = quasipoisson(link=log)) 
anova(fit1, test="F")
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: espor.cont
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                          109    2295.27                     
## iso          5  1546.45       104     748.82 45.165 < 2.2e-16 ***
## factor(exp)  1    82.59       103     666.22 12.061 0.0007542 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit1)
## 
## Call:
## glm(formula = espor.cont ~ iso + factor(exp), family = quasipoisson(link = log), 
##     data = datuni)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -5.381  -2.020  -0.357   1.439   6.093  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.57918    0.11556  30.973  < 2e-16 ***
## isoSP05160    0.12155    0.14754   0.824 0.411926    
## isoSP06210    0.38524    0.14463   2.664 0.008972 ** 
## isoSP08345    0.80346    0.13437   5.979 3.26e-08 ***
## isoSP09839    0.36786    0.14185   2.593 0.010887 *  
## isoSP09885   -2.72904    0.43500  -6.274 8.42e-09 ***
## factor(exp)2 -0.28444    0.08203  -3.467 0.000768 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 6.848008)
## 
##     Null deviance: 2295.27  on 109  degrees of freedom
## Residual deviance:  666.22  on 103  degrees of freedom
##   (10 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6
# Model diagnostic

par(mfrow=c(1,2),oma = c(0, 0, 2, 0))
plot(hnp(fit1, print.on=TRUE, plot=FALSE), main="Envelope simulado")
## Quasi-Poisson model
plot(fitted(fit1), residuals(fit1),
     xlab = "Fitted Values", ylab = "Residuals",
      main="Homocedasticidade")
abline(h=0, lty=2)
lines(smooth.spline(fitted(fit1), residuals(fit1)))
mtext("Diagnostico do modelo", outer = TRUE, cex = 1.5)

# Contraste entre isolados

library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
summary(glht(fit1, linfct=mcp(iso="Tukey")), test=adjusted(type="bonferroni"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = espor.cont ~ iso + factor(exp), family = quasipoisson(link = log), 
##     data = datuni)
## 
## Linear Hypotheses:
##                        Estimate Std. Error z value Pr(>|z|)    
## SP05160 - PR09638 == 0  0.12155    0.14754   0.824  1.00000    
## SP06210 - PR09638 == 0  0.38524    0.14463   2.664  0.11594    
## SP08345 - PR09638 == 0  0.80346    0.13437   5.979 3.36e-08 ***
## SP09839 - PR09638 == 0  0.36786    0.14185   2.593  0.14257    
## SP09885 - PR09638 == 0 -2.72904    0.43500  -6.274 5.29e-09 ***
## SP06210 - SP05160 == 0  0.26369    0.13586   1.941  0.78404    
## SP08345 - SP05160 == 0  0.68191    0.12489   5.460 7.13e-07 ***
## SP09839 - SP05160 == 0  0.24631    0.13290   1.853  0.95739    
## SP09885 - SP05160 == 0 -2.85059    0.43216  -6.596 6.33e-10 ***
## SP08345 - SP06210 == 0  0.41822    0.12129   3.448  0.00847 ** 
## SP09839 - SP06210 == 0 -0.01738    0.12962  -0.134  1.00000    
## SP09885 - SP06210 == 0 -3.11428    0.43116  -7.223 7.63e-12 ***
## SP09839 - SP08345 == 0 -0.43560    0.11798  -3.692  0.00334 ** 
## SP09885 - SP08345 == 0 -3.53250    0.42781  -8.257 3.33e-15 ***
## SP09885 - SP09839 == 0 -3.09690    0.43024  -7.198 9.16e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- bonferroni method)

Comparar as três metodologias

tec <- read.table("compara.txt", header = T, sep = "\t", dec = ",", na.strings = "."); head(tec); tail(tec)
##       iso exp rep vivo enlat invitro
## 1 PR09638   1   1 1.08  0.41    0.00
## 2 PR09638   1   2   NA    NA    0.33
## 3 PR09638   1   3 1.25  0.29    0.00
## 4 PR09638   1   4 1.00  0.24    1.00
## 5 PR09638   1   5 0.67  0.25    0.33
## 6 PR09638   1   6 0.83  0.22    0.00
##         iso exp rep vivo enlat invitro
## 115 SP09885   2   5   NA  0.06    0.00
## 116 SP09885   2   6 0.05  0.03    0.33
## 117 SP09885   2   7   NA  0.03    0.67
## 118 SP09885   2   8   NA  0.00    1.00
## 119 SP09885   2   9   NA  0.09    1.67
## 120 SP09885   2  10   NA  0.02    0.67
long <- melt(tec, id=c("iso", "exp", "rep"),
             #na.rm = TRUE, 
             value.name = "value") ; head(long)
##       iso exp rep variable value
## 1 PR09638   1   1     vivo  1.08
## 2 PR09638   1   2     vivo    NA
## 3 PR09638   1   3     vivo  1.25
## 4 PR09638   1   4     vivo  1.00
## 5 PR09638   1   5     vivo  0.67
## 6 PR09638   1   6     vivo  0.83
#str(tec)
# for (i in 1:3) tec[,i] <- as.factor(tec[,i])
# for (i in 4:6) tec[,i] <- as.numeric(tec[,i])

ggplot(long, aes(x=iso, y=value, fill = factor(exp)))+
        geom_boxplot()+
        facet_grid(variable~., scales = "free")+
        labs(title="Dados originais",
             x ="Isolados", y = "Esporos.mL-1")
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

# Removing outliers 

#subset(long, iso=="PR09638" & exp=="2" & variable=="vivo")
#subset(long, iso=="SP08345" & exp=="1" & variable=="vivo")
#subset(long, iso=="SP09885" & exp=="1" & variable=="vivo")
#subset(long, iso=="PR09638" & exp=="1" & variable=="invitro")
long1 = long[-c(14,62,90,247,248,256,314),]; head(long1)
##       iso exp rep variable value
## 1 PR09638   1   1     vivo  1.08
## 2 PR09638   1   2     vivo    NA
## 3 PR09638   1   3     vivo  1.25
## 4 PR09638   1   4     vivo  1.00
## 5 PR09638   1   5     vivo  0.67
## 6 PR09638   1   6     vivo  0.83
#dim(long); dim(long1)
ggplot(long1, 
       aes(x=iso, y=value, fill = factor(exp)))+
        geom_boxplot()+
        facet_grid(variable~., scales = "free")+
        labs(title="Dados sem discrepantes",
             x ="Isolados", y = "Esporos.mL-1")
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

# Long to wide clean data

wide1 = dcast(long1, iso + exp + rep ~ variable, value.var="value")

# Summarizing wide1 by iso*exp

(sumwide1 = aggregate(cbind(vivo, enlat, invitro)~iso + exp, data = wide1, mean))
##        iso exp      vivo     enlat   invitro
## 1  PR09638   1 1.0550000 0.3183333 0.2216667
## 2  SP05160   1 0.8975000 0.5475000 1.1675000
## 3  SP06210   1 0.9660000 0.7500000 0.6680000
## 4  SP08345   1 0.7925000 1.1600000 0.4175000
## 5  SP09839   1 1.2728571 0.6042857 3.9057143
## 6  SP09885   1 0.4425000 0.0050000 0.0000000
## 7  PR09638   2 1.2200000 0.3850000 2.2766667
## 8  SP05160   2 0.9866667 0.3516667 6.3316667
## 9  SP06210   2 1.5280000 0.4600000 2.6000000
## 10 SP08345   2 0.9085714 0.7014286 6.7600000
## 11 SP09839   2 0.9216667 0.4900000 6.0016667
## 12 SP09885   2 0.0200000 0.0100000 0.4433333
sumwide1.l <- split(sumwide1[3:5], sumwide1$exp)

# Spearman rank correlation 
# Medida de correlação não-paramétrica, isto é, ele avalia uma função monótona arbitrária que pode ser a descrição da relação entre duas variáveis, sem fazer nenhumas suposições sobre a distribuição de frequências das variáveis.

# Ao contrário do coeficiente de correlação de Pearson, não requer a suposição que a relação entre as variáveis é linear, nem requer que as variáveis sejam medidas em intervalo de classe; pode ser usado para as variáveis medidas no nível ordinal.

# browseURL("https://pt.wikipedia.org/wiki/Coeficiente_de_correla%C3%A7%C3%A3o_de_postos_de_Spearman")

lapply(sumwide1.l, cor, method = "spearman")# psych::corr.test
## $`1`
##              vivo     enlat   invitro
## vivo    1.0000000 0.1428571 0.6000000
## enlat   0.1428571 1.0000000 0.4857143
## invitro 0.6000000 0.4857143 1.0000000
## 
## $`2`
##                vivo      enlat     invitro
## vivo     1.00000000 0.08571429 -0.02857143
## enlat    0.08571429 1.00000000  0.65714286
## invitro -0.02857143 0.65714286  1.00000000
# Summarizing wide1 by iso

(sumwide2 = aggregate(cbind(vivo, enlat, invitro)~iso, data = wide1, mean))
##       iso      vivo       enlat  invitro
## 1 PR09638 1.1375000 0.351666667 1.249167
## 2 SP05160 0.9510000 0.430000000 4.266000
## 3 SP06210 1.2470000 0.605000000 1.634000
## 4 SP08345 0.8663636 0.868181818 4.453636
## 5 SP09839 1.1107692 0.551538462 4.873077
## 6 SP09885 0.2614286 0.007142857 0.190000
# Spearman rank correlation 

cor(sumwide2[,2:4],method="spearman")
##               vivo     enlat    invitro
## vivo    1.00000000 0.2571429 0.08571429
## enlat   0.25714286 1.0000000 0.71428571
## invitro 0.08571429 0.7142857 1.00000000
iso.vivo = sumwide2[with(sumwide2, order(vivo)), ][,1]
iso.enlat = sumwide2[with(sumwide2, order(enlat)), ][,1]
iso.vitro = sumwide2[with(sumwide2, order(invitro)), ][,1]
data.frame(iso.vivo, iso.enlat, iso.vitro)
##   iso.vivo iso.enlat iso.vitro
## 1  SP09885   SP09885   SP09885
## 2  SP08345   PR09638   PR09638
## 3  SP05160   SP05160   SP06210
## 4  SP09839   SP09839   SP05160
## 5  PR09638   SP06210   SP08345
## 6  SP06210   SP08345   SP09839