setwd("~/Dropbox/MyR/consult/ISA/tese")
library(ggplot2)
library(reshape2)
library(hnp) # library(lme4)
## Loading required package: MASS
# Enlatado todas as variaveis
(dat <- read.table("enlatado_multi.txt", header = T, dec = ","))
## iso exp espor conidio germi incu lat incid
## 1 SP06210 1 59.933 0.75 56.22 2 2 100
## 2 SP05160 1 43.583 0.54 56.22 3 3 100
## 3 PR09638 1 28.333 0.35 84.78 2 2 100
## 4 SP09839 1 51.250 0.64 62.56 2 2 100
## 5 SP08345 1 92.750 1.16 54.45 2 2 100
## 6 SP09885 1 2.222 0.03 91.11 2 2 100
## 7 SP06210 2 39.370 0.49 64.55 2 2 100
## 8 SP05160 2 32.300 0.40 84.00 3 3 100
## 9 PR09638 2 34.481 0.43 71.33 2 2 100
## 10 SP09839 2 37.100 0.46 90.89 2 2 100
## 11 SP08345 2 53.633 0.67 50.22 2 2 100
## 12 SP09885 2 1.867 0.02 54.56 2 2 100
datlong <- melt(dat, id.vars = c("iso", "exp"))
# Enlatado » Expandido
datuni <- read.table("enlatado_uni.txt", header = T, dec = ",", na.strings = ".")
df <- datuni[,c(1,2,3,4)]
#names(df)[1] = "race"
#write.csv(df,"espor.csv", na="NA", row.names = FALSE)
head(datlong); tail(datlong)
## iso exp variable value
## 1 SP06210 1 espor 59.933
## 2 SP05160 1 espor 43.583
## 3 PR09638 1 espor 28.333
## 4 SP09839 1 espor 51.250
## 5 SP08345 1 espor 92.750
## 6 SP09885 1 espor 2.222
## iso exp variable value
## 67 SP06210 2 incid 100
## 68 SP05160 2 incid 100
## 69 PR09638 2 incid 100
## 70 SP09839 2 incid 100
## 71 SP08345 2 incid 100
## 72 SP09885 2 incid 100
ggplot(datlong, aes(x=iso, y=value, fill = factor(exp)))+
geom_point()+
facet_grid(variable~., scales = "free") +
labs(title="Enlatado - todas as variaveis",
x ="Isolados", y = "Valor")
head(datuni)
## iso exp rep espor.cont espor.conidio incu lat incid
## 1 SP06210 1 1 24.33333 0.3041667 2 2 100
## 2 SP06210 1 2 35.66667 0.4458333 NA NA NA
## 3 SP06210 1 3 56.00000 0.7000000 NA NA NA
## 4 SP06210 1 4 65.66667 0.8208333 NA NA NA
## 5 SP06210 1 5 79.66667 0.9958333 NA NA NA
## 6 SP06210 1 6 64.33333 0.8041667 NA NA NA
ggplot(aes(y = espor.cont, x = iso, fill = factor(exp)),
data=subset(datuni, !is.na(espor.cont)) ) + geom_boxplot()+
labs(title="Enlatado - Esporulação",
x ="Isolados", y = "Esporos.mL-1")
# Variable distrib.
hist(dat$germi, main = "Histograma de germinação de esporos")
# Quasipoisson distrib.
fit0.germi = glm(germi ~ iso + factor(exp),
data = dat, family = quasipoisson(link=log))
anova(fit0.germi, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: germi
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 11 37.354
## iso 5 15.377 6 21.976 0.7091 0.6424
## factor(exp) 1 0.127 5 21.849 0.0293 0.8708
summary(fit0.germi)
##
## Call:
## glm(formula = germi ~ iso + factor(exp), family = quasipoisson(link = log),
## data = dat)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8
## -0.4464 -1.6178 0.8625 -1.5653 0.3814 2.1709 0.4326 1.5012
## 9 10 11 12
## -0.8804 1.4592 -0.3834 -2.3428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.34490 0.18221 23.846 2.42e-06 ***
## isoSP05160 -0.10735 0.24231 -0.443 0.676
## isoSP06210 -0.25667 0.25238 -1.017 0.356
## isoSP08345 -0.39975 0.26309 -1.519 0.189
## isoSP09839 -0.01719 0.23674 -0.073 0.945
## isoSP09885 -0.06922 0.23991 -0.289 0.785
## factor(exp)2 0.02488 0.14539 0.171 0.871
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 4.337084)
##
## Null deviance: 37.354 on 11 degrees of freedom
## Residual deviance: 21.849 on 5 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
# Model diagnostic
plot(hnp(fit0.germi, print.on=TRUE, plot=FALSE))
## Quasi-Poisson model
# Variable distrib.
boxplot(dat$incu~dat$iso)
# Quasipoisson distrib.
#fit0.incu = glm(incu ~ iso + factor(exp), data = dat, family = quasipoisson(link=log))
#anova(fit0.incu, test="F")
#summary(fit0.incu)
# Model diagnostic
#plot(hnp(fit0.germi, print.on=TRUE, plot=FALSE))
# Variable distrib.
hist(datuni$espor.cont, main = "Histograma de esporulação")
## GLM
# Quasipoisson distrib.
fit1 = glm(espor.cont ~ iso + factor(exp),
data = datuni, family = quasipoisson(link=log))
anova(fit1, test="F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: espor.cont
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 109 2295.27
## iso 5 1546.45 104 748.82 45.165 < 2.2e-16 ***
## factor(exp) 1 82.59 103 666.22 12.061 0.0007542 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit1)
##
## Call:
## glm(formula = espor.cont ~ iso + factor(exp), family = quasipoisson(link = log),
## data = datuni)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.381 -2.020 -0.357 1.439 6.093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.57918 0.11556 30.973 < 2e-16 ***
## isoSP05160 0.12155 0.14754 0.824 0.411926
## isoSP06210 0.38524 0.14463 2.664 0.008972 **
## isoSP08345 0.80346 0.13437 5.979 3.26e-08 ***
## isoSP09839 0.36786 0.14185 2.593 0.010887 *
## isoSP09885 -2.72904 0.43500 -6.274 8.42e-09 ***
## factor(exp)2 -0.28444 0.08203 -3.467 0.000768 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 6.848008)
##
## Null deviance: 2295.27 on 109 degrees of freedom
## Residual deviance: 666.22 on 103 degrees of freedom
## (10 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
# Model diagnostic
par(mfrow=c(1,2),oma = c(0, 0, 2, 0))
plot(hnp(fit1, print.on=TRUE, plot=FALSE), main="Envelope simulado")
## Quasi-Poisson model
plot(fitted(fit1), residuals(fit1),
xlab = "Fitted Values", ylab = "Residuals",
main="Homocedasticidade")
abline(h=0, lty=2)
lines(smooth.spline(fitted(fit1), residuals(fit1)))
mtext("Diagnostico do modelo", outer = TRUE, cex = 1.5)
# Contraste entre isolados
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
summary(glht(fit1, linfct=mcp(iso="Tukey")), test=adjusted(type="bonferroni"))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = espor.cont ~ iso + factor(exp), family = quasipoisson(link = log),
## data = datuni)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## SP05160 - PR09638 == 0 0.12155 0.14754 0.824 1.00000
## SP06210 - PR09638 == 0 0.38524 0.14463 2.664 0.11594
## SP08345 - PR09638 == 0 0.80346 0.13437 5.979 3.36e-08 ***
## SP09839 - PR09638 == 0 0.36786 0.14185 2.593 0.14257
## SP09885 - PR09638 == 0 -2.72904 0.43500 -6.274 5.29e-09 ***
## SP06210 - SP05160 == 0 0.26369 0.13586 1.941 0.78404
## SP08345 - SP05160 == 0 0.68191 0.12489 5.460 7.13e-07 ***
## SP09839 - SP05160 == 0 0.24631 0.13290 1.853 0.95739
## SP09885 - SP05160 == 0 -2.85059 0.43216 -6.596 6.33e-10 ***
## SP08345 - SP06210 == 0 0.41822 0.12129 3.448 0.00847 **
## SP09839 - SP06210 == 0 -0.01738 0.12962 -0.134 1.00000
## SP09885 - SP06210 == 0 -3.11428 0.43116 -7.223 7.63e-12 ***
## SP09839 - SP08345 == 0 -0.43560 0.11798 -3.692 0.00334 **
## SP09885 - SP08345 == 0 -3.53250 0.42781 -8.257 3.33e-15 ***
## SP09885 - SP09839 == 0 -3.09690 0.43024 -7.198 9.16e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- bonferroni method)
Comparar as três metodologias
tec <- read.table("compara.txt", header = T, sep = "\t", dec = ",", na.strings = "."); head(tec); tail(tec)
## iso exp rep vivo enlat invitro
## 1 PR09638 1 1 1.08 0.41 0.00
## 2 PR09638 1 2 NA NA 0.33
## 3 PR09638 1 3 1.25 0.29 0.00
## 4 PR09638 1 4 1.00 0.24 1.00
## 5 PR09638 1 5 0.67 0.25 0.33
## 6 PR09638 1 6 0.83 0.22 0.00
## iso exp rep vivo enlat invitro
## 115 SP09885 2 5 NA 0.06 0.00
## 116 SP09885 2 6 0.05 0.03 0.33
## 117 SP09885 2 7 NA 0.03 0.67
## 118 SP09885 2 8 NA 0.00 1.00
## 119 SP09885 2 9 NA 0.09 1.67
## 120 SP09885 2 10 NA 0.02 0.67
long <- melt(tec, id=c("iso", "exp", "rep"),
#na.rm = TRUE,
value.name = "value") ; head(long)
## iso exp rep variable value
## 1 PR09638 1 1 vivo 1.08
## 2 PR09638 1 2 vivo NA
## 3 PR09638 1 3 vivo 1.25
## 4 PR09638 1 4 vivo 1.00
## 5 PR09638 1 5 vivo 0.67
## 6 PR09638 1 6 vivo 0.83
#str(tec)
# for (i in 1:3) tec[,i] <- as.factor(tec[,i])
# for (i in 4:6) tec[,i] <- as.numeric(tec[,i])
ggplot(long, aes(x=iso, y=value, fill = factor(exp)))+
geom_boxplot()+
facet_grid(variable~., scales = "free")+
labs(title="Dados originais",
x ="Isolados", y = "Esporos.mL-1")
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
# Removing outliers
#subset(long, iso=="PR09638" & exp=="2" & variable=="vivo")
#subset(long, iso=="SP08345" & exp=="1" & variable=="vivo")
#subset(long, iso=="SP09885" & exp=="1" & variable=="vivo")
#subset(long, iso=="PR09638" & exp=="1" & variable=="invitro")
long1 = long[-c(14,62,90,247,248,256,314),]; head(long1)
## iso exp rep variable value
## 1 PR09638 1 1 vivo 1.08
## 2 PR09638 1 2 vivo NA
## 3 PR09638 1 3 vivo 1.25
## 4 PR09638 1 4 vivo 1.00
## 5 PR09638 1 5 vivo 0.67
## 6 PR09638 1 6 vivo 0.83
#dim(long); dim(long1)
ggplot(long1,
aes(x=iso, y=value, fill = factor(exp)))+
geom_boxplot()+
facet_grid(variable~., scales = "free")+
labs(title="Dados sem discrepantes",
x ="Isolados", y = "Esporos.mL-1")
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
# Long to wide clean data
wide1 = dcast(long1, iso + exp + rep ~ variable, value.var="value")
# Summarizing wide1 by iso*exp
(sumwide1 = aggregate(cbind(vivo, enlat, invitro)~iso + exp, data = wide1, mean))
## iso exp vivo enlat invitro
## 1 PR09638 1 1.0550000 0.3183333 0.2216667
## 2 SP05160 1 0.8975000 0.5475000 1.1675000
## 3 SP06210 1 0.9660000 0.7500000 0.6680000
## 4 SP08345 1 0.7925000 1.1600000 0.4175000
## 5 SP09839 1 1.2728571 0.6042857 3.9057143
## 6 SP09885 1 0.4425000 0.0050000 0.0000000
## 7 PR09638 2 1.2200000 0.3850000 2.2766667
## 8 SP05160 2 0.9866667 0.3516667 6.3316667
## 9 SP06210 2 1.5280000 0.4600000 2.6000000
## 10 SP08345 2 0.9085714 0.7014286 6.7600000
## 11 SP09839 2 0.9216667 0.4900000 6.0016667
## 12 SP09885 2 0.0200000 0.0100000 0.4433333
sumwide1.l <- split(sumwide1[3:5], sumwide1$exp)
# Spearman rank correlation
# Medida de correlação não-paramétrica, isto é, ele avalia uma função monótona arbitrária que pode ser a descrição da relação entre duas variáveis, sem fazer nenhumas suposições sobre a distribuição de frequências das variáveis.
# Ao contrário do coeficiente de correlação de Pearson, não requer a suposição que a relação entre as variáveis é linear, nem requer que as variáveis sejam medidas em intervalo de classe; pode ser usado para as variáveis medidas no nível ordinal.
# browseURL("https://pt.wikipedia.org/wiki/Coeficiente_de_correla%C3%A7%C3%A3o_de_postos_de_Spearman")
lapply(sumwide1.l, cor, method = "spearman")# psych::corr.test
## $`1`
## vivo enlat invitro
## vivo 1.0000000 0.1428571 0.6000000
## enlat 0.1428571 1.0000000 0.4857143
## invitro 0.6000000 0.4857143 1.0000000
##
## $`2`
## vivo enlat invitro
## vivo 1.00000000 0.08571429 -0.02857143
## enlat 0.08571429 1.00000000 0.65714286
## invitro -0.02857143 0.65714286 1.00000000
# Summarizing wide1 by iso
(sumwide2 = aggregate(cbind(vivo, enlat, invitro)~iso, data = wide1, mean))
## iso vivo enlat invitro
## 1 PR09638 1.1375000 0.351666667 1.249167
## 2 SP05160 0.9510000 0.430000000 4.266000
## 3 SP06210 1.2470000 0.605000000 1.634000
## 4 SP08345 0.8663636 0.868181818 4.453636
## 5 SP09839 1.1107692 0.551538462 4.873077
## 6 SP09885 0.2614286 0.007142857 0.190000
# Spearman rank correlation
cor(sumwide2[,2:4],method="spearman")
## vivo enlat invitro
## vivo 1.00000000 0.2571429 0.08571429
## enlat 0.25714286 1.0000000 0.71428571
## invitro 0.08571429 0.7142857 1.00000000
iso.vivo = sumwide2[with(sumwide2, order(vivo)), ][,1]
iso.enlat = sumwide2[with(sumwide2, order(enlat)), ][,1]
iso.vitro = sumwide2[with(sumwide2, order(invitro)), ][,1]
data.frame(iso.vivo, iso.enlat, iso.vitro)
## iso.vivo iso.enlat iso.vitro
## 1 SP09885 SP09885 SP09885
## 2 SP08345 PR09638 PR09638
## 3 SP05160 SP05160 SP06210
## 4 SP09839 SP09839 SP05160
## 5 PR09638 SP06210 SP08345
## 6 SP06210 SP08345 SP09839