Analysis of Capsid GFP datasets

Three capsid datasets were analyzed. In all cases, these dataset included several technical replicates of obsevrations of GFP % for a small number of dogs. Each treatment was applied to one or at most two dogs’ eyes and measured four times per eyes.

All the statistical analyses were performed at the observation level, i.e: the individual measure within a dog was used as a replicate.

In all datasets we followed the same analytical protocol: First we fit a linear model to individual observations of GFP with fixed effects of treatment (this is an ANOVA), extracted residuals and checked their normality. If the residuals departed from the normal distribution we transformed the data using the log10 function repeated the procedure. In all cases, the log transformation resulted in residuals with better fit to the normal distribution compared to the residuals from the model based on untransformed data.

Second, we obtained the ANOVA for transformed data and performed least squares mean comparison for all possible pairs of treatments. Resulting p-values were corrected using the FDR.

Finally, for graphical representation purposes, lsmeans and thir 95% confidence intervals were back teansformed to the original scale

prepare all packages

library(tidyverse)
library(readxl)
library(lsmeans)

setwd("C:/Users/marti/OneDrive/Documents/andras/Kristin")

Capsid variation

Read data and show freqency table of measures per dog and treatment

dts<-read_xlsx("GFP expression for STATS_2020.xlsx",sheet = "capsid_variation")
## New names:
## * `` -> ...6
table(dts$`Capsid variation`,dts$Dog)
##          
##           Esteban Felix Javier Jax Mateo Stefano
##   Q 10^11       0     4      0   0     0       0
##   Q 10^9        0     0      0   0     0       4
##   S 10^11       4     0      4   0     0       0
##   S 10^9        0     0      0   4     0       0
##   T 10^11       0     0      0   0     4       0
##   T 10^9        4     0      0   0     0       0

###ANOVA for raw data and diagnostics

Residual vs fitted by and normal q-q plot show large departures from normality

cap_var_lm<-lm(`% area GFP labeling`~`Capsid variation`,data=dts)
anova(cap_var_lm)
## Analysis of Variance Table
## 
## Response: % area GFP labeling
##                    Df  Sum Sq Mean Sq F value  Pr(>F)  
## `Capsid variation`  5 0.63201 0.12640  3.1156 0.02825 *
## Residuals          22 0.89255 0.04057                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cap_var_lm)

###ANOVA for log-transformed data and diagnostics with log-transformation the residuals fit a normal distribution better

dts$lgfp<-log10((dts$`% area GFP labeling`))
cap_var_log<-lm(lgfp~`Capsid variation`,data=dts)
plot(cap_var_log)

#Means comparisons Significant differences between treatments can be dysected into all pairwise comparisons

anova(cap_var_log)
## Analysis of Variance Table
## 
## Response: lgfp
##                    Df Sum Sq Mean Sq F value    Pr(>F)    
## `Capsid variation`  5 13.957 2.79137  11.047 2.051e-05 ***
## Residuals          22  5.559 0.25268                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rst<-lsmeans(cap_var_log,pairwise~`Capsid variation`,adjust="fdr")

rst$contrasts
##  contrast          estimate    SE df t.ratio p.value
##  Q 10^11 - Q 10^9    -1.348 0.355 22 -3.791  0.0030 
##  Q 10^11 - S 10^11   -1.799 0.308 22 -5.845  0.0001 
##  Q 10^11 - S 10^9    -0.600 0.355 22 -1.689  0.1756 
##  Q 10^11 - T 10^11   -0.207 0.355 22 -0.583  0.6060 
##  Q 10^11 - T 10^9    -0.355 0.355 22 -0.998  0.4113 
##  Q 10^9 - S 10^11    -0.452 0.308 22 -1.467  0.2346 
##  Q 10^9 - S 10^9      0.747 0.355 22  2.102  0.0885 
##  Q 10^9 - T 10^11     1.140 0.355 22  3.208  0.0101 
##  Q 10^9 - T 10^9      0.993 0.355 22  2.793  0.0227 
##  S 10^11 - S 10^9     1.199 0.308 22  3.895  0.0029 
##  S 10^11 - T 10^11    1.592 0.308 22  5.172  0.0003 
##  S 10^11 - T 10^9     1.444 0.308 22  4.693  0.0006 
##  S 10^9 - T 10^11     0.393 0.355 22  1.106  0.3829 
##  S 10^9 - T 10^9      0.246 0.355 22  0.691  0.5733 
##  T 10^11 - T 10^9    -0.147 0.355 22 -0.415  0.6823 
## 
## P value adjustment: fdr method for 15 tests

Finally, the table of back-transformed means to the original scale is represented

##   Capsid variation      lsmean        SE df    lower.CL    upper.CL
## 1          Q 10^11 0.002632148 0.2513369 22 0.000792628 0.008740801
## 2           Q 10^9 0.058592504 0.2513369 22 0.017644166 0.194573184
## 3          S 10^11 0.165792932 0.1777221 22 0.070956636 0.387381615
## 4           S 10^9 0.010487222 0.2513369 22 0.003158054 0.034825821
## 5          T 10^11 0.004242641 0.2513369 22 0.001277601 0.014088903
## 6           T 10^9 0.005957892 0.2513369 22 0.001794121 0.019784886

Single Capsid Dosing Variation

Read data and show freqency table of measures per dog and treatment

dts<-read_xlsx("GFP expression for STATS_2020.xlsx",sheet = "dosing_variation")
## New names:
## * `` -> ...6
## * `` -> ...7
table(dts$`Single Capsid Dosing Variation`,dts$Dog)
##               
##                Esteban Javier Jax Merrimack Nashua Stefano Sunapee Winni
##   Ivit               0      0   0         0      0       0       0     7
##   S 10^10            0      0   0         0      0       4       0     0
##   S 10^11            4      4   0         0      0       0       0     0
##   S 10^9             0      0   4         0      0       0       0     0
##   S 2.78x10^12       0      0   0         4      4       0       0     0
##   S 5.56x10^12       0      0   0         0      0       0       8     0

###ANOVA for raw data and diagnostics

Residual vs fitted by and normal q-q plot show large departures from normality

cap_var_lm<-lm(`% area GFP labeling`~`Single Capsid Dosing Variation`,data=dts)
anova(cap_var_lm)
## Analysis of Variance Table
## 
## Response: % area GFP labeling
##                                  Df Sum Sq Mean Sq F value  Pr(>F)  
## `Single Capsid Dosing Variation`  5  82.41 16.4821  3.5856 0.01064 *
## Residuals                        33 151.69  4.5968                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cap_var_lm)

###ANOVA for log-transformed data and diagnostics with log-transformation the residuals fit a normal distribution better

dts$lgfp<-log10((dts$`% area GFP labeling`))
cap_var_log<-lm(lgfp~`Single Capsid Dosing Variation`,data=dts)
plot(cap_var_log)

#Means comparisons Significant differences between treatments can be dysected into all pairwise comparisons

anova(cap_var_log)
## Analysis of Variance Table
## 
## Response: lgfp
##                                  Df  Sum Sq Mean Sq F value    Pr(>F)    
## `Single Capsid Dosing Variation`  5 15.9642  3.1928  11.278 2.141e-06 ***
## Residuals                        33  9.3421  0.2831                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rst<-lsmeans(cap_var_log,pairwise~`Single Capsid Dosing Variation`,adjust="fdr")

rst$contrasts
##  contrast                    estimate    SE df t.ratio p.value
##  Ivit - S 10^10                0.4630 0.333 33  1.388  0.2615 
##  Ivit - S 10^11                0.7598 0.275 33  2.759  0.0201 
##  Ivit - S 10^9                 1.9587 0.333 33  5.873  <.0001 
##  Ivit - S 2.78x10^12          -0.2501 0.275 33 -0.908  0.3968 
##  Ivit - S 5.56x10^12           0.0998 0.275 33  0.363  0.7193 
##  S 10^10 - S 10^11             0.2968 0.326 33  0.911  0.3968 
##  S 10^10 - S 10^9              1.4957 0.376 33  3.976  0.0014 
##  S 10^10 - S 2.78x10^12       -0.7131 0.326 33 -2.189  0.0597 
##  S 10^10 - S 5.56x10^12       -0.3632 0.326 33 -1.115  0.3413 
##  S 10^11 - S 10^9              1.1989 0.326 33  3.680  0.0021 
##  S 10^11 - S 2.78x10^12       -1.0099 0.266 33 -3.796  0.0018 
##  S 10^11 - S 5.56x10^12       -0.6600 0.266 33 -2.481  0.0345 
##  S 10^9 - S 2.78x10^12        -2.2088 0.326 33 -6.779  <.0001 
##  S 10^9 - S 5.56x10^12        -1.8589 0.326 33 -5.705  <.0001 
##  S 2.78x10^12 - S 5.56x10^12   0.3499 0.266 33  1.315  0.2693 
## 
## P value adjustment: fdr method for 15 tests

Finally, the table of back-transformed means to the original scale is represented

##   Single Capsid Dosing Variation     lsmean        SE df    lower.CL   upper.CL
## 1                           Ivit 0.95362465 0.2011018 33 0.371734353 2.44637054
## 2                        S 10^10 0.32836679 0.2660327 33 0.094430344 1.14184429
## 3                        S 10^11 0.16579293 0.1881135 33 0.068682505 0.40020812
## 4                         S 10^9 0.01048722 0.2660327 33 0.003015871 0.03646768
## 5                   S 2.78x10^12 1.69614843 0.1881135 33 0.702657954 4.09433847
## 6                   S 5.56x10^12 0.75778529 0.1881135 33 0.313925276 1.82922050

Pupil Variation

Read data and show freqency table of measures per dog and treatment

dts<-read_xlsx("GFP expression for STATS_2020.xlsx",sheet = "pupil_variation")
## New names:
## * `` -> ...6
## * `` -> ...7
table(dts$`Pupil Variation`,dts$Dog)
##            
##             Claremont Merrimack Nashua
##   miotic            4         4      0
##   mydriatic         4         0      4
##   unaltered         0         4      4

###ANOVA for raw data and diagnostics

Residual vs fitted by and normal q-q plot show large departures from normality

cap_var_lm<-lm(`% area GFP labeling`~`Pupil Variation`,data=dts)
anova(cap_var_lm)
## Analysis of Variance Table
## 
## Response: % area GFP labeling
##                   Df  Sum Sq Mean Sq F value Pr(>F)
## `Pupil Variation`  2  52.996  26.498  2.2569 0.1295
## Residuals         21 246.557  11.741
plot(cap_var_lm)

## hat values (leverages) are all = 0.125
##  and there are no factor predictors; no plot no. 5

###ANOVA for log-transformed data and diagnostics with log-transformation the residuals fit a normal distribution better

dts$lgfp<-log10((dts$`% area GFP labeling`))
cap_var_log<-lm(lgfp~`Pupil Variation`,data=dts)
plot(cap_var_log)

## hat values (leverages) are all = 0.125
##  and there are no factor predictors; no plot no. 5

#Means comparisons Significant differences between treatments can be dysected into all pairwise comparisons

anova(cap_var_log)
## Analysis of Variance Table
## 
## Response: lgfp
##                   Df Sum Sq Mean Sq F value Pr(>F)
## `Pupil Variation`  2 1.1113 0.55564  2.5735 0.1001
## Residuals         21 4.5341 0.21591
rst<-lsmeans(cap_var_log,pairwise~`Pupil Variation`,adjust="fdr")

rst$contrasts
##  contrast              estimate    SE df t.ratio p.value
##  miotic - mydriatic       0.360 0.232 21 1.551   0.2038 
##  miotic - unaltered       0.513 0.232 21 2.209   0.1152 
##  mydriatic - unaltered    0.153 0.232 21 0.658   0.5174 
## 
## P value adjustment: fdr method for 3 tests

Finally, the table of back-transformed means to the original scale is represented

##   Pupil Variation   lsmean        SE df  lower.CL  upper.CL
## 1          miotic 5.530593 0.1642815 21 2.5184289 12.145453
## 2       mydriatic 2.412311 0.1642815 21 1.0984778  5.297553
## 3       unaltered 1.696148 0.1642815 21 0.7723637  3.724825