Statistics

Summarize Data

### Stats 
stats$Dental.Spacing <- factor(stats$Dental.Spacing, levels = c("YES", "NO"))
stats$Trabecular.Pattern <- factor(stats$Trabecular.Pattern)
Totals <- aggregate(x = stats[,5:12], list(stats$Diagnosis), table, simplify = TRUE) 
Totals <- as.data.frame(as.list(Totals))
Totals.plot <- Totals %>% gather(Pattern, Total, 2:ncol(Totals))

Totals.plot$Pattern <- gsub(".NO", "_NO", Totals.plot$Pattern)
Totals.plot$Pattern <- gsub(".YES", "_YES", Totals.plot$Pattern)
Totals.plot$Pattern <- gsub(".DENSE", "_DENSE", Totals.plot$Pattern)
Totals.plot$Pattern <- gsub(".MIXED", "_MIXED", Totals.plot$Pattern)
Totals.plot$Pattern <- gsub(".SPARSE", "_SPARSE", Totals.plot$Pattern)
Totals.plot$Pattern <- gsub(".DONE", "_NONE", Totals.plot$Pattern)
Totals.plot$Group.1 <- factor(gsub("CONTROL ", "CONTROL", Totals.plot$Group.1))
Totals.plot <- Totals.plot %>% separate(Pattern, into=c("Diagnosis", "ANSWER"), sep="_")
head(Totals.plot)

##   Group.1           Diagnosis ANSWER Total
## 1  AUTISM Periodontal.Disease     NO   119
## 2 CONTROL Periodontal.Disease     NO    71
## 3      CP Periodontal.Disease     NO    62
## 4   DOWNS Periodontal.Disease     NO    41
## 5      ID Periodontal.Disease     NO   126
## 6  AUTISM Periodontal.Disease    YES    11

ggplot(Totals.plot,aes(x=factor(Diagnosis),y=Total,fill=factor(ANSWER))) + 
  geom_bar(position="fill", stat="identity") +
  scale_y_continuous(labels=c("25%", "50%", '75%', "100%"),
                     breaks=c(0.25,.5,.75,1)) + # you can set the breaks to whatever you want
  facet_wrap(~ Group.1) +
  theme(axis.text.x=element_text(angle=90, hjust=1)) +
  labs(fill="ANSWER")

ggplot(Totals.plot,aes(x=factor(Group.1),y=Total,fill=factor(ANSWER))) + 
  geom_bar(position="dodge", stat="identity") +
#  scale_y_continuous(labels=c("25%", "50%", '75%', "100%"),
#                     breaks=c(0.25,.5,.75,1)) + # you can set the breaks to whatever you want
  facet_wrap(~ Diagnosis) +
   theme(axis.text.x=element_text(angle=90, hjust=1)) +
  labs(fill="ANSWER")

Totals.plot.YES <- Totals.plot[-1*which(Totals.plot$ANSWER == "NO"),]
Totals.plot.YES$Total <- as.numeric(Totals.plot.YES$Total / GroupNumbers[Totals.plot.YES$Group.1])


ggplot(Totals.plot.YES,aes(x=factor(Group.1),y=Total,fill=factor(ANSWER))) + 
  geom_bar(position="dodge", stat="identity") +
#  scale_y_continuous(labels=c("25%", "50%", '75%', "100%"),
#                     breaks=c(0.25,.5,.75,1)) + # you can set the breaks to whatever you want
  facet_wrap(~ Diagnosis) +
   theme(axis.text.x=element_text(angle=90, hjust=1)) +
  labs(fill="ANSWER")

datatable(Totals, extensions = 'FixedColumns',
  options = list(
    dom = 't',
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2)
  )
)

library(lsmeans)
head(Totals.plot, 20)

##    Group.1           Diagnosis ANSWER Total
## 1   AUTISM Periodontal.Disease     NO   119
## 2  CONTROL Periodontal.Disease     NO    71
## 3       CP Periodontal.Disease     NO    62
## 4    DOWNS Periodontal.Disease     NO    41
## 5       ID Periodontal.Disease     NO   126
## 6   AUTISM Periodontal.Disease    YES    11
## 7  CONTROL Periodontal.Disease    YES    33
## 8       CP Periodontal.Disease    YES    30
## 9    DOWNS Periodontal.Disease    YES    21
## 10      ID Periodontal.Disease    YES    61
## 11  AUTISM              Trauma     NO   109
## 12 CONTROL              Trauma     NO    82
## 13      CP              Trauma     NO    70
## 14   DOWNS              Trauma     NO    35
## 15      ID              Trauma     NO   124
## 16  AUTISM              Trauma    YES    21
## 17 CONTROL              Trauma    YES    22
## 18      CP              Trauma    YES    22
## 19   DOWNS              Trauma    YES    27
## 20      ID              Trauma    YES    63

difference <- function(VARIABLE, REFERENCE) {
  table1 <- Totals.plot[Totals.plot$Diagnosis == VARIABLE,]
  table1.sub <- table1[,-1*which(colnames(table1) == "Diagnosis")]
  table1.reshape <- table1.sub %>% spread(ANSWER, Total)
  table1.glm <- glm(cbind(YES, NO) ~ Group.1, family = binomial(), data = table1.reshape)
  print(anova(table1.glm, test="Chisq"))
  print("")
  print("LSMEANS")
  print(table1.lsm <- lsmeans(table1.glm, "Group.1"))
  print(plot(table1.lsm,
             main = gsub("\\.", " ", VARIABLE)))
  print(contrast(table1.lsm, "del.eff"))
  reference <- which(levels(Totals.plot$Group.1) == REFERENCE)
  print(contrast(table1.lsm, "trt.vs.ctrl", ref=reference))
}

################################################
##############    Example    ###################
################################################

  table1 <- Totals.plot[Totals.plot$Diagnosis == "Attrition",]
  table1.sub <- table1[,-1*which(colnames(table1) == "Diagnosis")]
  table1.reshape <- table1.sub %>% spread(ANSWER, Total)
  table1.glm <- glm(cbind(YES, NO) ~ Group.1, family = binomial(), data = table1.reshape)
  print(anova(table1.glm, test="Chisq"))

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4     37.318              
## Group.1  4   37.318         0      0.000 1.549e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

  print(table1.lsm <- lsmeans(table1.glm, "Group.1"))

##  Group.1      lsmean        SE df   asymp.LCL  asymp.UCL
##  AUTISM   0.18514243 0.1761637 NA -0.16013213  0.5304170
##  CONTROL -1.15057203 0.2294738 NA -1.60033242 -0.7008116
##  CP       0.04348511 0.2085637 NA -0.36529223  0.4522625
##  DOWNS   -0.12921173 0.2545305 NA -0.62808240  0.3696589
##  ID       0.37879686 0.1488855 NA  0.08698657  0.6706072
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

  print(plot(table1.lsm,
             main = gsub("\\.", " ", "Attrition")))

  print(contrast(table1.lsm, "del.eff"))

##  contrast           estimate        SE df z.ratio p.value
##  AUTISM effect   0.399517880 0.2061020 NA   1.938  0.0876
##  CONTROL effect -1.270125196 0.2505016 NA  -5.070  <.0001
##  CP effect       0.222446228 0.2327318 NA   0.956  0.4240
##  DOWNS effect    0.006575174 0.2722511 NA   0.024  0.9807
##  ID effect       0.641585915 0.1848389 NA   3.471  0.0013
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests

  reference <- which(levels(Totals.plot$Group.1) == "CONTROL")
  print(contrast(table1.lsm, "trt.vs.ctrl", ref=reference))

##  contrast         estimate        SE df z.ratio p.value
##  AUTISM - CONTROL 1.335714 0.2892955 NA   4.617  <.0001
##  CP - CONTROL     1.194057 0.3100920 NA   3.851  0.0005
##  DOWNS - CONTROL  1.021360 0.3427011 NA   2.980  0.0108
##  ID - CONTROL     1.529369 0.2735418 NA   5.591  <.0001
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

  regrid(table1.lsm)

##  Group.1      prob         SE df asymp.LCL asymp.UCL
##  AUTISM  0.5461538 0.04366567 NA 0.4605707 0.6317370
##  CONTROL 0.2403846 0.04190188 NA 0.1582584 0.3225108
##  CP      0.5108696 0.05211628 NA 0.4087235 0.6130156
##  DOWNS   0.4677419 0.06336777 NA 0.3435434 0.5919405
##  ID      0.5935829 0.03591748 NA 0.5231859 0.6639799
## 
## Confidence level used: 0.95

################################################
##############    Example    ###################
################################################
  
# table1 <- Totals.plot[Totals.plot$Diagnosis == "Trabecular.Pattern",]
# table1.sub <- table1[,-1*which(colnames(table1) == "Diagnosis")]
# table1.reshape <- table1.sub %>% spread(ANSWER, Total)
# table1.reshape
difference("Periodontal.Disease", REFERENCE = "AUTISM")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                        4     35.178             
## Group.1  4   35.178         0      0.000 4.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1     lsmean        SE df asymp.LCL  asymp.UCL
##  AUTISM  -2.3812282 0.3151388 NA -2.998889 -1.7635676
##  CONTROL -0.7661723 0.2106835 NA -1.179104 -0.3532402
##  CP      -0.7259370 0.2224014 NA -1.161836 -0.2900383
##  DOWNS   -0.6690496 0.2683455 NA -1.194997 -0.1431022
##  ID      -0.7254080 0.1559806 NA -1.031124 -0.4196917
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast         estimate        SE df z.ratio p.value
##  AUTISM effect  -1.6595865 0.3334655 NA  -4.977  <.0001
##  CONTROL effect  0.3592334 0.2443505 NA   1.470  0.1415
##  CP effect       0.4095275 0.2538992 NA   1.613  0.1334
##  DOWNS effect    0.4806368 0.2925792 NA   1.643  0.1334
##  ID effect       0.4101887 0.2022453 NA   2.028  0.1064
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast         estimate        SE df z.ratio p.value
##  CONTROL - AUTISM 1.615056 0.3790778 NA   4.260  0.0001
##  CP - AUTISM      1.655291 0.3857134 NA   4.292  0.0001
##  DOWNS - AUTISM   1.712179 0.4139103 NA   4.137  0.0001
##  ID - AUTISM      1.655820 0.3516282 NA   4.709  <.0001
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

difference("Trauma", REFERENCE = "CONTROL")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4      22.85              
## Group.1  4    22.85         0       0.00 0.0001356 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1     lsmean        SE df  asymp.LCL  asymp.UCL
##  AUTISM  -1.6468254 0.2383136 NA -2.1139115 -1.1797394
##  CONTROL -1.3156768 0.2401035 NA -1.7862709 -0.8450827
##  CP      -1.1574528 0.2444182 NA -1.6365037 -0.6784019
##  DOWNS   -0.2595112 0.2561415 NA -0.7615393  0.2425169
##  ID      -0.6771468 0.1547176 NA -0.9803877 -0.3739059
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast         estimate        SE df z.ratio p.value
##  AUTISM effect  -0.7943785 0.2640584 NA  -3.008  0.0066
##  CONTROL effect -0.3804427 0.2655742 NA  -1.433  0.1900
##  CP effect      -0.1826627 0.2692389 NA  -0.678  0.4975
##  DOWNS effect    0.9397643 0.2792687 NA   3.365  0.0038
##  ID effect       0.4177197 0.1972930 NA   2.117  0.0571
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast           estimate        SE df z.ratio p.value
##  AUTISM - CONTROL -0.3311487 0.3382943 NA  -0.979  0.6967
##  CP - CONTROL      0.1582240 0.3426221 NA   0.462  0.9430
##  DOWNS - CONTROL   1.0561656 0.3510814 NA   3.008  0.0099
##  ID - CONTROL      0.6385300 0.2856347 NA   2.235  0.0864
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

difference("Dental.Spacing", REFERENCE = "CONTROL")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4      64.61              
## Group.1  4    64.61         0       0.00 3.109e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1     lsmean           SE df     asymp.LCL     asymp.UCL
##  AUTISM   -0.704708 1.864137e-01 NA -1.070072e+00 -3.393439e-01
##  CONTROL  -2.356652 3.487656e-01 NA -3.040220e+00 -1.673084e+00
##  CP      -27.228887 5.171739e+04 NA -1.013915e+05  1.013370e+05
##  DOWNS    -1.326871 3.119796e-01 NA -1.938340e+00 -7.154023e-01
##  ID       -2.122262 2.366179e-01 NA -2.586024e+00 -1.658499e+00
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast         estimate       SE df z.ratio p.value
##  AUTISM effect    7.553960 12929.35 NA   0.001  0.9997
##  CONTROL effect   5.489030 12929.35 NA   0.000  0.9997
##  CP effect      -25.601264 51717.39 NA   0.000  0.9997
##  DOWNS effect     6.776256 12929.35 NA   0.001  0.9997
##  ID effect        5.782018 12929.35 NA   0.000  0.9997
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast            estimate           SE df z.ratio p.value
##  AUTISM - CONTROL   1.6519443 3.954586e-01 NA   4.177  0.0001
##  CP - CONTROL     -24.8722351 5.171739e+04 NA   0.000  1.0000
##  DOWNS - CONTROL    1.0297814 4.679409e-01 NA   2.201  0.0939
##  ID - CONTROL       0.2343908 4.214563e-01 NA   0.556  0.9121
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

difference("Missing.Teeth", REFERENCE = "CONTROL")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4     65.629              
## Group.1  4   65.629         0      0.000 1.897e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1     lsmean        SE df  asymp.LCL  asymp.UCL
##  AUTISM  -2.4849066 0.3291403 NA -3.1300097 -1.8398036
##  CONTROL -0.5520686 0.2036352 NA -0.9511863 -0.1529509
##  CP      -1.5581446 0.2750598 NA -2.0972519 -1.0190373
##  DOWNS    0.2595112 0.2561415 NA -0.2425169  0.7615393
##  ID      -0.7993694 0.1580927 NA -1.1092255 -0.4895133
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast         estimate        SE df z.ratio p.value
##  AUTISM effect  -1.8223888 0.3483048 NA  -5.232  <.0001
##  CONTROL effect  0.5936588 0.2421350 NA   2.452  0.0237
##  CP effect      -0.6639363 0.3011363 NA  -2.205  0.0343
##  DOWNS effect    1.6081335 0.2850644 NA   5.641  <.0001
##  ID effect       0.2845328 0.2078100 NA   1.369  0.1709
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast           estimate        SE df z.ratio p.value
##  AUTISM - CONTROL -1.9328381 0.3870408 NA  -4.994  <.0001
##  CP - CONTROL     -1.0060760 0.3422356 NA  -2.940  0.0123
##  DOWNS - CONTROL   0.8115798 0.3272243 NA   2.480  0.0465
##  ID - CONTROL     -0.2473008 0.2577996 NA  -0.959  0.7087
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

difference("Attrition", REFERENCE = "CONTROL")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4     37.318              
## Group.1  4   37.318         0      0.000 1.549e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1      lsmean        SE df   asymp.LCL  asymp.UCL
##  AUTISM   0.18514243 0.1761637 NA -0.16013213  0.5304170
##  CONTROL -1.15057203 0.2294738 NA -1.60033242 -0.7008116
##  CP       0.04348511 0.2085637 NA -0.36529223  0.4522625
##  DOWNS   -0.12921173 0.2545305 NA -0.62808240  0.3696589
##  ID       0.37879686 0.1488855 NA  0.08698657  0.6706072
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast           estimate        SE df z.ratio p.value
##  AUTISM effect   0.399517880 0.2061020 NA   1.938  0.0876
##  CONTROL effect -1.270125196 0.2505016 NA  -5.070  <.0001
##  CP effect       0.222446228 0.2327318 NA   0.956  0.4240
##  DOWNS effect    0.006575174 0.2722511 NA   0.024  0.9807
##  ID effect       0.641585915 0.1848389 NA   3.471  0.0013
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast         estimate        SE df z.ratio p.value
##  AUTISM - CONTROL 1.335714 0.2892955 NA   4.617  <.0001
##  CP - CONTROL     1.194057 0.3100920 NA   3.851  0.0005
##  DOWNS - CONTROL  1.021360 0.3427011 NA   2.980  0.0108
##  ID - CONTROL     1.529369 0.2735418 NA   5.591  <.0001
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

difference("Partial.Diagnosis", REFERENCE = "CONTROL")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(YES, NO)
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        4      63.16              
## Group.1  4    63.16         0       0.00 6.281e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] ""
## [1] "LSMEANS"
##  Group.1     lsmean        SE df  asymp.LCL   asymp.UCL
##  AUTISM  -0.9597758 0.1960002 NA -1.3439292 -0.57562252
##  CONTROL -3.9318256 0.7140055 NA -5.3312508 -2.53240049
##  CP      -0.3513979 0.2117411 NA -0.7664029  0.06360711
##  DOWNS   -1.5339304 0.3324409 NA -2.1855025 -0.88235822
##  ID      -0.7498425 0.1566547 NA -1.0568800 -0.44280503
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

##  contrast          estimate        SE df z.ratio p.value
##  AUTISM effect   0.68197326 0.2855207 NA   2.389  0.0211
##  CONTROL effect -3.03308898 0.7234972 NA  -4.192  0.0001
##  CP effect       1.44244570 0.2958702 NA   4.875  <.0001
##  DOWNS effect   -0.03571989 0.3861562 NA  -0.093  0.9263
##  ID effect       0.94438991 0.2617516 NA   3.608  0.0005
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: fdr method for 5 tests 
##  contrast         estimate        SE df z.ratio p.value
##  AUTISM - CONTROL 2.972050 0.7404188 NA   4.014  0.0002
##  CP - CONTROL     3.580428 0.7447404 NA   4.808  <.0001
##  DOWNS - CONTROL  2.397895 0.7876045 NA   3.045  0.0088
##  ID - CONTROL     3.181983 0.7309888 NA   4.353  0.0001
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: dunnettx method for 4 tests

# TB <- Totals.plot[Totals.plot$Diagnosis == "Trabecular.Pattern",]
# TB.sub <- TB[,-1*which(colnames(TB) == "Diagnosis")]
# TB.reshape <- TB.sub %>% spread(ANSWER, Total)
# TB.glm <- glm(cbind(DENSE, MIXED, NONE, SPARSE) ~ Group.1, family = binomial(, data = TB.reshape)
# print(anova(TB.glm))
# print(TB.lsm <- lsmeans(TB.glm, "Group.1"))
# print(plot(TB.lsm,
#            main = gsub("\\.", " ", VARIABLE)))
# contrast(TB.lsm, "del.eff")

Ran PCA on only binary data.

The matrix used and subsetted is the one below. This is just the first 10 lines but I subsetted your large table for this

datatable(head(statsBin), extensions = 'FixedColumns',
  options = list(
    dom = 't',
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2)
  )
)

### ALL DATA
logsvd_model = logisticSVD(statsBin, k = 4, max_iters = 2000)
logpca_cv = cv.lpca(statsBin, ks = 4, ms = 1:10)
logpca_model = logisticPCA(statsBin, k = 4, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(statsBin, k = 4, m = which.min(logpca_cv))

## ALL Together
Diagnosis <- rownames(statsBin)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("blue", "red", "green", "violet"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("blue", "red", "green", "violet"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("blue", "red", "green", "violet"))

### Factor Out Data
ID     <- which(rownames(statsBin) == "ID")
AUTISM <- which(rownames(statsBin) == "AUTISM")
DOWNS  <- which(rownames(statsBin) == "DOWNS")
CP     <- which(rownames(statsBin) == "CP")


### ID vs AUTISM
IDvAUTISM <- statsBin[c(ID, AUTISM),]

logsvd_model = logisticSVD(IDvAUTISM, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(IDvAUTISM, ks = 2, ms = 1:10)

logpca_model = logisticPCA(IDvAUTISM, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(IDvAUTISM, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(IDvAUTISM)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("blue", "red"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("blue", "red"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("blue", "red"))

### ID vs DOWNS
IDvDOWNS <- statsBin[c(ID, DOWNS),]

logsvd_model = logisticSVD(IDvDOWNS, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(IDvDOWNS, ks = 2, ms = 1:10)

logpca_model = logisticPCA(IDvDOWNS, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(IDvDOWNS, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(IDvDOWNS)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("blue", "green"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("blue", "green"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("blue", "green"))

### ID vs CP
IDvCP <- statsBin[c(ID, CP),]

logsvd_model = logisticSVD(IDvCP, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(IDvCP, ks = 2, ms = 1:10)

logpca_model = logisticPCA(IDvCP, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(IDvCP, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(IDvCP)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("blue", "violet"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("blue", "violet"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("blue", "violet"))

### AUTISM vs DOWNS
AUTISMvDOWNS <- statsBin[c(AUTISM, DOWNS),]

logsvd_model = logisticSVD(AUTISMvDOWNS, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(AUTISMvDOWNS, ks = 2, ms = 1:10)

logpca_model = logisticPCA(AUTISMvDOWNS, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(AUTISMvDOWNS, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(AUTISMvDOWNS)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("red", "green"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("red", "green"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("red", "green"))

### AUTISM vs CP
AUTISMvCP <- statsBin[c(AUTISM, CP),]

logsvd_model = logisticSVD(AUTISMvCP, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(AUTISMvCP, ks = 2, ms = 1:10)

logpca_model = logisticPCA(AUTISMvCP, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(AUTISMvCP, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(AUTISMvCP)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("red", "violet"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("red", "violet"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("red", "violet"))

### DOWNS vs CP
DOWNSvCP <- statsBin[c(DOWNS, CP),]

logsvd_model = logisticSVD(DOWNSvCP, k = 2, max_iters = 2000)
logpca_cv = cv.lpca(DOWNSvCP, ks = 2, ms = 1:10)

logpca_model = logisticPCA(DOWNSvCP, k = 2, m = which.min(logpca_cv))
clogpca_model = convexLogisticPCA(DOWNSvCP, k = 2, m = which.min(logpca_cv))

# plot(clogpca_model, type = "trace")
# plot(logsvd_model, type = "trace")

Diagnosis <- rownames(DOWNSvCP)

plot(logsvd_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Exponential Family PCA") + 
  scale_colour_manual(values = c("green", "violet"))

plot(logpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Logistic PCA") + 
  scale_colour_manual(values = c("green", "violet"))

plot(clogpca_model, type = "scores") + 
  geom_point(aes(colour = Diagnosis)) + 
  ggtitle("Convex Logistic PCA") + 
  scale_colour_manual(values = c("green", "violet"))

Statistics

Alex Chitsazan

March 29, 2017

Set up

Summarize Data

Ran PCA on only binary data.