library(RODBC)
library(mongolite)
library(knitr)
library(psych)
library(kableExtra)
library(stringr)
library(dplyr)
library(tidyr)
library(scales)
library(ggplot2)
library(plotly)
library(maps)
library(mapdata)
library(ggrepel) # Not used at the moment, but it does give the option to add labels.

# Ask knitr to emit tables as HTML.
options(knitr.table.format = "html")

# One mongolite connection object per collection. Only the collection name is
# given, so mongo() falls back to its own defaults for everything else.
mbreast   <- mongo(collection = "breast")
mdigothr  <- mongo(collection = "digothr")
mmalegen  <- mongo(collection = "malegen")
mfemgen   <- mongo(collection = "femgent")
mother    <- mongo(collection = "other")
mrespir   <- mongo(collection = "respir")
mcolrect  <- mongo(collection = "colrect")
mlymyleuk <- mongo(collection = "lymyleuk")
murinary  <- mongo(collection = "urinary")
# Every collection is queried with the same filter and projection, so both are
# defined once here instead of being pasted into each call.
#
# Filter: birthYear > 1979, ageDiagnosis > 19 and survivalMonths < 9999.
# NOTE(review): 9999 looks like an "unknown survival" sentinel - confirm
# against the data dictionary.
survival_query <- '{"birthYear" : { "$gt" : 1979 }, "ageDiagnosis" : { "$gt" : 19 }, "survivalMonths" : { "$lt" : 9999 } }'

# Projection: return only the three analysis fields and suppress "_id".
survival_fields <- '{ "ageDiagnosis" : true, "yearDiagnosis" : true, "survivalMonths" : true, "_id" : false }'

# One data frame per collection, each with the columns ageDiagnosis,
# yearDiagnosis and survivalMonths.
breastDF   <- mbreast$find(query = survival_query, fields = survival_fields)
digothrDF  <- mdigothr$find(query = survival_query, fields = survival_fields)
malegenDF  <- mmalegen$find(query = survival_query, fields = survival_fields)
femgenDF   <- mfemgen$find(query = survival_query, fields = survival_fields)
otherDF    <- mother$find(query = survival_query, fields = survival_fields)
respirDF   <- mrespir$find(query = survival_query, fields = survival_fields)
colrectDF  <- mcolrect$find(query = survival_query, fields = survival_fields)
lymyleukDF <- mlymyleuk$find(query = survival_query, fields = survival_fields)
urinaryDF  <- murinary$find(query = survival_query, fields = survival_fields)

# Discard every record that has a missing value in any of the retrieved fields.
breastDF   <- breastDF %>% na.omit()
digothrDF  <- digothrDF %>% na.omit()
malegenDF  <- malegenDF %>% na.omit()
femgenDF   <- femgenDF %>% na.omit()
otherDF    <- otherDF %>% na.omit()
respirDF   <- respirDF %>% na.omit()
colrectDF  <- colrectDF %>% na.omit()
lymyleukDF <- lymyleukDF %>% na.omit()
urinaryDF  <- urinaryDF %>% na.omit()

# Report how many complete records remain in each data frame; the rendered
# console output is kept beneath each call.
breastDF %>% nrow()
## [1] 6187
digothrDF %>% nrow()
## [1] 2751
malegenDF %>% nrow()
## [1] 10007
femgenDF %>% nrow()
## [1] 8094
otherDF %>% nrow()
## [1] 47067
respirDF %>% nrow()
## [1] 1312
colrectDF %>% nrow()
## [1] 3373
lymyleukDF %>% nrow()
## [1] 15810
urinaryDF %>% nrow()
## [1] 2071
# Add two derived columns to every data frame:
#   survivalYears - survivalMonths expressed in years
#   currentYear   - yearDiagnosis plus survivalYears, i.e. the (fractional)
#                   calendar year at which the recorded survival ends
breastDF <- breastDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
digothrDF <- digothrDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
malegenDF <- malegenDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
femgenDF <- femgenDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
otherDF <- otherDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
respirDF <- respirDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
colrectDF <- colrectDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
lymyleukDF <- lymyleukDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )
urinaryDF <- urinaryDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = yearDiagnosis + survivalYears
  )

# Keep only the records whose derived currentYear falls before 2016. Rows where
# the comparison is NA are dropped as well.
breastDF   <- subset(breastDF, currentYear < 2016)
digothrDF  <- subset(digothrDF, currentYear < 2016)
malegenDF  <- subset(malegenDF, currentYear < 2016)
femgenDF   <- subset(femgenDF, currentYear < 2016)
otherDF    <- subset(otherDF, currentYear < 2016)
respirDF   <- subset(respirDF, currentYear < 2016)
colrectDF  <- subset(colrectDF, currentYear < 2016)
lymyleukDF <- subset(lymyleukDF, currentYear < 2016)
urinaryDF  <- subset(urinaryDF, currentYear < 2016)
# Scatter plot of y against x with a straight line and its residuals.
#
# Arguments:
#   x, y         numeric vectors of equal length (plotted as "Diagnosis Year"
#                against "Survival Years").
#   maintitle    main title of the plot.
#   showSquares  if TRUE, draw a square on every residual (side = |residual|).
#   leastSquares if TRUE, draw the least-squares line. If FALSE, the line is
#                defined by two points picked on the plot with locator().
#
# locator() returns NULL when the active graphics device does not support it
# (for example while knitting) or when the user cancels. In that case the
# function falls back to the least-squares line and says so with a warning,
# instead of relying on lm() silently picking x and y up from this function's
# own scope.
#
# Returns NULL invisibly; the function is called for its plot.
plot_ss <- function(x, y, maintitle, showSquares = FALSE, leastSquares = FALSE) {
  plot(x, y, xlab = "Diagnosis Year", ylab = "Survival Years", main = maintitle)

  m1 <- NULL
  if (!leastSquares) {
    pt1 <- locator(1)
    if (!is.null(pt1)) {
      points(pt1$x, pt1$y, pch = 4)
      pt2 <- locator(1)
      if (!is.null(pt2)) {
        points(pt2$x, pt2$y, pch = 4)
        pts <- data.frame(x = c(pt1$x, pt2$x), y = c(pt1$y, pt2$y))
        m1 <- lm(y ~ x, data = pts)
      }
    }
    if (is.null(m1)) {
      warning(
        "locator() returned no point; drawing the least-squares line instead",
        call. = FALSE
      )
    }
  }
  if (is.null(m1)) {
    m1 <- lm(y ~ x)
  }

  # Predict on the original x so the fitted values always line up with y.
  y_hat <- predict(m1, newdata = data.frame(x = x))
  r <- y - y_hat
  abline(m1)

  # Far edge of each residual square; flip it to the other side of the point
  # when it would land outside the horizontal plot limits.
  oSide <- x - r
  usr <- par("usr")
  outside <- which(oSide < usr[1] | oSide > usr[2])
  oSide[outside] <- (x + r)[outside]

  # segments() is vectorised, so all residuals are drawn in a single call.
  segments(x, y, x, y_hat, lty = 2, col = "blue")
  if (showSquares) {
    segments(oSide, y, oSide, y_hat, lty = 3, col = "orange")
    segments(oSide, y_hat, x, y_hat, lty = 3, col = "orange")
    segments(oSide, y, x, y, lty = 3, col = "orange")
  }

  invisible(NULL)
}

# Print descriptive statistics of survival years per diagnosis year and draw a
# bar chart of the yearly means.
#
# Arguments:
#   cancerType data frame with the columns survivalYears and yearDiagnosis.
#   maintitle  main title of the bar chart.
#
# Prints the number of rows and the describeBy() table grouped by diagnosis
# year, then draws the bar chart. The value of barplot() is returned invisibly.
summaryTable <- function(cancerType, maintitle = "") {
  survivalYears <- cancerType$survivalYears
  yearDiagnosis <- cancerType$yearDiagnosis
  meanTable <- tapply(survivalYears, yearDiagnosis, mean)

  print(nrow(cancerType))
  print(describeBy(survivalYears, group = yearDiagnosis, mat = TRUE))

  # The two colours are recycled across the bars, so they alternate by year.
  barplot(
    meanTable,
    beside = TRUE,
    col = c("#ee7700", "#3333ff"),
    main = maintitle,
    xlab = "Diagnosis Year",
    ylab = "Survival Years"
  )
}


# Plot survival years against diagnosis year with the fitted regression line
# and return the summary of the linear model survivalYears ~ yearDiagnosis.
#
# Arguments:
#   cancerType data frame with the columns yearDiagnosis and survivalYears.
#   maintitle  main title of the plot.
#
# The plot explicitly asks plot_ss() for the least-squares line. Without it
# plot_ss() waits for two mouse clicks and draws a hand-picked line, which
# does not correspond to the model summarised here and cannot work in a
# non-interactive (knitted) run.
#
# Returns the summary.lm object of the fitted model.
inferenceTests <- function(cancerType, maintitle = "") {
  yearDiagnosis <- cancerType$yearDiagnosis
  survivalYears <- cancerType$survivalYears
  plot_ss(
    x = yearDiagnosis,
    y = survivalYears,
    maintitle,
    showSquares = FALSE,
    leastSquares = TRUE
  )
  m2 <- lm(survivalYears ~ yearDiagnosis, data = cancerType)
  summary(m2)
}

# Residual diagnostics for the linear model survivalYears ~ yearDiagnosis:
# a histogram of the residuals followed by a normal Q-Q plot with its
# reference line.
#
# Arguments:
#   cancerType data frame with the columns survivalYears and yearDiagnosis.
inferenceTest0 <- function(cancerType) {
  m2 <- lm(survivalYears ~ yearDiagnosis, data = cancerType)

  # hist() builds its title and axis label from the argument expression, so
  # the expression is passed exactly as written.
  hist(m2$residuals)

  model_residuals <- m2$residuals
  qqnorm(model_residuals)
  qqline(model_residuals)
}
## [1] 6187
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1    1  4.8333333        NA  4.8333333  4.8333333
## X12     2   2001    1    4 11.2500000 6.4481551 14.3750000 11.2500000
## X13     3   2002    1    8  8.7187500 4.0362783  8.1666667  8.7187500
## X14     4   2003    1   25  8.4800000 4.8088629 11.7500000  8.8015873
## X15     5   2004    1   36  9.0000000 3.7311431 11.1250000  9.4750000
## X16     6   2005    1   52  7.4535256 4.0209965 10.0833333  7.9047619
## X17     7   2006    1  110  7.3901515 2.9913427  9.0833333  7.8399621
## X18     8   2007    1  167  6.7085828 2.5418827  8.0833333  7.1432099
## X19     9   2008    1  213  6.1799687 2.1934915  7.1666667  6.6033138
## X110   10   2009    1  294  5.2814626 1.9273094  6.1250000  5.6295904
## X111   11   2010    1  380  4.6989035 1.4272917  5.1666667  5.0052083
## X112   12   2011    1  542  3.8831488 1.0787816  4.2500000  4.1027266
## X113   13   2012    1  742  3.0940027 0.7670125  3.2500000  3.2330247
## X114   14   2013    1  922  2.1900759 0.6302859  2.3333333  2.3020551
## X115   15   2014    1 1204  1.3306340 0.4141566  1.3333333  1.3723202
## X116   16   2015    1 1487  0.4263058 0.2864006  0.4166667  0.4208648
##           mad       min        max      range        skew   kurtosis
## X11  0.000000 4.8333333  4.8333333  0.0000000          NA         NA
## X12  0.370650 1.5833333 14.6666667 13.0833333 -0.74743079 -1.6892610
## X13  5.992175 3.3333333 13.6666667 10.3333333  0.02933688 -1.8303679
## X14  1.606150 0.6666667 12.9166667 12.2500000 -0.57176243 -1.5495128
## X15  0.926625 1.1666667 11.9166667 10.7500000 -1.00612693 -0.6902704
## X16  1.111950 0.0000000 10.9166667 10.9166667 -0.72250143 -1.2674157
## X17  0.988400 0.4166667  9.9166667  9.5000000 -1.07570346 -0.4758606
## X18  0.741300 0.0000000  8.9166667  8.9166667 -1.27574746  0.2345301
## X19  0.741300 0.0000000  7.9166667  7.9166667 -1.49460194  0.8460625
## X110 0.864850 0.0000000  6.9166667  6.9166667 -1.37294026  0.5221796
## X111 0.617750 0.0000000  5.9166667  5.9166667 -1.79145247  2.1844618
## X112 0.494200 0.0000000  4.9166667  4.9166667 -1.82058148  2.6078321
## X113 0.494200 0.0000000  3.9166667  3.9166667 -1.82604429  3.3642527
## X114 0.370650 0.0000000  2.9166667  2.9166667 -1.67463537  2.6260787
## X115 0.370650 0.0000000  1.9166667  1.9166667 -0.97996611  1.2579829
## X116 0.370650 0.0000000  0.9166667  0.9166667  0.12436243 -1.1749750
##               se
## X11           NA
## X12  3.224077531
## X13  1.427039885
## X14  0.961772573
## X15  0.621857177
## X16  0.557611894
## X17  0.285213333
## X18  0.196696787
## X19  0.150295562
## X110 0.112402967
## X111 0.073218520
## X112 0.046337637
## X113 0.028157915
## X114 0.020757359
## X115 0.011935794
## X116 0.007427086

## [1] 2751
##      item group1 vars   n      mean        sd    median   trimmed      mad
## X11     1   2000    1   1 0.0000000        NA 0.0000000 0.0000000 0.000000
## X12     2   2001    1   4 7.0416667 7.0120597 6.7083333 7.0416667 8.401400
## X13     3   2002    1  25 6.5033333 6.0540811 2.5000000 6.4206349 3.335850
## X14     4   2003    1  18 3.3796296 5.0072025 0.9583333 2.9947917 1.050175
## X15     5   2004    1  40 5.0479167 4.9439842 2.0416667 4.8463542 2.903425
## X16     6   2005    1  54 4.3503086 4.2719522 1.7083333 4.1174242 2.285675
## X17     7   2006    1  74 5.0078829 4.0398078 4.7083333 5.0291667 6.486375
## X18     8   2007    1 112 4.7328869 3.6154133 4.4583333 4.8046296 5.745075
## X19     9   2008    1 127 4.2749344 3.0771301 4.7500000 4.3495146 4.077150
## X110   10   2009    1 161 3.8995859 2.7140344 5.2500000 4.0109819 2.223900
## X111   11   2010    1 195 3.3089744 2.2985747 4.3333333 3.4007431 2.223900
## X112   12   2011    1 240 2.8833333 1.7975589 3.9166667 2.9913194 1.235500
## X113   13   2012    1 336 2.2418155 1.3598404 2.9583333 2.3120370 1.173725
## X114   14   2013    1 357 1.6979458 0.9852583 2.0833333 1.7572590 0.864850
## X115   15   2014    1 482 1.0937068 0.5643574 1.1666667 1.1247841 0.617750
## X116   16   2015    1 525 0.3979365 0.2896714 0.3333333 0.3873713 0.370650
##             min        max      range        skew   kurtosis         se
## X11  0.00000000  0.0000000  0.0000000          NA         NA         NA
## X12  0.50000000 14.2500000 13.7500000  0.03045457 -2.3851378 3.50602986
## X13  0.00000000 13.8333333 13.8333333  0.21167918 -1.9628543 1.21081622
## X14  0.08333333 12.8333333 12.7500000  1.18568524 -0.5526444 1.18020895
## X15  0.00000000 11.8333333 11.8333333  0.33422363 -1.7918197 0.78171254
## X16  0.00000000 10.9166667 10.9166667  0.42874584 -1.6643959 0.58133906
## X17  0.00000000  9.8333333  9.8333333 -0.00374778 -1.7948115 0.46961812
## X18  0.00000000  8.9166667  8.9166667 -0.07806678 -1.8345818 0.34162445
## X19  0.00000000  7.9166667  7.9166667 -0.15853819 -1.7474296 0.27305114
## X110 0.00000000  6.9166667  6.9166667 -0.33537151 -1.6899573 0.21389587
## X111 0.00000000  5.9166667  5.9166667 -0.30473273 -1.6849115 0.16460435
## X112 0.00000000  4.9166667  4.9166667 -0.45477198 -1.5115634 0.11603193
## X113 0.00000000  3.9166667  3.9166667 -0.40991629 -1.4414820 0.07418537
## X114 0.00000000  2.9166667  2.9166667 -0.53315576 -1.2703405 0.05214540
## X115 0.00000000  1.9166667  1.9166667 -0.47500423 -0.8386924 0.02570577
## X116 0.00000000  0.9166667  0.9166667  0.21437922 -1.2163451 0.01264230

## [1] 10007
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   15 14.9222222 0.5321664 15.0833333 14.9487179
## X12     2   2001    1   63 13.0436508 3.4531509 14.2500000 13.9967320
## X13     3   2002    1  122 11.8422131 3.7454565 13.1666667 12.9030612
## X14     4   2003    1  174 10.9631226 3.3284823 12.1666667 11.8619048
## X15     5   2004    1  259  9.9350064 3.3234692 11.2500000 10.8197767
## X16     6   2005    1  332  8.9879518 3.0375609 10.2500000  9.7453008
## X17     7   2006    1  414  8.1050725 2.6939117  9.1666667  8.7735944
## X18     8   2007    1  548  7.2718978 2.4084870  8.2500000  7.8532197
## X19     9   2008    1  614  6.5605320 1.9686920  7.2500000  7.0628388
## X110   10   2009    1  754  5.7393899 1.5529718  6.2500000  6.1207230
## X111   11   2010    1  863  4.7932599 1.4422518  5.2500000  5.1431500
## X112   12   2011    1  929  3.9175637 1.1610983  4.3333333  4.1814318
## X113   13   2012    1 1050  3.0260317 0.9452198  3.3333333  3.2305556
## X114   14   2013    1 1197  2.1032442 0.7801287  2.3333333  2.2367918
## X115   15   2014    1 1341  1.2644171 0.4978179  1.3333333  1.3258776
## X116   16   2015    1 1332  0.3999625 0.2912436  0.3333333  0.3892276
##          mad       min        max      range       skew   kurtosis
## X11  0.61775 13.916667 15.5833333  1.6666667 -0.4882215 -1.1731810
## X12  0.37065  1.166667 14.9166667 13.7500000 -2.5402678  5.0939138
## X13  0.61775  0.000000 13.9166667 13.9166667 -2.4144538  4.3800296
## X14  0.74130  0.000000 12.9166667 12.9166667 -2.2863806  3.8959040
## X15  0.49420  0.000000 11.9166667 11.9166667 -2.2208680  3.3923638
## X16  0.61775  0.000000 10.9166667 10.9166667 -1.9733311  2.3827282
## X17  0.74130  0.000000  9.9166667  9.9166667 -1.9963119  2.6150102
## X18  0.49420  0.000000  8.9166667  8.9166667 -1.9611846  2.4359881
## X19  0.61775  0.000000  7.9166667  7.9166667 -2.1699348  3.4885641
## X110 0.49420  0.000000  6.9166667  6.9166667 -2.4212024  5.1671078
## X111 0.49420  0.000000  5.9166667  5.9166667 -2.1380289  3.5604130
## X112 0.49420  0.000000  4.9166667  4.9166667 -2.0345731  3.3480521
## X113 0.49420  0.000000  3.9166667  3.9166667 -1.9333999  3.0314334
## X114 0.49420  0.000000  2.9166667  2.9166667 -1.4130029  1.0086906
## X115 0.49420  0.000000  1.9166667  1.9166667 -0.9610746  0.3508472
## X116 0.37065  0.000000  0.9166667  0.9166667  0.2137514 -1.1854673
##               se
## X11  0.137404769
## X12  0.435056120
## X13  0.339097698
## X14  0.252331591
## X15  0.206510325
## X16  0.166707812
## X17  0.132398536
## X18  0.102885464
## X19  0.079449942
## X110 0.056555897
## X111 0.049094821
## X112 0.038094401
## X113 0.029170116
## X114 0.022548579
## X115 0.013594277
## X116 0.007980024

## [1] 8094
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   13 13.2820513 4.7093212 15.2500000 14.1666667
## X12     2   2001    1   47 12.3581560 4.0371507 14.1666667 13.1837607
## X13     3   2002    1   90 11.2657407 4.0349769 13.0416667 12.2013889
## X14     4   2003    1  144 11.2216435 3.1166951 12.3333333 12.0905172
## X15     5   2004    1  167 10.3822355 2.6684426 11.2500000 11.0864198
## X16     6   2005    1  272  8.7876838 3.1988371 10.1666667  9.5309633
## X17     7   2006    1  289  7.8194925 2.9737796  9.1666667  8.4445637
## X18     8   2007    1  345  7.0045894 2.7381850  8.2500000  7.5737064
## X19     9   2008    1  471  6.1475584 2.3707525  7.2500000  6.6366048
## X110   10   2009    1  569  5.3598418 2.0546839  6.2500000  5.7711524
## X111   11   2010    1  621  4.6298980 1.5502598  5.2500000  4.9523810
## X112   12   2011    1  745  3.6623043 1.3913272  4.2500000  3.9124791
## X113   13   2012    1  865  2.9251445 1.0015783  3.2500000  3.1108706
## X114   14   2013    1  923  2.0811665 0.7780727  2.3333333  2.2095174
## X115   15   2014    1 1173  1.2473714 0.4828535  1.3333333  1.3012957
## X116   16   2015    1 1360  0.3949755 0.2834579  0.3333333  0.3825061
##           mad        min        max      range       skew   kurtosis
## X11  0.370650 1.25000000 15.5833333 14.3333333 -1.7494770  1.3073755
## X12  0.494200 0.00000000 14.9166667 14.9166667 -1.8616069  2.0115736
## X13  0.679525 0.00000000 13.9166667 13.9166667 -1.7780638  1.6943768
## X14  0.494200 0.41666667 12.9166667 12.5000000 -2.5816628  5.3915540
## X15  0.494200 0.08333333 11.9166667 11.8333333 -2.7264030  6.4690347
## X16  0.741300 0.00000000 10.9166667 10.9166667 -1.8201064  1.7540934
## X17  0.741300 0.00000000  9.9166667  9.9166667 -1.6527736  1.1836903
## X18  0.617750 0.00000000  8.9166667  8.9166667 -1.6207744  1.0042703
## X19  0.617750 0.00000000  7.9166667  7.9166667 -1.5748017  0.9644577
## X110 0.617750 0.00000000  6.9166667  6.9166667 -1.5869224  0.9622148
## X111 0.494200 0.00000000  5.9166667  5.9166667 -1.6751411  1.5518152
## X112 0.617750 0.00000000  4.9166667  4.9166667 -1.4233457  0.6429668
## X113 0.494200 0.00000000  3.9166667  3.9166667 -1.5220534  1.2720098
## X114 0.494200 0.00000000  2.9166667  2.9166667 -1.3733708  0.8404641
## X115 0.494200 0.00000000  1.9166667  1.9166667 -0.8949719  0.2688231
## X116 0.370650 0.00000000  0.9166667  0.9166667  0.2806307 -1.0513799
##               se
## X11  1.306130708
## X12  0.588878946
## X13  0.425323909
## X14  0.259724590
## X15  0.206490291
## X16  0.193957990
## X17  0.174928209
## X18  0.147418927
## X19  0.109238509
## X110 0.086136832
## X111 0.062209782
## X112 0.050974286
## X113 0.034054685
## X114 0.025610577
## X115 0.014098287
## X116 0.007686331

## [1] 1312
##      item group1 vars   n      mean        sd    median  trimmed      mad
## X11     1   2000    1   3 5.0833333 7.9385662  0.500000 5.083333 0.000000
## X12     2   2001    1   7 7.6190476 6.3113392  5.916667 7.619048 8.030750
## X13     3   2002    1  18 8.0787037 5.8365168 10.416667 8.213542 5.065550
## X14     4   2003    1  18 8.7592593 5.1427251 11.791667 9.052083 1.359050
## X15     5   2004    1  36 6.6597222 5.1095295 10.875000 6.802778 1.420825
## X16     6   2005    1  53 5.5974843 4.5687044  6.250000 5.643411 6.301050
## X17     7   2006    1  39 5.8824786 3.8944742  8.333333 6.060606 2.100350
## X18     8   2007    1  69 4.9879227 3.4807558  5.833333 5.074561 4.200700
## X19     9   2008    1  79 4.2858650 3.0871622  4.666667 4.366667 4.077150
## X110   10   2009    1  84 3.8134921 2.4676494  4.083333 3.898284 3.459400
## X111   11   2010    1 113 3.3997050 2.2520427  4.666667 3.494505 1.729700
## X112   12   2011    1 123 2.8604336 1.8398572  3.833333 2.957071 1.482600
## X113   13   2012    1 134 2.4322139 1.2432847  3.000000 2.544753 0.988400
## X114   14   2013    1 157 1.8253715 0.9407190  2.166667 1.913386 0.741300
## X115   15   2014    1 171 1.1530214 0.5500080  1.166667 1.196472 0.617750
## X116   16   2015    1 208 0.4014423 0.2750505  0.375000 0.390873 0.308875
##             min        max      range        skew   kurtosis         se
## X11  0.50000000 14.2500000 13.7500000  0.38490018 -2.3333333 4.58333333
## X12  0.50000000 14.2500000 13.7500000  0.08691345 -2.1263827 2.38546200
## X13  0.08333333 13.9166667 13.8333333 -0.28505468 -1.8614169 1.37568020
## X14  0.00000000 12.8333333 12.8333333 -0.69232173 -1.4943969 1.21215193
## X15  0.00000000 11.9166667 11.9166667 -0.20058282 -1.9204423 0.85158825
## X16  0.00000000 10.9166667 10.9166667 -0.05316921 -1.8783755 0.62755981
## X17  0.00000000  9.8333333  9.8333333 -0.34640948 -1.7115492 0.62361496
## X18  0.00000000  8.9166667  8.9166667 -0.18655038 -1.7919717 0.41903375
## X19  0.00000000  7.9166667  7.9166667 -0.18960012 -1.7425643 0.34733288
## X110 0.00000000  6.9166667  6.9166667 -0.16388037 -1.6645718 0.26924263
## X111 0.00000000  5.9166667  5.9166667 -0.32585411 -1.6921376 0.21185436
## X112 0.00000000  4.9166667  4.9166667 -0.39881135 -1.5707762 0.16589434
## X113 0.00000000  3.9166667  3.9166667 -0.66511742 -1.0695465 0.10740344
## X114 0.00000000  2.9166667  2.9166667 -0.77654935 -0.9243973 0.07507755
## X115 0.00000000  1.9166667  1.9166667 -0.60474181 -0.6060992 0.04206016
## X116 0.00000000  0.9166667  0.9166667  0.31758379 -1.0091003 0.01907132

## [1] 3373
##      item group1 vars   n       mean        sd     median    trimmed
## X11     1   2000    1   2 15.0000000 0.0000000 15.0000000 15.0000000
## X12     2   2001    1   8  6.8750000 6.4406632  3.5833333  6.8750000
## X13     3   2002    1  10  6.9416667 5.9093003  7.5000000  6.9583333
## X14     4   2003    1  27  7.6913580 5.4204531 10.9166667  7.8804348
## X15     5   2004    1  62  8.0766129 4.3051918 10.9166667  8.5666667
## X16     6   2005    1  47  7.3989362 3.8959108  9.8333333  7.7970085
## X17     7   2006    1  93  6.1164875 3.7503852  8.7500000  6.3688889
## X18     8   2007    1 109  5.5145260 3.2126863  7.2500000  5.7106742
## X19     9   2008    1 130  5.2519231 2.8257701  7.0833333  5.5440705
## X110   10   2009    1 177  4.7655367 2.3455178  6.0833333  5.0565268
## X111   11   2010    1 224  3.8232887 1.9563605  5.0000000  4.0226852
## X112   12   2011    1 291  3.3719931 1.5115169  4.1666667  3.5625894
## X113   13   2012    1 390  2.6722222 1.1852350  3.1666667  2.8373397
## X114   14   2013    1 430  1.9443798 0.8522849  2.2500000  2.0513566
## X115   15   2014    1 604  1.2580022 0.4717761  1.3333333  1.3102617
## X116   16   2015    1 769  0.4024707 0.2940239  0.4166667  0.3918152
##           mad         min        max      range        skew   kurtosis
## X11  0.000000 15.00000000 15.0000000  0.0000000         NaN        NaN
## X12  3.521175  0.75000000 14.6666667 13.9166667  0.33550940 -2.0210282
## X13  8.833825  0.25000000 13.5000000 13.2500000 -0.01543171 -2.0216641
## X14  2.841650  0.25000000 12.9166667 12.6666667 -0.34245042 -1.8458635
## X15  1.050175  0.08333333 11.9166667 11.8333333 -0.79445584 -1.1111647
## X16  1.359050  0.00000000 10.9166667 10.9166667 -0.82307381 -0.9793109
## X17  1.359050  0.00000000  9.9166667  9.9166667 -0.45007454 -1.6265609
## X18  2.223900  0.00000000  8.9166667  8.9166667 -0.39814366 -1.5765905
## X19  1.111950  0.00000000  7.9166667  7.9166667 -0.72098320 -1.2046739
## X110 0.864850  0.00000000  6.9166667  6.9166667 -0.90994785 -0.8328925
## X111 1.111950  0.00000000  5.9166667  5.9166667 -0.67327189 -1.0686429
## X112 0.864850  0.00000000  4.9166667  4.9166667 -0.91227499 -0.6362036
## X113 0.741300  0.00000000  3.9166667  3.9166667 -1.06085297 -0.2578700
## X114 0.617750  0.00000000  2.9166667  2.9166667 -0.96710630 -0.2703407
## X115 0.370650  0.00000000  1.9166667  1.9166667 -0.89683473  0.3303978
## X116 0.370650  0.00000000  0.9166667  0.9166667  0.17197686 -1.1959179
##              se
## X11  0.00000000
## X12  2.27711832
## X13  1.86868484
## X14  1.04316668
## X15  0.54675990
## X16  0.56827699
## X17  0.38889683
## X18  0.30771954
## X19  0.24783644
## X110 0.17629992
## X111 0.13071483
## X112 0.08860669
## X113 0.06001671
## X114 0.04110083
## X115 0.01919630
## X116 0.01060277

## [1] 15810
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   51  9.6388889 7.0359526 15.0000000 10.0772358
## X12     2   2001    1  146 10.3835616 5.5435275 14.0000000 11.0381356
## X13     3   2002    1  223  9.0164425 5.5580343 12.7500000  9.5204842
## X14     4   2003    1  374  9.2653743 4.7722213 12.0833333  9.9330556
## X15     5   2004    1  471  8.5658174 4.3142518 11.0833333  9.1657825
## X16     6   2005    1  636  8.0061583 3.8015767 10.1666667  8.5906863
## X17     7   2006    1  785  7.2599788 3.4319061  9.0833333  7.8003445
## X18     8   2007    1  897  6.4783538 3.0392023  8.1666667  6.9390357
## X19     9   2008    1 1053  5.8186135 2.6250002  7.1666667  6.2492092
## X110   10   2009    1 1144  5.0777244 2.2451841  6.1666667  5.4504185
## X111   11   2010    1 1252  4.3706736 1.8325120  5.1666667  4.6862941
## X112   12   2011    1 1486  3.5810902 1.4612352  4.2500000  3.8271008
## X113   13   2012    1 1596  2.8782895 1.0766159  3.2500000  3.0713354
## X114   14   2013    1 1812  2.0277318 0.8083840  2.2500000  2.1474713
## X115   15   2014    1 1891  1.2708444 0.4883064  1.3333333  1.3299185
## X116   16   2015    1 1993  0.4185483 0.2876103  0.4166667  0.4124347
##          mad min        max      range       skew    kurtosis          se
## X11  0.86485   0 15.6666667 15.6666667 -0.4394575 -1.81869191 0.985230436
## X12  1.11195   0 14.9166667 14.9166667 -0.8481886 -1.06931356 0.458785601
## X13  1.48260   0 13.9166667 13.9166667 -0.6571649 -1.40566244 0.372193507
## X14  0.98840   0 12.9166667 12.9166667 -1.0393957 -0.73650517 0.246765691
## X15  0.98840   0 11.9166667 11.9166667 -1.0168391 -0.74676391 0.198790231
## X16  0.86485   0 10.9166667 10.9166667 -1.1351040 -0.44886427 0.150742320
## X17  0.86485   0  9.9166667  9.9166667 -1.1799383 -0.34569199 0.122489980
## X18  0.86485   0  8.9166667  8.9166667 -1.1454111 -0.39247486 0.101476010
## X19  0.74130   0  7.9166667  7.9166667 -1.2284365 -0.15514855 0.080893784
## X110 0.74130   0  6.9166667  6.9166667 -1.2509971 -0.04121102 0.066380258
## X111 0.74130   0  5.9166667  5.9166667 -1.3247583  0.20067010 0.051789852
## X112 0.61775   0  4.9166667  4.9166667 -1.3085414  0.27475664 0.037906240
## X113 0.49420   0  3.9166667  3.9166667 -1.4506606  0.91154716 0.026949106
## X114 0.49420   0  2.9166667  2.9166667 -1.1865344  0.30935753 0.018990596
## X115 0.49420   0  1.9166667  1.9166667 -0.9562487  0.41036932 0.011229144
## X116 0.37065   0  0.9166667  0.9166667  0.1331035 -1.19023429 0.006442447

## [1] 2071
##      item group1 vars   n       mean        sd     median    trimmed
## X11     1   2000    1   1 15.0833333        NA 15.0833333 15.0833333
## X12     2   2001    1   7 12.2738095 5.3443277 14.0833333 12.2738095
## X13     3   2002    1   7 12.0833333 2.5617377 13.4166667 12.0833333
## X14     4   2003    1  23 10.4746377 3.6034000 12.1666667 11.0131579
## X15     5   2004    1  25 10.0633333 3.7141247 11.5833333 10.8293651
## X16     6   2005    1  41  8.8414634 3.3642733 10.3333333  9.5883838
## X17     7   2006    1  52  7.3573718 3.4537615  9.1666667  7.9126984
## X18     8   2007    1  72  6.6597222 2.9570395  8.1666667  7.1867816
## X19     9   2008    1  96  5.6788194 2.7185732  7.0833333  6.0448718
## X110   10   2009    1 128  5.3580729 2.0337635  6.2500000  5.7363782
## X111   11   2010    1 138  4.8272947 1.4750437  5.3333333  5.1882440
## X112   12   2011    1 209  3.5741627 1.4667849  4.1666667  3.8249507
## X113   13   2012    1 236  3.0716808 0.9138478  3.3750000  3.2614035
## X114   14   2013    1 250  2.0296667 0.7753473  2.2500000  2.1625000
## X115   15   2014    1 356  1.2673221 0.4604596  1.3333333  1.3196387
## X116   16   2015    1 430  0.4044574 0.2892289  0.4166667  0.3948643
##           mad         min        max      range       skew   kurtosis
## X11  0.000000 15.08333333 15.0833333  0.0000000         NA         NA
## X12  0.247100  0.16666667 14.6666667 14.5000000 -1.6127063  0.7816982
## X13  0.494200  7.83333333 13.9166667  6.0833333 -0.7726650 -1.4911531
## X14  0.617750  2.83333333 12.9166667 10.0833333 -1.3163970 -0.1541933
## X15  0.494200  0.08333333 11.9166667 11.8333333 -2.1218300  2.7578731
## X16  0.494200  0.41666667 10.9166667 10.5000000 -1.7067668  1.1672779
## X17  0.741300  0.00000000  9.9166667  9.9166667 -1.2322516 -0.2345104
## X18  0.617750  0.08333333  8.9166667  8.8333333 -1.4134617  0.3870985
## X19  0.988400  0.08333333  7.9166667  7.8333333 -1.1179675 -0.4647938
## X110 0.617750  0.00000000  6.9166667  6.9166667 -1.5304521  0.8081577
## X111 0.494200  0.00000000  5.9166667  5.9166667 -2.2293506  3.9125863
## X112 0.617750  0.00000000  4.9166667  4.9166667 -1.3735918  0.4969708
## X113 0.432425  0.00000000  3.9166667  3.9166667 -1.9396044  3.0473382
## X114 0.370650  0.00000000  2.9166667  2.9166667 -1.4252670  1.0198441
## X115 0.370650  0.00000000  1.9166667  1.9166667 -0.9509548  0.6532527
## X116 0.370650  0.00000000  0.9166667  0.9166667  0.1689730 -1.1606098
##              se
## X11          NA
## X12  2.01996600
## X13  0.96824584
## X14  0.75136084
## X15  0.74282494
## X16  0.52541122
## X17  0.47895055
## X18  0.34849045
## X19  0.27746322
## X110 0.17976100
## X111 0.12556406
## X112 0.10145963
## X113 0.05948642
## X114 0.04903727
## X115 0.02440431
## X116 0.01394786

## [1] 47067
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   76 11.8026316 5.6964713 15.0833333 12.6518817
## X12     2   2001    1  227 11.6174743 4.8363106 14.0833333 12.5560109
## X13     3   2002    1  422 11.0491706 4.3201427 13.1666667 11.9733728
## X14     4   2003    1  616 10.2195617 3.9381780 12.1666667 11.0543185
## X15     5   2004    1 1135  9.5611601 3.5633420 11.1666667 10.3745875
## X16     6   2005    1 1467  8.9190525 3.0391071 10.2500000  9.6552482
## X17     7   2006    1 1780  8.0466292 2.7428164  9.2500000  8.6986774
## X18     8   2007    1 2204  7.1564958 2.5178313  8.2500000  7.7442366
## X19     9   2008    1 2647  6.3954477 2.1130324  7.2500000  6.8964921
## X110   10   2009    1 3229  5.5849850 1.8006077  6.2500000  6.0169890
## X111   11   2010    1 3808  4.6887693 1.5260534  5.2500000  5.0318788
## X112   12   2011    1 4412  3.8652539 1.2621613  4.3333333  4.1508026
## X113   13   2012    1 5117  3.0147385 0.9827431  3.3333333  3.2248067
## X114   14   2013    1 5665  2.1314063 0.7498012  2.3333333  2.2714354
## X115   15   2014    1 6681  1.2748092 0.4768585  1.3333333  1.3361397
## X116   16   2015    1 7581  0.4035308 0.2938067  0.4166667  0.3937345
##          mad min        max      range       skew   kurtosis          se
## X11  0.61775   0 15.8333333 15.8333333 -1.1779759 -0.4291945 0.653430075
## X12  0.74130   0 14.9166667 14.9166667 -1.4903066  0.5624016 0.320997206
## X13  0.74130   0 13.9166667 13.9166667 -1.6262512  1.0034717 0.210301256
## X14  0.74130   0 12.9166667 12.9166667 -1.6000670  0.9832322 0.158673713
## X15  0.61775   0 11.9166667 11.9166667 -1.7549313  1.4866455 0.105769287
## X16  0.61775   0 10.9166667 10.9166667 -1.9167504  2.2357118 0.079347079
## X17  0.61775   0  9.9166667  9.9166667 -1.8664227  2.0791928 0.065010982
## X18  0.49420   0  8.9166667  8.9166667 -1.8260935  1.8826980 0.053631610
## X19  0.61775   0  7.9166667  7.9166667 -1.9288946  2.4466829 0.041070425
## X110 0.49420   0  6.9166667  6.9166667 -2.0044366  2.7759574 0.031687288
## X111 0.49420   0  5.9166667  5.9166667 -1.8743631  2.3454865 0.024729837
## X112 0.49420   0  4.9166667  4.9166667 -1.9031351  2.5647548 0.019001904
## X113 0.49420   0  3.9166667  3.9166667 -1.8394490  2.5103369 0.013738278
## X114 0.49420   0  2.9166667  2.9166667 -1.5769186  1.6716638 0.009961991
## X115 0.37065   0  1.9166667  1.9166667 -1.0365136  0.6951526 0.005834032
## X116 0.37065   0  0.9166667  0.9166667  0.1720125 -1.2146785 0.003374415

# Regression plot and model summary for the breast data frame.
inferenceTests(cancerType = breastDF, maintitle = "Breast Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8860 -0.2762  0.1405  0.5572  3.1973 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.568e+03  1.178e+01   133.0   <2e-16 ***
## yearDiagnosis -7.777e-01  5.855e-03  -132.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.212 on 6185 degrees of freedom
## Multiple R-squared:  0.7404, Adjusted R-squared:  0.7404 
## F-statistic: 1.764e+04 on 1 and 6185 DF,  p-value: < 2.2e-16

H0: Breast Cancer Survival Years do not improve as Diagnosis Year increases (the regression slope is zero or negative).

H1: Breast Cancer Survival Years improve as Diagnosis Year increases (the regression slope is positive).

# Regression plot and model summary for the digothr data frame.
inferenceTests(cancerType = digothrDF, maintitle = "Digestive Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6980 -0.9995 -0.0329  1.0674  7.0685 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   940.81931   25.60226   36.75   <2e-16 ***
## yearDiagnosis  -0.46656    0.01273  -36.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.071 on 2749 degrees of freedom
## Multiple R-squared:  0.3284, Adjusted R-squared:  0.3281 
## F-statistic:  1344 on 1 and 2749 DF,  p-value: < 2.2e-16

H0: Digestive Cancer Survival Years do not improve as Diagnosis Year increases (the regression slope is zero or negative).

H1: Digestive Cancer Survival Years improve as Diagnosis Year increases (the regression slope is positive).

# Regression plot and model summary for the malegen data frame.
inferenceTests(cancerType = malegenDF, maintitle = "Male Genital Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.7591  -0.1415   0.3572   0.7752   2.2832 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.762e+03  9.303e+00   189.4   <2e-16 ***
## yearDiagnosis -8.743e-01  4.626e-03  -189.0   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.608 on 10005 degrees of freedom
## Multiple R-squared:  0.7812, Adjusted R-squared:  0.7811 
## F-statistic: 3.572e+04 on 1 and 10005 DF,  p-value: < 2.2e-16

H0: Male Genital Cancer Survival Years do not improve as Diagnosis Year increases (the regression slope is zero or negative).

H1: Male Genital Cancer Survival Years improve as Diagnosis Year increases (the regression slope is positive).

# Regression plot and model summary for the femgen data frame.
inferenceTests(cancerType = femgenDF, maintitle = "Female Genital Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2799  -0.1660   0.3601   0.8415   2.6368 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.717e+03  1.084e+01   158.5   <2e-16 ***
## yearDiagnosis -8.519e-01  5.388e-03  -158.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.682 on 8092 degrees of freedom
## Multiple R-squared:  0.7555, Adjusted R-squared:  0.7555 
## F-statistic: 2.5e+04 on 1 and 8092 DF,  p-value: < 2.2e-16

H0: Female Genital Cancer Survival Years do not improve as Diagnosis Year increases (the regression slope is zero or negative).

H1: Female Genital Cancer Survival Years improve as Diagnosis Year increases (the regression slope is positive).