Project Description:

Project Goal:

Data Sources

R Libraries:

Load the necessary libraries:

library(RODBC)
library(dplyr)
library(stringr)
library(ggplot2)
library(plotly)
library(kableExtra)
library(data.table)
library(knitr)
library(psych)
library(tidyr)
library(scales)
library(maps)
library(mapdata)

Below are the steps followed to perform the database migration:

  1. Establish MySQL DB Connection:

I used the RODBC package and an ODBC data source named ‘MySQL_SEERS_Analysis’ to connect to the database and retrieve its tables into their respective data frames.

# Open a connection through the ODBC data source named "MySQL_SEERS_Analysis"
con <- odbcConnect(dsn = "MySQL_SEERS_Analysis")

Data Sets:

A. SEERS Cancer Patients Master:

# Fetch the master table. stringsAsFactors is given to sqlFetch() (which
# forwards extra arguments to the result reader); passing it to
# as.data.frame() on an object that is already a data frame has no effect.
seerMasterDF <- as.data.frame(
  sqlFetch(con, "Cancer_Patients_Master", stringsAsFactors = FALSE)
)

### Derive Year attributes
# survivalYears: survival time converted from months to years.
# currentYear:   diagnosis year plus survival years.
# Rows are then restricted by currentYear, birthYear, ageDiagnosis and
# survivalMonths; subset() also drops rows where the condition is NA.
seerMasterDF <- seerMasterDF %>%
  mutate(
    survivalYears = survivalMonths / 12,
    currentYear = survivalYears + yearDiagnosis
  ) %>%
  subset(
    currentYear < 2016 & birthYear > 1979 & ageDiagnosis > 19 &
      survivalMonths < 9999
  )

# Render the first rows of the filtered master table as a styled table
# inside a fixed-height scroll box.
head(seerMasterDF) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "300px")
SEERDiagnosticUnit personID locality maritalStatus race derivedHispanicOrigin sex ageDiagnosis birthYear sequenceNumber monthDiagnosis yearDiagnosis primarySite laterality histology behavior histologicType behaviorCode grade diagnosticConfirmation reportingSourceType survivalMonths survivalYears currentYear
1598 Breast 54027867 Rural Georgia married Black Not Latino F 28 1980 0 12 2008 C509 Left 8500 3 8500 Malignant III Positive histology Hospital inpatient 0 0.0000000 2008.000
1770 Breast 54030257 Rural Georgia single Black Not Latino F 29 1980 2 12 2009 C504 Right 8500 3 8500 Malignant III Positive histology Hospital inpatient 24 2.0000000 2011.000
2100 Breast 54033947 Rural Georgia single Black Not Latino F 26 1987 0 3 2013 C508 Right 8500 3 8500 Malignant III Positive histology Hospital inpatient 33 2.7500000 2015.750
2252 Breast 54035851 Rural Georgia married White Not Latino F 32 1981 0 4 2014 C502 Right 8500 3 8500 Malignant II Positive histology Hospital inpatient 20 1.6666667 2015.667
2429 Breast 54038191 Rural Georgia divorced White Not Latino F 35 1980 0 10 2015 C508 Left 8500 3 8500 Malignant III Positive histology Hospital inpatient 2 0.1666667 2015.167
2440 Breast 54038338 Rural Georgia single Black Not Latino F 30 1984 0 11 2015 C508 Right 8500 3 8500 Malignant III Positive histology Hospital inpatient 1 0.0833333 2015.083
### Create individual data frames based on cancer types
# One data frame per SEERDiagnosticUnit value; each keeps every column of
# seerMasterDF and only the rows whose unit matches the given label.

breastDF   <- subset(seerMasterDF, SEERDiagnosticUnit == "Breast")
digothrDF  <- subset(seerMasterDF, SEERDiagnosticUnit == "Digothr")
malegenDF  <- subset(seerMasterDF, SEERDiagnosticUnit == "Malegen")
femgenDF   <- subset(seerMasterDF, SEERDiagnosticUnit == "Femgen")
respirDF   <- subset(seerMasterDF, SEERDiagnosticUnit == "Respir")
colrectDF  <- subset(seerMasterDF, SEERDiagnosticUnit == "Colrect")
lymyleukDF <- subset(seerMasterDF, SEERDiagnosticUnit == "Lymyleuk")
urinaryDF  <- subset(seerMasterDF, SEERDiagnosticUnit == "Urinary")
otherDF    <- subset(seerMasterDF, SEERDiagnosticUnit == "Other")

Project Function Definitions

#' Scatter plot with a fitted line and residual segments
#'
#' Plots `y` against `x` (axes labelled "Diagnosis Year" / "Survival Years"),
#' adds a straight line, and draws a dashed blue segment from every point to
#' the line. Optionally completes each residual into a dotted orange square.
#'
#' @param x Numeric vector of x values.
#' @param y Numeric vector of y values, same length as `x`.
#' @param maintitle Plot title.
#' @param showSquares If `TRUE`, draw the residual squares as well.
#' @param leastSquares If `TRUE`, the line is the least-squares fit of `y` on
#'   `x`. If `FALSE`, the line passes through two points picked with
#'   `locator()`; when no point can be picked (`locator()` returns `NULL`,
#'   e.g. on a non-interactive device) the least-squares fit is used instead.
#' @return `NULL`, invisibly. Called for its plotting side effect.
plot_ss <- function(x, y, maintitle, showSquares = FALSE, leastSquares = FALSE) {
  if (length(x) != length(y)) {
    stop("'x' and 'y' must have the same length", call. = FALSE)
  }

  # Keep complete pairs only: lm() silently drops incomplete rows, which
  # would otherwise leave the fitted values misaligned with `y`.
  keep <- complete.cases(x, y)
  x <- x[keep]
  y <- y[keep]

  plot(x, y, xlab = "Diagnosis Year", ylab = "Survival Years", main = maintitle)

  fit <- NULL
  if (!leastSquares) {
    first <- locator(1)
    if (!is.null(first)) {
      points(first$x, first$y, pch = 4)
    }
    second <- locator(1)
    if (!is.null(second)) {
      points(second$x, second$y, pch = 4)
    }
    if (!is.null(first) && !is.null(second)) {
      clicked <- data.frame(
        x = c(first$x, second$x),
        y = c(first$y, second$y)
      )
      fit <- lm(y ~ x, data = clicked)
      y_hat <- predict(fit, newdata = data.frame(x = x))
    }
  }
  if (is.null(fit)) {
    # Requested explicitly, or no points could be picked.
    fit <- lm(y ~ x)
    y_hat <- fitted(fit)
  }
  abline(fit)

  resid_len <- y - y_hat

  # The far side of each square sits one residual to the left of the point;
  # flip it to the right when it would fall outside the plot region.
  usr <- par("usr")
  other_side <- x - resid_len
  outside <- which(other_side < usr[1] | other_side > usr[2])
  other_side[outside] <- (x + resid_len)[outside]

  # segments() is vectorised, so no per-point loop is needed.
  segments(x, y, x, y_hat, lty = 2, col = "blue")
  if (showSquares) {
    segments(other_side, y, other_side, y_hat, lty = 3, col = "orange")
    segments(other_side, y_hat, x, y_hat, lty = 3, col = "orange")
    segments(other_side, y, x, y, lty = 3, col = "orange")
  }

  invisible(NULL)
}

#' Print per-year survival statistics and plot mean survival by diagnosis year
#'
#' Prints the number of rows in `cancerType`, prints the `describeBy()`
#' (psych) statistics of survival years grouped by diagnosis year, and draws
#' a bar plot of the mean survival years for each diagnosis year.
#'
#' @param cancerType Data frame with `survivalYears` and `yearDiagnosis`
#'   columns.
#' @param maintitle Title of the bar plot.
#' @return The value returned by `barplot()`.
summaryTable <- function(cancerType, maintitle = " test") {
  survival_years <- cancerType$survivalYears
  diagnosis_year <- cancerType$yearDiagnosis
  mean_by_year <- tapply(survival_years, diagnosis_year, mean)

  # print() rather than show(): it does not depend on `methods` being attached.
  print(nrow(cancerType))
  print(describeBy(survival_years, group = diagnosis_year, mat = TRUE))

  barplot(
    mean_by_year,
    beside = TRUE,
    col = c("#ee7700", "#3333ff"),
    main = maintitle,
    xlab = "Diagnosis Year",
    ylab = "Survival Years"
  )
}


#' Plot survival years against diagnosis year and summarise the linear fit
#'
#' Draws the scatter plot with its least-squares line via `plot_ss()`, then
#' fits `survivalYears ~ yearDiagnosis` and returns the model summary.
#'
#' @param cancerType Data frame with `survivalYears` and `yearDiagnosis`
#'   columns.
#' @param maintitle Title of the plot.
#' @return The `summary()` of the fitted linear model.
inferenceTests <- function(cancerType, maintitle = "") {
  # leastSquares = TRUE: the plotted line must be the same least-squares fit
  # that is summarised below, and must not wait for mouse clicks.
  plot_ss(
    x = cancerType$yearDiagnosis,
    y = cancerType$survivalYears,
    maintitle = maintitle,
    showSquares = FALSE,
    leastSquares = TRUE
  )
  m2 <- lm(survivalYears ~ yearDiagnosis, data = cancerType)
  summary(m2)
}

#' Residual diagnostics for the survival-years regression
#'
#' Fits `survivalYears ~ yearDiagnosis` on `cancerType` and draws a histogram
#' and a normal Q-Q plot (with reference line) of the residuals.
#'
#' @param cancerType Data frame with `survivalYears` and `yearDiagnosis`
#'   columns.
#' @return The value returned by `qqline()`.
inferenceTest0 <- function(cancerType) {
  fit <- lm(survivalYears ~ yearDiagnosis, data = cancerType)
  res <- fit$residuals

  # Labels are spelled out so the histogram keeps its original title and
  # axis label, which hist() would otherwise derive from the argument text.
  hist(res, main = "Histogram of m2$residuals", xlab = "m2$residuals")
  qqnorm(res)
  qqline(res)
}
## [1] 6187
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1    1  4.8333333        NA  4.8333333  4.8333333
## X12     2   2001    1    4 11.2500000 6.4481551 14.3750000 11.2500000
## X13     3   2002    1    8  8.7187500 4.0362783  8.1666667  8.7187500
## X14     4   2003    1   25  8.4800000 4.8088629 11.7500000  8.8015873
## X15     5   2004    1   36  9.0000000 3.7311431 11.1250000  9.4750000
## X16     6   2005    1   52  7.4535256 4.0209965 10.0833333  7.9047619
## X17     7   2006    1  110  7.3901515 2.9913427  9.0833333  7.8399621
## X18     8   2007    1  167  6.7085828 2.5418827  8.0833333  7.1432099
## X19     9   2008    1  213  6.1799687 2.1934915  7.1666667  6.6033138
## X110   10   2009    1  294  5.2814626 1.9273094  6.1250000  5.6295904
## X111   11   2010    1  380  4.6989035 1.4272917  5.1666667  5.0052083
## X112   12   2011    1  542  3.8831488 1.0787816  4.2500000  4.1027266
## X113   13   2012    1  742  3.0940027 0.7670125  3.2500000  3.2330247
## X114   14   2013    1  922  2.1900759 0.6302859  2.3333333  2.3020551
## X115   15   2014    1 1204  1.3306340 0.4141566  1.3333333  1.3723202
## X116   16   2015    1 1487  0.4263058 0.2864006  0.4166667  0.4208648
##           mad       min        max      range        skew   kurtosis
## X11  0.000000 4.8333333  4.8333333  0.0000000          NA         NA
## X12  0.370650 1.5833333 14.6666667 13.0833333 -0.74743079 -1.6892610
## X13  5.992175 3.3333333 13.6666667 10.3333333  0.02933688 -1.8303679
## X14  1.606150 0.6666667 12.9166667 12.2500000 -0.57176243 -1.5495128
## X15  0.926625 1.1666667 11.9166667 10.7500000 -1.00612693 -0.6902704
## X16  1.111950 0.0000000 10.9166667 10.9166667 -0.72250143 -1.2674157
## X17  0.988400 0.4166667  9.9166667  9.5000000 -1.07570346 -0.4758606
## X18  0.741300 0.0000000  8.9166667  8.9166667 -1.27574746  0.2345301
## X19  0.741300 0.0000000  7.9166667  7.9166667 -1.49460194  0.8460625
## X110 0.864850 0.0000000  6.9166667  6.9166667 -1.37294026  0.5221796
## X111 0.617750 0.0000000  5.9166667  5.9166667 -1.79145247  2.1844618
## X112 0.494200 0.0000000  4.9166667  4.9166667 -1.82058148  2.6078321
## X113 0.494200 0.0000000  3.9166667  3.9166667 -1.82604429  3.3642527
## X114 0.370650 0.0000000  2.9166667  2.9166667 -1.67463537  2.6260787
## X115 0.370650 0.0000000  1.9166667  1.9166667 -0.97996611  1.2579829
## X116 0.370650 0.0000000  0.9166667  0.9166667  0.12436243 -1.1749750
##               se
## X11           NA
## X12  3.224077531
## X13  1.427039885
## X14  0.961772573
## X15  0.621857177
## X16  0.557611894
## X17  0.285213333
## X18  0.196696787
## X19  0.150295562
## X110 0.112402967
## X111 0.073218520
## X112 0.046337637
## X113 0.028157915
## X114 0.020757359
## X115 0.011935794
## X116 0.007427086

## [1] 2751
##      item group1 vars   n      mean        sd    median   trimmed      mad
## X11     1   2000    1   1 0.0000000        NA 0.0000000 0.0000000 0.000000
## X12     2   2001    1   4 7.0416667 7.0120597 6.7083333 7.0416667 8.401400
## X13     3   2002    1  25 6.5033333 6.0540811 2.5000000 6.4206349 3.335850
## X14     4   2003    1  18 3.3796296 5.0072025 0.9583333 2.9947917 1.050175
## X15     5   2004    1  40 5.0479167 4.9439842 2.0416667 4.8463542 2.903425
## X16     6   2005    1  54 4.3503086 4.2719522 1.7083333 4.1174242 2.285675
## X17     7   2006    1  74 5.0078829 4.0398078 4.7083333 5.0291667 6.486375
## X18     8   2007    1 112 4.7328869 3.6154133 4.4583333 4.8046296 5.745075
## X19     9   2008    1 127 4.2749344 3.0771301 4.7500000 4.3495146 4.077150
## X110   10   2009    1 161 3.8995859 2.7140344 5.2500000 4.0109819 2.223900
## X111   11   2010    1 195 3.3089744 2.2985747 4.3333333 3.4007431 2.223900
## X112   12   2011    1 240 2.8833333 1.7975589 3.9166667 2.9913194 1.235500
## X113   13   2012    1 336 2.2418155 1.3598404 2.9583333 2.3120370 1.173725
## X114   14   2013    1 357 1.6979458 0.9852583 2.0833333 1.7572590 0.864850
## X115   15   2014    1 482 1.0937068 0.5643574 1.1666667 1.1247841 0.617750
## X116   16   2015    1 525 0.3979365 0.2896714 0.3333333 0.3873713 0.370650
##             min        max      range        skew   kurtosis         se
## X11  0.00000000  0.0000000  0.0000000          NA         NA         NA
## X12  0.50000000 14.2500000 13.7500000  0.03045457 -2.3851378 3.50602986
## X13  0.00000000 13.8333333 13.8333333  0.21167918 -1.9628543 1.21081622
## X14  0.08333333 12.8333333 12.7500000  1.18568524 -0.5526444 1.18020895
## X15  0.00000000 11.8333333 11.8333333  0.33422363 -1.7918197 0.78171254
## X16  0.00000000 10.9166667 10.9166667  0.42874584 -1.6643959 0.58133906
## X17  0.00000000  9.8333333  9.8333333 -0.00374778 -1.7948115 0.46961812
## X18  0.00000000  8.9166667  8.9166667 -0.07806678 -1.8345818 0.34162445
## X19  0.00000000  7.9166667  7.9166667 -0.15853819 -1.7474296 0.27305114
## X110 0.00000000  6.9166667  6.9166667 -0.33537151 -1.6899573 0.21389587
## X111 0.00000000  5.9166667  5.9166667 -0.30473273 -1.6849115 0.16460435
## X112 0.00000000  4.9166667  4.9166667 -0.45477198 -1.5115634 0.11603193
## X113 0.00000000  3.9166667  3.9166667 -0.40991629 -1.4414820 0.07418537
## X114 0.00000000  2.9166667  2.9166667 -0.53315576 -1.2703405 0.05214540
## X115 0.00000000  1.9166667  1.9166667 -0.47500423 -0.8386924 0.02570577
## X116 0.00000000  0.9166667  0.9166667  0.21437922 -1.2163451 0.01264230

## [1] 10007
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   15 14.9222222 0.5321664 15.0833333 14.9487179
## X12     2   2001    1   63 13.0436508 3.4531509 14.2500000 13.9967320
## X13     3   2002    1  122 11.8422131 3.7454565 13.1666667 12.9030612
## X14     4   2003    1  174 10.9631226 3.3284823 12.1666667 11.8619048
## X15     5   2004    1  259  9.9350064 3.3234692 11.2500000 10.8197767
## X16     6   2005    1  332  8.9879518 3.0375609 10.2500000  9.7453008
## X17     7   2006    1  414  8.1050725 2.6939117  9.1666667  8.7735944
## X18     8   2007    1  548  7.2718978 2.4084870  8.2500000  7.8532197
## X19     9   2008    1  614  6.5605320 1.9686920  7.2500000  7.0628388
## X110   10   2009    1  754  5.7393899 1.5529718  6.2500000  6.1207230
## X111   11   2010    1  863  4.7932599 1.4422518  5.2500000  5.1431500
## X112   12   2011    1  929  3.9175637 1.1610983  4.3333333  4.1814318
## X113   13   2012    1 1050  3.0260317 0.9452198  3.3333333  3.2305556
## X114   14   2013    1 1197  2.1032442 0.7801287  2.3333333  2.2367918
## X115   15   2014    1 1341  1.2644171 0.4978179  1.3333333  1.3258776
## X116   16   2015    1 1332  0.3999625 0.2912436  0.3333333  0.3892276
##          mad       min        max      range       skew   kurtosis
## X11  0.61775 13.916667 15.5833333  1.6666667 -0.4882215 -1.1731810
## X12  0.37065  1.166667 14.9166667 13.7500000 -2.5402678  5.0939138
## X13  0.61775  0.000000 13.9166667 13.9166667 -2.4144538  4.3800296
## X14  0.74130  0.000000 12.9166667 12.9166667 -2.2863806  3.8959040
## X15  0.49420  0.000000 11.9166667 11.9166667 -2.2208680  3.3923638
## X16  0.61775  0.000000 10.9166667 10.9166667 -1.9733311  2.3827282
## X17  0.74130  0.000000  9.9166667  9.9166667 -1.9963119  2.6150102
## X18  0.49420  0.000000  8.9166667  8.9166667 -1.9611846  2.4359881
## X19  0.61775  0.000000  7.9166667  7.9166667 -2.1699348  3.4885641
## X110 0.49420  0.000000  6.9166667  6.9166667 -2.4212024  5.1671078
## X111 0.49420  0.000000  5.9166667  5.9166667 -2.1380289  3.5604130
## X112 0.49420  0.000000  4.9166667  4.9166667 -2.0345731  3.3480521
## X113 0.49420  0.000000  3.9166667  3.9166667 -1.9333999  3.0314334
## X114 0.49420  0.000000  2.9166667  2.9166667 -1.4130029  1.0086906
## X115 0.49420  0.000000  1.9166667  1.9166667 -0.9610746  0.3508472
## X116 0.37065  0.000000  0.9166667  0.9166667  0.2137514 -1.1854673
##               se
## X11  0.137404769
## X12  0.435056120
## X13  0.339097698
## X14  0.252331591
## X15  0.206510325
## X16  0.166707812
## X17  0.132398536
## X18  0.102885464
## X19  0.079449942
## X110 0.056555897
## X111 0.049094821
## X112 0.038094401
## X113 0.029170116
## X114 0.022548579
## X115 0.013594277
## X116 0.007980024

## [1] 8094
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   13 13.2820513 4.7093212 15.2500000 14.1666667
## X12     2   2001    1   47 12.3581560 4.0371507 14.1666667 13.1837607
## X13     3   2002    1   90 11.2657407 4.0349769 13.0416667 12.2013889
## X14     4   2003    1  144 11.2216435 3.1166951 12.3333333 12.0905172
## X15     5   2004    1  167 10.3822355 2.6684426 11.2500000 11.0864198
## X16     6   2005    1  272  8.7876838 3.1988371 10.1666667  9.5309633
## X17     7   2006    1  289  7.8194925 2.9737796  9.1666667  8.4445637
## X18     8   2007    1  345  7.0045894 2.7381850  8.2500000  7.5737064
## X19     9   2008    1  471  6.1475584 2.3707525  7.2500000  6.6366048
## X110   10   2009    1  569  5.3598418 2.0546839  6.2500000  5.7711524
## X111   11   2010    1  621  4.6298980 1.5502598  5.2500000  4.9523810
## X112   12   2011    1  745  3.6623043 1.3913272  4.2500000  3.9124791
## X113   13   2012    1  865  2.9251445 1.0015783  3.2500000  3.1108706
## X114   14   2013    1  923  2.0811665 0.7780727  2.3333333  2.2095174
## X115   15   2014    1 1173  1.2473714 0.4828535  1.3333333  1.3012957
## X116   16   2015    1 1360  0.3949755 0.2834579  0.3333333  0.3825061
##           mad        min        max      range       skew   kurtosis
## X11  0.370650 1.25000000 15.5833333 14.3333333 -1.7494770  1.3073755
## X12  0.494200 0.00000000 14.9166667 14.9166667 -1.8616069  2.0115736
## X13  0.679525 0.00000000 13.9166667 13.9166667 -1.7780638  1.6943768
## X14  0.494200 0.41666667 12.9166667 12.5000000 -2.5816628  5.3915540
## X15  0.494200 0.08333333 11.9166667 11.8333333 -2.7264030  6.4690347
## X16  0.741300 0.00000000 10.9166667 10.9166667 -1.8201064  1.7540934
## X17  0.741300 0.00000000  9.9166667  9.9166667 -1.6527736  1.1836903
## X18  0.617750 0.00000000  8.9166667  8.9166667 -1.6207744  1.0042703
## X19  0.617750 0.00000000  7.9166667  7.9166667 -1.5748017  0.9644577
## X110 0.617750 0.00000000  6.9166667  6.9166667 -1.5869224  0.9622148
## X111 0.494200 0.00000000  5.9166667  5.9166667 -1.6751411  1.5518152
## X112 0.617750 0.00000000  4.9166667  4.9166667 -1.4233457  0.6429668
## X113 0.494200 0.00000000  3.9166667  3.9166667 -1.5220534  1.2720098
## X114 0.494200 0.00000000  2.9166667  2.9166667 -1.3733708  0.8404641
## X115 0.494200 0.00000000  1.9166667  1.9166667 -0.8949719  0.2688231
## X116 0.370650 0.00000000  0.9166667  0.9166667  0.2806307 -1.0513799
##               se
## X11  1.306130708
## X12  0.588878946
## X13  0.425323909
## X14  0.259724590
## X15  0.206490291
## X16  0.193957990
## X17  0.174928209
## X18  0.147418927
## X19  0.109238509
## X110 0.086136832
## X111 0.062209782
## X112 0.050974286
## X113 0.034054685
## X114 0.025610577
## X115 0.014098287
## X116 0.007686331

## [1] 1312
##      item group1 vars   n      mean        sd    median  trimmed      mad
## X11     1   2000    1   3 5.0833333 7.9385662  0.500000 5.083333 0.000000
## X12     2   2001    1   7 7.6190476 6.3113392  5.916667 7.619048 8.030750
## X13     3   2002    1  18 8.0787037 5.8365168 10.416667 8.213542 5.065550
## X14     4   2003    1  18 8.7592593 5.1427251 11.791667 9.052083 1.359050
## X15     5   2004    1  36 6.6597222 5.1095295 10.875000 6.802778 1.420825
## X16     6   2005    1  53 5.5974843 4.5687044  6.250000 5.643411 6.301050
## X17     7   2006    1  39 5.8824786 3.8944742  8.333333 6.060606 2.100350
## X18     8   2007    1  69 4.9879227 3.4807558  5.833333 5.074561 4.200700
## X19     9   2008    1  79 4.2858650 3.0871622  4.666667 4.366667 4.077150
## X110   10   2009    1  84 3.8134921 2.4676494  4.083333 3.898284 3.459400
## X111   11   2010    1 113 3.3997050 2.2520427  4.666667 3.494505 1.729700
## X112   12   2011    1 123 2.8604336 1.8398572  3.833333 2.957071 1.482600
## X113   13   2012    1 134 2.4322139 1.2432847  3.000000 2.544753 0.988400
## X114   14   2013    1 157 1.8253715 0.9407190  2.166667 1.913386 0.741300
## X115   15   2014    1 171 1.1530214 0.5500080  1.166667 1.196472 0.617750
## X116   16   2015    1 208 0.4014423 0.2750505  0.375000 0.390873 0.308875
##             min        max      range        skew   kurtosis         se
## X11  0.50000000 14.2500000 13.7500000  0.38490018 -2.3333333 4.58333333
## X12  0.50000000 14.2500000 13.7500000  0.08691345 -2.1263827 2.38546200
## X13  0.08333333 13.9166667 13.8333333 -0.28505468 -1.8614169 1.37568020
## X14  0.00000000 12.8333333 12.8333333 -0.69232173 -1.4943969 1.21215193
## X15  0.00000000 11.9166667 11.9166667 -0.20058282 -1.9204423 0.85158825
## X16  0.00000000 10.9166667 10.9166667 -0.05316921 -1.8783755 0.62755981
## X17  0.00000000  9.8333333  9.8333333 -0.34640948 -1.7115492 0.62361496
## X18  0.00000000  8.9166667  8.9166667 -0.18655038 -1.7919717 0.41903375
## X19  0.00000000  7.9166667  7.9166667 -0.18960012 -1.7425643 0.34733288
## X110 0.00000000  6.9166667  6.9166667 -0.16388037 -1.6645718 0.26924263
## X111 0.00000000  5.9166667  5.9166667 -0.32585411 -1.6921376 0.21185436
## X112 0.00000000  4.9166667  4.9166667 -0.39881135 -1.5707762 0.16589434
## X113 0.00000000  3.9166667  3.9166667 -0.66511742 -1.0695465 0.10740344
## X114 0.00000000  2.9166667  2.9166667 -0.77654935 -0.9243973 0.07507755
## X115 0.00000000  1.9166667  1.9166667 -0.60474181 -0.6060992 0.04206016
## X116 0.00000000  0.9166667  0.9166667  0.31758379 -1.0091003 0.01907132

## [1] 3373
##      item group1 vars   n       mean        sd     median    trimmed
## X11     1   2000    1   2 15.0000000 0.0000000 15.0000000 15.0000000
## X12     2   2001    1   8  6.8750000 6.4406632  3.5833333  6.8750000
## X13     3   2002    1  10  6.9416667 5.9093003  7.5000000  6.9583333
## X14     4   2003    1  27  7.6913580 5.4204531 10.9166667  7.8804348
## X15     5   2004    1  62  8.0766129 4.3051918 10.9166667  8.5666667
## X16     6   2005    1  47  7.3989362 3.8959108  9.8333333  7.7970085
## X17     7   2006    1  93  6.1164875 3.7503852  8.7500000  6.3688889
## X18     8   2007    1 109  5.5145260 3.2126863  7.2500000  5.7106742
## X19     9   2008    1 130  5.2519231 2.8257701  7.0833333  5.5440705
## X110   10   2009    1 177  4.7655367 2.3455178  6.0833333  5.0565268
## X111   11   2010    1 224  3.8232887 1.9563605  5.0000000  4.0226852
## X112   12   2011    1 291  3.3719931 1.5115169  4.1666667  3.5625894
## X113   13   2012    1 390  2.6722222 1.1852350  3.1666667  2.8373397
## X114   14   2013    1 430  1.9443798 0.8522849  2.2500000  2.0513566
## X115   15   2014    1 604  1.2580022 0.4717761  1.3333333  1.3102617
## X116   16   2015    1 769  0.4024707 0.2940239  0.4166667  0.3918152
##           mad         min        max      range        skew   kurtosis
## X11  0.000000 15.00000000 15.0000000  0.0000000         NaN        NaN
## X12  3.521175  0.75000000 14.6666667 13.9166667  0.33550940 -2.0210282
## X13  8.833825  0.25000000 13.5000000 13.2500000 -0.01543171 -2.0216641
## X14  2.841650  0.25000000 12.9166667 12.6666667 -0.34245042 -1.8458635
## X15  1.050175  0.08333333 11.9166667 11.8333333 -0.79445584 -1.1111647
## X16  1.359050  0.00000000 10.9166667 10.9166667 -0.82307381 -0.9793109
## X17  1.359050  0.00000000  9.9166667  9.9166667 -0.45007454 -1.6265609
## X18  2.223900  0.00000000  8.9166667  8.9166667 -0.39814366 -1.5765905
## X19  1.111950  0.00000000  7.9166667  7.9166667 -0.72098320 -1.2046739
## X110 0.864850  0.00000000  6.9166667  6.9166667 -0.90994785 -0.8328925
## X111 1.111950  0.00000000  5.9166667  5.9166667 -0.67327189 -1.0686429
## X112 0.864850  0.00000000  4.9166667  4.9166667 -0.91227499 -0.6362036
## X113 0.741300  0.00000000  3.9166667  3.9166667 -1.06085297 -0.2578700
## X114 0.617750  0.00000000  2.9166667  2.9166667 -0.96710630 -0.2703407
## X115 0.370650  0.00000000  1.9166667  1.9166667 -0.89683473  0.3303978
## X116 0.370650  0.00000000  0.9166667  0.9166667  0.17197686 -1.1959179
##              se
## X11  0.00000000
## X12  2.27711832
## X13  1.86868484
## X14  1.04316668
## X15  0.54675990
## X16  0.56827699
## X17  0.38889683
## X18  0.30771954
## X19  0.24783644
## X110 0.17629992
## X111 0.13071483
## X112 0.08860669
## X113 0.06001671
## X114 0.04110083
## X115 0.01919630
## X116 0.01060277

## [1] 15810
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   51  9.6388889 7.0359526 15.0000000 10.0772358
## X12     2   2001    1  146 10.3835616 5.5435275 14.0000000 11.0381356
## X13     3   2002    1  223  9.0164425 5.5580343 12.7500000  9.5204842
## X14     4   2003    1  374  9.2653743 4.7722213 12.0833333  9.9330556
## X15     5   2004    1  471  8.5658174 4.3142518 11.0833333  9.1657825
## X16     6   2005    1  636  8.0061583 3.8015767 10.1666667  8.5906863
## X17     7   2006    1  785  7.2599788 3.4319061  9.0833333  7.8003445
## X18     8   2007    1  897  6.4783538 3.0392023  8.1666667  6.9390357
## X19     9   2008    1 1053  5.8186135 2.6250002  7.1666667  6.2492092
## X110   10   2009    1 1144  5.0777244 2.2451841  6.1666667  5.4504185
## X111   11   2010    1 1252  4.3706736 1.8325120  5.1666667  4.6862941
## X112   12   2011    1 1486  3.5810902 1.4612352  4.2500000  3.8271008
## X113   13   2012    1 1596  2.8782895 1.0766159  3.2500000  3.0713354
## X114   14   2013    1 1812  2.0277318 0.8083840  2.2500000  2.1474713
## X115   15   2014    1 1891  1.2708444 0.4883064  1.3333333  1.3299185
## X116   16   2015    1 1993  0.4185483 0.2876103  0.4166667  0.4124347
##          mad min        max      range       skew    kurtosis          se
## X11  0.86485   0 15.6666667 15.6666667 -0.4394575 -1.81869191 0.985230436
## X12  1.11195   0 14.9166667 14.9166667 -0.8481886 -1.06931356 0.458785601
## X13  1.48260   0 13.9166667 13.9166667 -0.6571649 -1.40566244 0.372193507
## X14  0.98840   0 12.9166667 12.9166667 -1.0393957 -0.73650517 0.246765691
## X15  0.98840   0 11.9166667 11.9166667 -1.0168391 -0.74676391 0.198790231
## X16  0.86485   0 10.9166667 10.9166667 -1.1351040 -0.44886427 0.150742320
## X17  0.86485   0  9.9166667  9.9166667 -1.1799383 -0.34569199 0.122489980
## X18  0.86485   0  8.9166667  8.9166667 -1.1454111 -0.39247486 0.101476010
## X19  0.74130   0  7.9166667  7.9166667 -1.2284365 -0.15514855 0.080893784
## X110 0.74130   0  6.9166667  6.9166667 -1.2509971 -0.04121102 0.066380258
## X111 0.74130   0  5.9166667  5.9166667 -1.3247583  0.20067010 0.051789852
## X112 0.61775   0  4.9166667  4.9166667 -1.3085414  0.27475664 0.037906240
## X113 0.49420   0  3.9166667  3.9166667 -1.4506606  0.91154716 0.026949106
## X114 0.49420   0  2.9166667  2.9166667 -1.1865344  0.30935753 0.018990596
## X115 0.49420   0  1.9166667  1.9166667 -0.9562487  0.41036932 0.011229144
## X116 0.37065   0  0.9166667  0.9166667  0.1331035 -1.19023429 0.006442447

## [1] 2071
##      item group1 vars   n       mean        sd     median    trimmed
## X11     1   2000    1   1 15.0833333        NA 15.0833333 15.0833333
## X12     2   2001    1   7 12.2738095 5.3443277 14.0833333 12.2738095
## X13     3   2002    1   7 12.0833333 2.5617377 13.4166667 12.0833333
## X14     4   2003    1  23 10.4746377 3.6034000 12.1666667 11.0131579
## X15     5   2004    1  25 10.0633333 3.7141247 11.5833333 10.8293651
## X16     6   2005    1  41  8.8414634 3.3642733 10.3333333  9.5883838
## X17     7   2006    1  52  7.3573718 3.4537615  9.1666667  7.9126984
## X18     8   2007    1  72  6.6597222 2.9570395  8.1666667  7.1867816
## X19     9   2008    1  96  5.6788194 2.7185732  7.0833333  6.0448718
## X110   10   2009    1 128  5.3580729 2.0337635  6.2500000  5.7363782
## X111   11   2010    1 138  4.8272947 1.4750437  5.3333333  5.1882440
## X112   12   2011    1 209  3.5741627 1.4667849  4.1666667  3.8249507
## X113   13   2012    1 236  3.0716808 0.9138478  3.3750000  3.2614035
## X114   14   2013    1 250  2.0296667 0.7753473  2.2500000  2.1625000
## X115   15   2014    1 356  1.2673221 0.4604596  1.3333333  1.3196387
## X116   16   2015    1 430  0.4044574 0.2892289  0.4166667  0.3948643
##           mad         min        max      range       skew   kurtosis
## X11  0.000000 15.08333333 15.0833333  0.0000000         NA         NA
## X12  0.247100  0.16666667 14.6666667 14.5000000 -1.6127063  0.7816982
## X13  0.494200  7.83333333 13.9166667  6.0833333 -0.7726650 -1.4911531
## X14  0.617750  2.83333333 12.9166667 10.0833333 -1.3163970 -0.1541933
## X15  0.494200  0.08333333 11.9166667 11.8333333 -2.1218300  2.7578731
## X16  0.494200  0.41666667 10.9166667 10.5000000 -1.7067668  1.1672779
## X17  0.741300  0.00000000  9.9166667  9.9166667 -1.2322516 -0.2345104
## X18  0.617750  0.08333333  8.9166667  8.8333333 -1.4134617  0.3870985
## X19  0.988400  0.08333333  7.9166667  7.8333333 -1.1179675 -0.4647938
## X110 0.617750  0.00000000  6.9166667  6.9166667 -1.5304521  0.8081577
## X111 0.494200  0.00000000  5.9166667  5.9166667 -2.2293506  3.9125863
## X112 0.617750  0.00000000  4.9166667  4.9166667 -1.3735918  0.4969708
## X113 0.432425  0.00000000  3.9166667  3.9166667 -1.9396044  3.0473382
## X114 0.370650  0.00000000  2.9166667  2.9166667 -1.4252670  1.0198441
## X115 0.370650  0.00000000  1.9166667  1.9166667 -0.9509548  0.6532527
## X116 0.370650  0.00000000  0.9166667  0.9166667  0.1689730 -1.1606098
##              se
## X11          NA
## X12  2.01996600
## X13  0.96824584
## X14  0.75136084
## X15  0.74282494
## X16  0.52541122
## X17  0.47895055
## X18  0.34849045
## X19  0.27746322
## X110 0.17976100
## X111 0.12556406
## X112 0.10145963
## X113 0.05948642
## X114 0.04903727
## X115 0.02440431
## X116 0.01394786

## [1] 47067
##      item group1 vars    n       mean        sd     median    trimmed
## X11     1   2000    1   76 11.8026316 5.6964713 15.0833333 12.6518817
## X12     2   2001    1  227 11.6174743 4.8363106 14.0833333 12.5560109
## X13     3   2002    1  422 11.0491706 4.3201427 13.1666667 11.9733728
## X14     4   2003    1  616 10.2195617 3.9381780 12.1666667 11.0543185
## X15     5   2004    1 1135  9.5611601 3.5633420 11.1666667 10.3745875
## X16     6   2005    1 1467  8.9190525 3.0391071 10.2500000  9.6552482
## X17     7   2006    1 1780  8.0466292 2.7428164  9.2500000  8.6986774
## X18     8   2007    1 2204  7.1564958 2.5178313  8.2500000  7.7442366
## X19     9   2008    1 2647  6.3954477 2.1130324  7.2500000  6.8964921
## X110   10   2009    1 3229  5.5849850 1.8006077  6.2500000  6.0169890
## X111   11   2010    1 3808  4.6887693 1.5260534  5.2500000  5.0318788
## X112   12   2011    1 4412  3.8652539 1.2621613  4.3333333  4.1508026
## X113   13   2012    1 5117  3.0147385 0.9827431  3.3333333  3.2248067
## X114   14   2013    1 5665  2.1314063 0.7498012  2.3333333  2.2714354
## X115   15   2014    1 6681  1.2748092 0.4768585  1.3333333  1.3361397
## X116   16   2015    1 7581  0.4035308 0.2938067  0.4166667  0.3937345
##          mad min        max      range       skew   kurtosis          se
## X11  0.61775   0 15.8333333 15.8333333 -1.1779759 -0.4291945 0.653430075
## X12  0.74130   0 14.9166667 14.9166667 -1.4903066  0.5624016 0.320997206
## X13  0.74130   0 13.9166667 13.9166667 -1.6262512  1.0034717 0.210301256
## X14  0.74130   0 12.9166667 12.9166667 -1.6000670  0.9832322 0.158673713
## X15  0.61775   0 11.9166667 11.9166667 -1.7549313  1.4866455 0.105769287
## X16  0.61775   0 10.9166667 10.9166667 -1.9167504  2.2357118 0.079347079
## X17  0.61775   0  9.9166667  9.9166667 -1.8664227  2.0791928 0.065010982
## X18  0.49420   0  8.9166667  8.9166667 -1.8260935  1.8826980 0.053631610
## X19  0.61775   0  7.9166667  7.9166667 -1.9288946  2.4466829 0.041070425
## X110 0.49420   0  6.9166667  6.9166667 -2.0044366  2.7759574 0.031687288
## X111 0.49420   0  5.9166667  5.9166667 -1.8743631  2.3454865 0.024729837
## X112 0.49420   0  4.9166667  4.9166667 -1.9031351  2.5647548 0.019001904
## X113 0.49420   0  3.9166667  3.9166667 -1.8394490  2.5103369 0.013738278
## X114 0.49420   0  2.9166667  2.9166667 -1.5769186  1.6716638 0.009961991
## X115 0.37065   0  1.9166667  1.9166667 -1.0365136  0.6951526 0.005834032
## X116 0.37065   0  0.9166667  0.9166667  0.1720125 -1.2146785 0.003374415

# Plot survival years against diagnosis year for the "Breast" subset and print the linear-model summary
inferenceTests(breastDF, "Breast Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8860 -0.2762  0.1405  0.5572  3.1973 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.568e+03  1.178e+01   133.0   <2e-16 ***
## yearDiagnosis -7.777e-01  5.855e-03  -132.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.212 on 6185 degrees of freedom
## Multiple R-squared:  0.7404, Adjusted R-squared:  0.7404 
## F-statistic: 1.764e+04 on 1 and 6185 DF,  p-value: < 2.2e-16

H0: Breast cancer survival years do not improve as diagnosis year increases (the regression slope is zero or negative).

H1: Breast cancer survival years improve as diagnosis year increases (the regression slope is positive).

# Plot survival years against diagnosis year for the "Digothr" subset and print the linear-model summary
inferenceTests(digothrDF, "Digestive Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6980 -0.9995 -0.0329  1.0674  7.0685 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   940.81931   25.60226   36.75   <2e-16 ***
## yearDiagnosis  -0.46656    0.01273  -36.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.071 on 2749 degrees of freedom
## Multiple R-squared:  0.3284, Adjusted R-squared:  0.3281 
## F-statistic:  1344 on 1 and 2749 DF,  p-value: < 2.2e-16

H0: Digestive cancer survival years do not improve as diagnosis year increases (the regression slope is zero or negative).

H1: Digestive cancer survival years improve as diagnosis year increases (the regression slope is positive).

# Plot survival years against diagnosis year for the "Malegen" subset and print the linear-model summary
inferenceTests(malegenDF, "Male Genital Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.7591  -0.1415   0.3572   0.7752   2.2832 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.762e+03  9.303e+00   189.4   <2e-16 ***
## yearDiagnosis -8.743e-01  4.626e-03  -189.0   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.608 on 10005 degrees of freedom
## Multiple R-squared:  0.7812, Adjusted R-squared:  0.7811 
## F-statistic: 3.572e+04 on 1 and 10005 DF,  p-value: < 2.2e-16

H0: Male genital cancer survival years do not improve as diagnosis year increases (the regression slope is zero or negative).

H1: Male genital cancer survival years improve as diagnosis year increases (the regression slope is positive).

# Plot survival years against diagnosis year for the "Femgen" subset and print the linear-model summary
inferenceTests(femgenDF, "Female Genital Cancer - Inference Analysis")

## 
## Call:
## lm(formula = survivalYears ~ yearDiagnosis, data = cancerType)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2799  -0.1660   0.3601   0.8415   2.6368 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.717e+03  1.084e+01   158.5   <2e-16 ***
## yearDiagnosis -8.519e-01  5.388e-03  -158.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.682 on 8092 degrees of freedom
## Multiple R-squared:  0.7555, Adjusted R-squared:  0.7555 
## F-statistic: 2.5e+04 on 1 and 8092 DF,  p-value: < 2.2e-16

H0: Female genital cancer survival years do not improve as diagnosis year increases (the regression slope is zero or negative).

H1: Female genital cancer survival years improve as diagnosis year increases (the regression slope is positive).