# Remember the working directory; it is used later to locate the helper script.
current_path <- getwd()

\(~\)

1 Load Sample Data

library('tidyverse')

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(NHANES)

# Load helper code from SankeyNetwork_Helpers.R in the working directory
# (presumably where make_sankey_graph_from_features() is defined -- confirm).
source(file.path(current_path,'SankeyNetwork_Helpers.R'))

# Keep only rows with a recorded Depressed value, then turn missing values in
# every factor column into an explicit factor level.
NHANES_DATA_12 <- NHANES %>%
  filter(!is.na(Depressed)) %>%
  mutate(across(where(is.factor), fct_explicit_na))

# Names of all factor-typed columns in the filtered data.
factor_vars <- NHANES_DATA_12 %>%
  select(where(is.factor)) %>%
  names()

factor_vars

##  [1] "SurveyYr"         "Gender"           "AgeDecade"        "Race1"           
##  [5] "Race3"            "Education"        "MaritalStatus"    "HHIncome"        
##  [9] "HomeOwn"          "Work"             "BMICatUnder20yrs" "BMI_WHO"         
## [13] "Diabetes"         "HealthGen"        "LittleInterest"   "Depressed"       
## [17] "SleepTrouble"     "PhysActive"       "TVHrsDay"         "CompHrsDay"      
## [21] "Alcohol12PlusYr"  "SmokeNow"         "Smoke100"         "Smoke100n"       
## [25] "Marijuana"        "RegularMarij"     "HardDrugs"        "SexEver"         
## [29] "SameSex"          "SexOrientation"   "PregnantNow"

# Candidate features: every factor column except the target, 'Depressed'.
factor_features <- setdiff(factor_vars, 'Depressed')
factor_features

##  [1] "SurveyYr"         "Gender"           "AgeDecade"        "Race1"           
##  [5] "Race3"            "Education"        "MaritalStatus"    "HHIncome"        
##  [9] "HomeOwn"          "Work"             "BMICatUnder20yrs" "BMI_WHO"         
## [13] "Diabetes"         "HealthGen"        "LittleInterest"   "SleepTrouble"    
## [17] "PhysActive"       "TVHrsDay"         "CompHrsDay"       "Alcohol12PlusYr" 
## [21] "SmokeNow"         "Smoke100"         "Smoke100n"        "Marijuana"       
## [25] "RegularMarij"     "HardDrugs"        "SexEver"          "SameSex"         
## [29] "SexOrientation"   "PregnantNow"

\(~\)

1.1 Test

# Test call of the helper: two hand-picked features, with 'Depressed' as the
# target and 'ID' as the id column.
make_sankey_graph_from_features(data = NHANES_DATA_12,
                                id = 'ID',
                                target = 'Depressed',
                                features = c('Gender','HardDrugs'))

## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(target)` instead of `target` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

## New names:
## * `` -> ...1
## * `` -> ...2

## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'HardDrugs'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.

## Links is a tbl_df. Converting to a plain data frame.

Use at your own risk with more than 5 features!

# Same call with five features drawn at random from factor_features. No seed is
# set in this script, so the selection can change from run to run.
make_sankey_graph_from_features(data = NHANES_DATA_12,
                                id = 'ID',
                                target = 'Depressed',
                                features = sample(factor_features, 5))

## New names:
## * `` -> ...1
## * `` -> ...2

## `summarise()` has grouped output by 'AgeDecade'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'HealthGen'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'HomeOwn'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'CompHrsDay'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'Smoke100n'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'AgeDecade'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'AgeDecade'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'AgeDecade'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'AgeDecade'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'HealthGen'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'HealthGen'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'HealthGen'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'HomeOwn'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'HomeOwn'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'CompHrsDay'. You can override using the `.groups` argument.

## Links is a tbl_df. Converting to a plain data frame.

\(~\)

2 Code Appendix

\(~\)

# Working directory, used to locate the helper script.
current_path <- getwd()

library('tidyverse')
library(NHANES)

# Helper code, sourced from the working directory.
source(file.path(current_path,'SankeyNetwork_Helpers.R'))

# Drop rows with a missing Depressed value and make NA an explicit level in
# every factor column.
NHANES_DATA_12 <- NHANES %>%
  filter(!is.na(Depressed)) %>%
  mutate(across(where(is.factor), fct_explicit_na))

# Names of the factor columns.
factor_vars <- NHANES_DATA_12 %>%
  select(where(is.factor)) %>%
  names()

factor_vars

# All factor columns except the target, 'Depressed'.
factor_features <- setdiff(factor_vars, 'Depressed')
factor_features

# Two fixed features.
make_sankey_graph_from_features(data = NHANES_DATA_12,
                                id = 'ID',
                                target = 'Depressed',
                                features = c('Gender','HardDrugs'))
# Five randomly sampled features (no seed is set in this script).
make_sankey_graph_from_features(data = NHANES_DATA_12,
                                id = 'ID',
                                target = 'Depressed',
                                features = sample(factor_features, 5))

Automated Sankey Network Diagram Example Revisited

J Kyle Armstrong, PhD

24 February 2021

1 Load Sample Data

1.1 Test

2 Code Appendix