R graphics

ggstatsplot

Between Test

# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)
library(data.table)
library(gapminder)
library(ggstatsplot)

  
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- fread("IT_aadtMaster.csv")
names(it01)
##  [1] "State"                 "SD_ID"                 "Route_ID"             
##  [4] "S_DFO"                 "E_DFO"                 "Seg_Length"           
##  [7] "Counted_Uncounted_Seg" "Route_Name"            "FC"                   
## [10] "RU"                    "FC_RU"                 "URBAN_CODE"           
## [13] "Route_Sys"             "Paved_Unpaved"         "District_ID"          
## [16] "County_ID"             "County_Name"           "Count_ID"             
## [19] "Count_Lat"             "Count_Long"            "AADT_1995"            
## [22] "AADT_1996"             "AADT_1997"             "AADT_1998"            
## [25] "AADT_1999"             "AADT_2000"             "AADT_2001"            
## [28] "AADT_2002"             "AADT_2003"             "AADT_2004"            
## [31] "AADT_2005"             "AADT_2006"             "AADT_2007"            
## [34] "AADT_2008"             "AADT_2009"             "AADT_2010"            
## [37] "AADT_2011"             "AADT_2012"             "AADT_2013"            
## [40] "AADT_2014"             "AADT_2015"             "AADT_2016"            
## [43] "AADT_2017"             "AADT_2018"             "Latest_AADT"          
## [46] "Stratum"               "Default_AADT"          "Tract_Number"         
## [49] "BG_Number"             "GEOID_US"              "GEOID"                
## [52] "BG_Area_SqMet"         "BG_Area_SqMi"          "Agg_Earn"             
## [55] "Agg_Inc"               "Agg_Rooms"             "Workers"              
## [58] "Agg_Veh"               "Empl"                  "HU"                   
## [61] "OHU"                   "Pop"                   "C_Pop"                
## [64] "C_HU"                  "WAC"                   "RAC"                  
## [67] "WAC_RAC"               "Pop_Empl"              "Agg_Earn_Den"         
## [70] "Agg_Inc_Den"           "Agg_Room_Den"          "Worker_Den"           
## [73] "Agg_Veh_Den"           "Empl_Den"              "HU_Den"               
## [76] "OHU_Den"               "Pop_Den"               "C_Pop_Den"            
## [79] "C_HU_Den"              "WAC_Den"               "RAC_Den"              
## [82] "WAC_RAC_Den"           "Pop_Empl_Den"          "Dist_IH"              
## [85] "Dist_US"               "V86"
it01 %>% mutate_if(is.character, as.factor) -> it01
glimpse(it01)
## Observations: 55,970
## Variables: 86
## $ State                 <fct> MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, M...
## $ SD_ID                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
## $ Route_ID              <fct> C032131S, C002652N, C002650N, C002650N, C0026...
## $ S_DFO                 <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.0...
## $ E_DFO                 <dbl> 0.949, 0.796, 1.145, 1.145, 0.423, 0.435, 0.4...
## $ Seg_Length            <dbl> 0.94872127, 0.79549011, 1.14513904, 1.1451390...
## $ Counted_Uncounted_Seg <fct> Counted, Counted, Counted, Counted, Counted, ...
## $ Route_Name            <fct> COTE LN, 1ST ST SE, RAILWAY ST, RAILWAY ST, S...
## $ FC                    <int> 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 7, 7, 6, ...
## $ RU                    <fct> U, R, R, R, R, R, R, R, R, R, R, R, R, R, R, ...
## $ FC_RU                 <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ URBAN_CODE            <int> 57736, 99999, 99999, 99999, 99999, 99999, 999...
## $ Route_Sys             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Paved_Unpaved         <fct> PAVED, PAVED, PAVED, PAVED, PAVED, PAVED, PAV...
## $ District_ID           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ County_ID             <int> 63, 35, 35, 35, 35, 35, 35, 35, 61, 61, 61, 6...
## $ County_Name           <fct> MISSOULA, GLACIER, GLACIER, GLACIER, GLACIER,...
## $ Count_ID              <fct> 32-3A-038, 18-5-018, 18-5-016, 18-5-015, 18-5...
## $ Count_Lat             <dbl> 46.89283, 48.63417, 48.63583, 48.63782, 48.63...
## $ Count_Long            <dbl> -114.1078, -112.3319, -112.3297, -112.3333, -...
## $ AADT_1995             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1996             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1997             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1998             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1999             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2000             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2001             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2002             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2003             <int> 2630, NA, NA, NA, NA, NA, NA, 4240, 170, NA, ...
## $ AADT_2004             <int> NA, 980, 3240, 2650, 1040, NA, NA, NA, NA, NA...
## $ AADT_2005             <int> 2810, 820, 2870, 2270, 920, NA, NA, 4090, NA,...
## $ AADT_2006             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2007             <int> 3100, NA, NA, NA, NA, NA, NA, NA, 110, NA, NA...
## $ AADT_2008             <int> NA, NA, NA, 3330, 1020, NA, NA, 3860, NA, NA,...
## $ AADT_2009             <int> 2960, 1360, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2010             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2011             <int> 2640, NA, 3250, 3140, 940, NA, NA, 3880, 90, ...
## $ AADT_2012             <int> NA, 410, NA, NA, NA, NA, NA, NA, 100, NA, NA,...
## $ AADT_2013             <int> NA, NA, 3380, 2450, 860, NA, NA, 3150, NA, NA...
## $ AADT_2014             <int> 2720, 460, 2880, 2410, 730, NA, NA, 2700, NA,...
## $ AADT_2015             <int> NA, NA, NA, NA, NA, 3341, 3341, NA, 120, 60, ...
## $ AADT_2016             <int> NA, NA, NA, NA, NA, NA, 2776, NA, NA, NA, NA,...
## $ AADT_2017             <int> 2669, 922, 2893, 2388, 791, NA, NA, 3054, NA,...
## $ AADT_2018             <int> NA, 377, NA, NA, 1381, NA, NA, NA, NA, NA, NA...
## $ Latest_AADT           <int> 2669, 377, 2893, 2388, 1381, 3341, 2776, 3054...
## $ Stratum               <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ Default_AADT          <dbl> 736, 58, 58, 58, 58, 58, 58, 58, 124, 124, 58...
## $ Tract_Number          <int> 202, 976000, 976000, 976000, 976000, 976000, ...
## $ BG_Number             <int> 4, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, ...
## $ GEOID_US              <fct> 15000US300630002024, 15000US300359760003, 150...
## $ GEOID                 <int64> 300630002024, 300359760003, 300359760003, 3...
## $ BG_Area_SqMet         <int64> 7831315, 1028636, 1028636, 1028636, 1028636...
## $ BG_Area_SqMi          <dbl> 3.0236876, 0.3971586, 0.3971586, 0.3971586, 0...
## $ Agg_Earn              <int> 32912900, 13060600, 13060600, 13060600, 13060...
## $ Agg_Inc               <int> 41450100, 14921700, 14921700, 14921700, 14921...
## $ Agg_Rooms             <int> 2474, 1283, 1283, 1283, 1283, 1283, 1283, 128...
## $ Workers               <dbl> 522, 170, 170, 170, 170, 170, 170, 170, 391, ...
## $ Agg_Veh               <int> 905, 353, 353, 353, 353, 353, 353, 353, 964, ...
## $ Empl                  <int> 540, 190, 190, 190, 190, 190, 190, 190, 429, ...
## $ HU                    <int> 426, 265, 265, 265, 265, 265, 265, 265, 869, ...
## $ OHU                   <int> 426, 194, 194, 194, 194, 194, 194, 194, 468, ...
## $ Pop                   <int> 1406, 433, 433, 433, 433, 433, 433, 433, 1217...
## $ C_Pop                 <int> 1375, 654, 654, 654, 654, 654, 654, 654, 1216...
## $ C_HU                  <int> 492, 343, 343, 343, 343, 343, 343, 343, 812, ...
## $ WAC                   <int> 112, 826, 826, 826, 826, 826, 826, 826, 414, ...
## $ RAC                   <int> 730, 258, 258, 258, 258, 258, 258, 258, 344, ...
## $ WAC_RAC               <int> 842, 1084, 1084, 1084, 1084, 1084, 1084, 1084...
## $ Pop_Empl              <int> 1946, 623, 623, 623, 623, 623, 623, 623, 1646...
## $ Agg_Earn_Den          <int64> 10885020, 32885101, 32885101, 32885101, 328...
## $ Agg_Inc_Den           <int64> 13708460, 37571138, 37571138, 37571138, 375...
## $ Agg_Room_Den          <int> 818, 3230, 3230, 3230, 3230, 3230, 3230, 3230...
## $ Worker_Den            <int> 173, 428, 428, 428, 428, 428, 428, 428, 1, 1,...
## $ Agg_Veh_Den           <int> 299, 889, 889, 889, 889, 889, 889, 889, 2, 2,...
## $ Empl_Den              <int> 179, 478, 478, 478, 478, 478, 478, 478, 1, 1,...
## $ HU_Den                <int> 141, 667, 667, 667, 667, 667, 667, 667, 2, 2,...
## $ OHU_Den               <int> 141, 488, 488, 488, 488, 488, 488, 488, 1, 1,...
## $ Pop_Den               <int> 465, 1090, 1090, 1090, 1090, 1090, 1090, 1090...
## $ C_Pop_Den             <int> 455, 1647, 1647, 1647, 1647, 1647, 1647, 1647...
## $ C_HU_Den              <int> 163, 864, 864, 864, 864, 864, 864, 864, 2, 2,...
## $ WAC_Den               <int> 37, 2080, 2080, 2080, 2080, 2080, 2080, 2080,...
## $ RAC_Den               <int> 241, 650, 650, 650, 650, 650, 650, 650, 1, 1,...
## $ WAC_RAC_Den           <int> 278, 2729, 2729, 2729, 2729, 2729, 2729, 2729...
## $ Pop_Empl_Den          <int> 644, 1569, 1569, 1569, 1569, 1569, 1569, 1569...
## $ Dist_IH               <dbl> 5.50023402, 22.78645620, 22.67396564, 22.7340...
## $ Dist_US               <dbl> 3.54804381, 0.14351486, 0.08519120, 0.1304387...
## $ V86                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## FIGURE 2
dat1= filter(it01, State=="MN")
dim(dat1)
## [1] 11498    86
dat2= dat1[,c("Default_AADT", "FC_RU")]
dat3=na.omit(dat2)
dim(dat3)
## [1] 11482     2
# for reproducibility
set.seed(123)

ggstatsplot::ggscatterstats(
  data = filter(dat1, FC_RU=="7U"),
  x = WAC, # predictor/independent variable
  y = Default_AADT, # dependent variable
  xlab = "WAC", # label for the x-axis
  ylab = "AADT", # label for the y-axis
  label.expression = "Default_AADT < 300 & WAC > 100", # expression for deciding which points to label
  point.alpha = 0.7,
  point.size = 4,
  point.color = "grey50",
  marginal = TRUE, # show marginal distribution
  marginal.type = "density", # type of plot for marginal distribution
  centrality.parameter = "mean", # centrality parameter to be plotted
  margins = "both", # marginal distribution on both axes
  xfill = "#CC79A7", # fill for marginals on the x-axis
  yfill = "#009E73", # fill for marginals on the y-axis
  type = "pearson", # type of linear association
  title = "WAC and AADT",
  messages = FALSE
)

Correlation

# let's use just 5% of the data to speed it up
ggstatsplot::grouped_ggcorrmat(
  # arguments relevant for ggstatsplot::ggcorrmat
  data = dat1,
  type = "robust", # percentage bend correlation coefficient
  beta = 0.2, # bending constant
  p.adjust.method = "holm", # method to adjust p-values for multiple comparisons
  grouping.var = FC_RU,
  title.prefix = "Facility Type",
  cor.vars = c(Default_AADT, Agg_Rooms:RAC),
  lab.size = 3.5,
  # arguments relevant for ggstatsplot::combine_plots
  title.text = "By Facility Type",
  title.args = list(size = 16, color = "red"),
  plotgrid.args = list(
    labels = c("(a)", "(b)", "(c)"),
    nrow = 2,
    ncol = 2
  )
)