R graphics

ggstatsplot

Between Test

# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)
##https://petolau.github.io/Analyzing-double-seasonal-time-series-with-GAM-in-R/
##https://semba-blog.netlify.com/02/22/2019/exploring-time-series-data-in-r/

  
library(data.table)
library(gapminder)
library(ggstatsplot)

  
# Switch to the project folder and read the master AADT table from it.
# NOTE(review): the absolute path is machine-specific; the script only runs
# as-is where this folder exists.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- data.table::fread("IT_aadtMaster.csv")
# List the column names of the loaded table.
names(it01)
##  [1] "State"                 "SD_ID"                 "Route_ID"             
##  [4] "S_DFO"                 "E_DFO"                 "Seg_Length"           
##  [7] "Counted_Uncounted_Seg" "Route_Name"            "FC"                   
## [10] "RU"                    "FC_RU"                 "URBAN_CODE"           
## [13] "Route_Sys"             "Paved_Unpaved"         "District_ID"          
## [16] "County_ID"             "County_Name"           "Count_ID"             
## [19] "Count_Lat"             "Count_Long"            "AADT_1995"            
## [22] "AADT_1996"             "AADT_1997"             "AADT_1998"            
## [25] "AADT_1999"             "AADT_2000"             "AADT_2001"            
## [28] "AADT_2002"             "AADT_2003"             "AADT_2004"            
## [31] "AADT_2005"             "AADT_2006"             "AADT_2007"            
## [34] "AADT_2008"             "AADT_2009"             "AADT_2010"            
## [37] "AADT_2011"             "AADT_2012"             "AADT_2013"            
## [40] "AADT_2014"             "AADT_2015"             "AADT_2016"            
## [43] "AADT_2017"             "AADT_2018"             "Latest_AADT"          
## [46] "Stratum"               "Default_AADT"          "Tract_Number"         
## [49] "BG_Number"             "GEOID_US"              "GEOID"                
## [52] "BG_Area_SqMet"         "BG_Area_SqMi"          "Agg_Earn"             
## [55] "Agg_Inc"               "Agg_Rooms"             "Workers"              
## [58] "Agg_Veh"               "Empl"                  "HU"                   
## [61] "OHU"                   "Pop"                   "C_Pop"                
## [64] "C_HU"                  "WAC"                   "RAC"                  
## [67] "WAC_RAC"               "Pop_Empl"              "Agg_Earn_Den"         
## [70] "Agg_Inc_Den"           "Agg_Room_Den"          "Worker_Den"           
## [73] "Agg_Veh_Den"           "Empl_Den"              "HU_Den"               
## [76] "OHU_Den"               "Pop_Den"               "C_Pop_Den"            
## [79] "C_HU_Den"              "WAC_Den"               "RAC_Den"              
## [82] "WAC_RAC_Den"           "Pop_Empl_Den"          "Dist_IH"              
## [85] "Dist_US"               "V86"
# Turn every character column of it01 into a factor, then print a compact
# overview of all columns and their types.
it01 <- dplyr::mutate_if(it01, is.character, as.factor)
dplyr::glimpse(it01)
## Observations: 55,970
## Variables: 86
## $ State                 <fct> MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, M...
## $ SD_ID                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
## $ Route_ID              <fct> C032131S, C002652N, C002650N, C002650N, C0026...
## $ S_DFO                 <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.0...
## $ E_DFO                 <dbl> 0.949, 0.796, 1.145, 1.145, 0.423, 0.435, 0.4...
## $ Seg_Length            <dbl> 0.94872127, 0.79549011, 1.14513904, 1.1451390...
## $ Counted_Uncounted_Seg <fct> Counted, Counted, Counted, Counted, Counted, ...
## $ Route_Name            <fct> COTE LN, 1ST ST SE, RAILWAY ST, RAILWAY ST, S...
## $ FC                    <int> 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 7, 7, 6, ...
## $ RU                    <fct> U, R, R, R, R, R, R, R, R, R, R, R, R, R, R, ...
## $ FC_RU                 <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ URBAN_CODE            <int> 57736, 99999, 99999, 99999, 99999, 99999, 999...
## $ Route_Sys             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Paved_Unpaved         <fct> PAVED, PAVED, PAVED, PAVED, PAVED, PAVED, PAV...
## $ District_ID           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ County_ID             <int> 63, 35, 35, 35, 35, 35, 35, 35, 61, 61, 61, 6...
## $ County_Name           <fct> MISSOULA, GLACIER, GLACIER, GLACIER, GLACIER,...
## $ Count_ID              <fct> 32-3A-038, 18-5-018, 18-5-016, 18-5-015, 18-5...
## $ Count_Lat             <dbl> 46.89283, 48.63417, 48.63583, 48.63782, 48.63...
## $ Count_Long            <dbl> -114.1078, -112.3319, -112.3297, -112.3333, -...
## $ AADT_1995             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1996             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1997             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1998             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1999             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2000             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2001             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2002             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2003             <int> 2630, NA, NA, NA, NA, NA, NA, 4240, 170, NA, ...
## $ AADT_2004             <int> NA, 980, 3240, 2650, 1040, NA, NA, NA, NA, NA...
## $ AADT_2005             <int> 2810, 820, 2870, 2270, 920, NA, NA, 4090, NA,...
## $ AADT_2006             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2007             <int> 3100, NA, NA, NA, NA, NA, NA, NA, 110, NA, NA...
## $ AADT_2008             <int> NA, NA, NA, 3330, 1020, NA, NA, 3860, NA, NA,...
## $ AADT_2009             <int> 2960, 1360, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2010             <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2011             <int> 2640, NA, 3250, 3140, 940, NA, NA, 3880, 90, ...
## $ AADT_2012             <int> NA, 410, NA, NA, NA, NA, NA, NA, 100, NA, NA,...
## $ AADT_2013             <int> NA, NA, 3380, 2450, 860, NA, NA, 3150, NA, NA...
## $ AADT_2014             <int> 2720, 460, 2880, 2410, 730, NA, NA, 2700, NA,...
## $ AADT_2015             <int> NA, NA, NA, NA, NA, 3341, 3341, NA, 120, 60, ...
## $ AADT_2016             <int> NA, NA, NA, NA, NA, NA, 2776, NA, NA, NA, NA,...
## $ AADT_2017             <int> 2669, 922, 2893, 2388, 791, NA, NA, 3054, NA,...
## $ AADT_2018             <int> NA, 377, NA, NA, 1381, NA, NA, NA, NA, NA, NA...
## $ Latest_AADT           <int> 2669, 377, 2893, 2388, 1381, 3341, 2776, 3054...
## $ Stratum               <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ Default_AADT          <dbl> 736, 58, 58, 58, 58, 58, 58, 58, 124, 124, 58...
## $ Tract_Number          <int> 202, 976000, 976000, 976000, 976000, 976000, ...
## $ BG_Number             <int> 4, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, ...
## $ GEOID_US              <fct> 15000US300630002024, 15000US300359760003, 150...
## $ GEOID                 <int64> 300630002024, 300359760003, 300359760003, 3...
## $ BG_Area_SqMet         <int64> 7831315, 1028636, 1028636, 1028636, 1028636...
## $ BG_Area_SqMi          <dbl> 3.0236876, 0.3971586, 0.3971586, 0.3971586, 0...
## $ Agg_Earn              <int> 32912900, 13060600, 13060600, 13060600, 13060...
## $ Agg_Inc               <int> 41450100, 14921700, 14921700, 14921700, 14921...
## $ Agg_Rooms             <int> 2474, 1283, 1283, 1283, 1283, 1283, 1283, 128...
## $ Workers               <dbl> 522, 170, 170, 170, 170, 170, 170, 170, 391, ...
## $ Agg_Veh               <int> 905, 353, 353, 353, 353, 353, 353, 353, 964, ...
## $ Empl                  <int> 540, 190, 190, 190, 190, 190, 190, 190, 429, ...
## $ HU                    <int> 426, 265, 265, 265, 265, 265, 265, 265, 869, ...
## $ OHU                   <int> 426, 194, 194, 194, 194, 194, 194, 194, 468, ...
## $ Pop                   <int> 1406, 433, 433, 433, 433, 433, 433, 433, 1217...
## $ C_Pop                 <int> 1375, 654, 654, 654, 654, 654, 654, 654, 1216...
## $ C_HU                  <int> 492, 343, 343, 343, 343, 343, 343, 343, 812, ...
## $ WAC                   <int> 112, 826, 826, 826, 826, 826, 826, 826, 414, ...
## $ RAC                   <int> 730, 258, 258, 258, 258, 258, 258, 258, 344, ...
## $ WAC_RAC               <int> 842, 1084, 1084, 1084, 1084, 1084, 1084, 1084...
## $ Pop_Empl              <int> 1946, 623, 623, 623, 623, 623, 623, 623, 1646...
## $ Agg_Earn_Den          <int64> 10885020, 32885101, 32885101, 32885101, 328...
## $ Agg_Inc_Den           <int64> 13708460, 37571138, 37571138, 37571138, 375...
## $ Agg_Room_Den          <int> 818, 3230, 3230, 3230, 3230, 3230, 3230, 3230...
## $ Worker_Den            <int> 173, 428, 428, 428, 428, 428, 428, 428, 1, 1,...
## $ Agg_Veh_Den           <int> 299, 889, 889, 889, 889, 889, 889, 889, 2, 2,...
## $ Empl_Den              <int> 179, 478, 478, 478, 478, 478, 478, 478, 1, 1,...
## $ HU_Den                <int> 141, 667, 667, 667, 667, 667, 667, 667, 2, 2,...
## $ OHU_Den               <int> 141, 488, 488, 488, 488, 488, 488, 488, 1, 1,...
## $ Pop_Den               <int> 465, 1090, 1090, 1090, 1090, 1090, 1090, 1090...
## $ C_Pop_Den             <int> 455, 1647, 1647, 1647, 1647, 1647, 1647, 1647...
## $ C_HU_Den              <int> 163, 864, 864, 864, 864, 864, 864, 864, 2, 2,...
## $ WAC_Den               <int> 37, 2080, 2080, 2080, 2080, 2080, 2080, 2080,...
## $ RAC_Den               <int> 241, 650, 650, 650, 650, 650, 650, 650, 1, 1,...
## $ WAC_RAC_Den           <int> 278, 2729, 2729, 2729, 2729, 2729, 2729, 2729...
## $ Pop_Empl_Den          <int> 644, 1569, 1569, 1569, 1569, 1569, 1569, 1569...
## $ Dist_IH               <dbl> 5.50023402, 22.78645620, 22.67396564, 22.7340...
## $ Dist_US               <dbl> 3.54804381, 0.14351486, 0.08519120, 0.1304387...
## $ V86                   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
# Keep only the rows whose FC_RU stratum is "7R".
dat1 <- dplyr::filter(it01, FC_RU == "7R")
dim(dat1)
## [1] 26141    86
# Reduce to the two columns used in the between-state comparison.
dat2 <- dat1[, c("Default_AADT", "State")]
dim(dat2)
## [1] 26141     2
# Drop rows with a missing value in either of the two columns.
dat3 <- na.omit(dat2)
dim(dat3)
## [1] 6213    2
# Per-column summary of the full 7R subset (all 86 columns).
summary(dat1)
##  State          SD_ID             Route_ID         S_DFO       
##  GA: 1769   Min.   :    2   0         : 3582   Min.   : 0.000  
##  MN: 3908   1st Qu.: 4847   4000171530:   10   1st Qu.: 0.000  
##  MT:  439   Median : 8294   4000100576:    9   Median : 0.205  
##  NC:13646   Mean   : 9559   4000141481:    9   Mean   : 1.331  
##  NM:  103   3rd Qu.:13920   4000110657:    8   3rd Qu.: 1.879  
##  PA: 6276   Max.   :22852   4000115957:    8   Max.   :34.679  
##                             (Other)   :22515   NA's   :10184   
##      E_DFO          Seg_Length       Counted_Uncounted_Seg    Route_Name   
##  Min.   : 0.000   Min.   :    0.00          :13646                 :15518  
##  1st Qu.: 0.529   1st Qu.:    0.42   Counted:12495         MAIN ST :   57  
##  Median : 1.377   Median :    1.05                         CR 62   :   45  
##  Mean   : 2.175   Mean   :  859.33                         RIDGE RD:   35  
##  3rd Qu.: 2.915   3rd Qu.:    2.42                         CR 101  :   33  
##  Max.   :52.261   Max.   :76486.08                         (Other) :10452  
##  NA's   :10184                                             NA's    :    1  
##        FC    RU        FC_RU        URBAN_CODE      Route_Sys     
##  Min.   :7   R:26141   6R:    0   Min.   :    0   Min.   : 3.000  
##  1st Qu.:7   U:    0   7R:26141   1st Qu.:    0   1st Qu.: 4.000  
##  Median :7             7U:    0   Median :    0   Median : 7.000  
##  Mean   :7                        Mean   :13927   Mean   : 6.144  
##  3rd Qu.:7                        3rd Qu.:    0   3rd Qu.: 7.000  
##  Max.   :7                        Max.   :99999   Max.   :21.000  
##                                   NA's   :10287   NA's   :22233   
##  Paved_Unpaved  District_ID       County_ID        County_Name   
##       :20025   Min.   : 1.000   Min.   :  1.00   ROBESON :  353  
##  Paved: 5677   1st Qu.: 3.000   1st Qu.: 49.00   RANDOLPH:  346  
##  PAVED:  439   Median : 4.000   Median : 97.00   SAMPSON :  343  
##                Mean   : 5.506   Mean   : 97.82   DUPLIN  :  328  
##                3rd Qu.: 9.000   3rd Qu.:143.00   MOORE   :  289  
##                Max.   :12.000   Max.   :321.00   IREDELL :  280  
##                NA's   :18096                     (Other) :24202  
##      Count_ID       Count_Lat       Count_Long        AADT_1995    
##          :19922   Min.   :30.46   Min.   :-115.98   Min.   :  10   
##  099-8029:    3   1st Qu.:40.13   1st Qu.: -93.65   1st Qu.:  60   
##  011-8043:    2   Median :41.26   Median : -80.50   Median : 115   
##  031-8096:    2   Mean   :41.48   Mean   : -85.23   Mean   : 251   
##  035-8023:    2   3rd Qu.:44.77   3rd Qu.: -77.61   3rd Qu.: 280   
##  035-8025:    2   Max.   :48.97   Max.   : -74.72   Max.   :4550   
##  (Other) : 6208   NA's   :13646   NA's   :13646     NA's   :25083  
##  AADT_1996      AADT_1997        AADT_1998       AADT_1999     
##  Mode:logical   Mode:logical   Min.   :  10    Min.   :   5.0  
##  NA's:26141     NA's:26141     1st Qu.:  70    1st Qu.:  80.0  
##                                Median : 130    Median : 145.0  
##                                Mean   : 244    Mean   : 286.2  
##                                3rd Qu.: 270    3rd Qu.: 335.0  
##                                Max.   :3100    Max.   :4400.0  
##                                NA's   :25673   NA's   :25110   
##    AADT_2000        AADT_2001        AADT_2002         AADT_2003      
##  Min.   :  10.0   Min.   :  10.0   Min.   :   10.0   Min.   :    5.0  
##  1st Qu.:  75.0   1st Qu.:  80.0   1st Qu.:  240.0   1st Qu.:  220.0  
##  Median : 155.0   Median : 135.0   Median :  430.0   Median :  410.0  
##  Mean   : 283.8   Mean   : 263.7   Mean   :  675.2   Mean   :  645.3  
##  3rd Qu.: 355.0   3rd Qu.: 290.0   3rd Qu.:  770.0   3rd Qu.:  730.0  
##  Max.   :3850.0   Max.   :2300.0   Max.   :36000.0   Max.   :19000.0  
##  NA's   :25894    NA's   :25046    NA's   :19592     NA's   :18207    
##    AADT_2004       AADT_2005         AADT_2006         AADT_2007    
##  Min.   :    5   Min.   :    5.0   Min.   :   15.0   Min.   :   10  
##  1st Qu.:  240   1st Qu.:  210.0   1st Qu.:  240.0   1st Qu.:  220  
##  Median :  450   Median :  400.0   Median :  430.0   Median :  410  
##  Mean   :  686   Mean   :  629.5   Mean   :  666.7   Mean   :  637  
##  3rd Qu.:  790   3rd Qu.:  730.0   3rd Qu.:  770.0   3rd Qu.:  740  
##  Max.   :31000   Max.   :18000.0   Max.   :32000.0   Max.   :18000  
##  NA's   :19383   NA's   :17982     NA's   :19332     NA's   :18092  
##    AADT_2008         AADT_2009         AADT_2010         AADT_2011      
##  Min.   :    7.0   Min.   :    0.0   Min.   :    0.0   Min.   :    0.0  
##  1st Qu.:  216.0   1st Qu.:  140.0   1st Qu.:  210.0   1st Qu.:  180.0  
##  Median :  400.0   Median :  310.0   Median :  400.0   Median :  350.0  
##  Mean   :  644.8   Mean   :  530.8   Mean   :  623.1   Mean   :  557.6  
##  3rd Qu.:  740.0   3rd Qu.:  614.8   3rd Qu.:  720.0   3rd Qu.:  650.0  
##  Max.   :38000.0   Max.   :18000.0   Max.   :42000.0   Max.   :18000.0  
##  NA's   :18756     NA's   :15083     NA's   :18702     NA's   :16806    
##    AADT_2012         AADT_2013       AADT_2014         AADT_2015      
##  Min.   :    0.0   Min.   :    0   Min.   :    0.0   Min.   :    1.0  
##  1st Qu.:  170.0   1st Qu.:  160   1st Qu.:  150.0   1st Qu.:  150.0  
##  Median :  349.0   Median :  330   Median :  310.0   Median :  310.0  
##  Mean   :  566.8   Mean   :  539   Mean   :  545.1   Mean   :  533.3  
##  3rd Qu.:  660.0   3rd Qu.:  630   3rd Qu.:  610.0   3rd Qu.:  616.2  
##  Max.   :33000.0   Max.   :16000   Max.   :28000.0   Max.   :20000.0  
##  NA's   :17260     NA's   :17605   NA's   :19867     NA's   :15647    
##    AADT_2016         AADT_2017         AADT_2018       Latest_AADT     
##  Min.   :    2.0   Min.   :    4.0   Min.   :   3.0   Min.   :    1.0  
##  1st Qu.:  160.0   1st Qu.:   90.0   1st Qu.:  61.0   1st Qu.:  130.0  
##  Median :  330.0   Median :  190.0   Median : 136.0   Median :  280.0  
##  Mean   :  563.1   Mean   :  342.3   Mean   : 299.2   Mean   :  487.3  
##  3rd Qu.:  640.0   3rd Qu.:  400.0   3rd Qu.: 310.0   3rd Qu.:  580.0  
##  Max.   :19000.0   Max.   :15100.0   Max.   :7845.0   Max.   :28000.0  
##  NA's   :16725     NA's   :23042     NA's   :24267    NA's   :11       
##     Stratum       Default_AADT    Tract_Number      BG_Number    
##         :19928   Min.   :  5     Min.   :   100   Min.   :1.000  
##  703    : 1011   1st Qu.: 45     1st Qu.: 30700   1st Qu.:1.000  
##  701    :  758   Median : 60     Median :460200   Median :2.000  
##  7R     :  439   Mean   :135     Mean   :488970   Mean   :2.151  
##  39     :  168   3rd Qu.:265     3rd Qu.:950400   3rd Qu.:3.000  
##  77     :  160   Max.   :650     Max.   :980100   Max.   :8.000  
##  (Other): 3677   NA's   :19928                                   
##                 GEOID_US         GEOID              BG_Area_SqMet        
##  15000US271079602001:   33   Min.   :130019501001   Min.   :     300737  
##  15000US371539702002:   32   1st Qu.:370059501003   1st Qu.:   28685823  
##  15000US270510701002:   28   Median :371139704003   Median :   54859325  
##  15000US370079202001:   26   Mean   :350542104420   Mean   :  115672634  
##  15000US370079206001:   26   3rd Qu.:371950016002   3rd Qu.:  100102736  
##  15000US370079203001:   25   Max.   :421330240022   Max.   :12869141335  
##  (Other)            :25971                                               
##   BG_Area_SqMi         Agg_Earn            Agg_Inc            Agg_Rooms    
##  Min.   :   0.116   Min.   :   912700   Min.   :  1699800   Min.   :  102  
##  1st Qu.:  11.076   1st Qu.: 15923250   1st Qu.: 23258300   1st Qu.: 2925  
##  Median :  21.181   Median : 22546900   Median : 31993500   Median : 3872  
##  Mean   :  44.661   Mean   : 27105916   Mean   : 37371002   Mean   : 4235  
##  3rd Qu.:  38.650   3rd Qu.: 32540900   3rd Qu.: 44271800   3rd Qu.: 5052  
##  Max.   :4968.803   Max.   :320626700   Max.   :397346300   Max.   :31653  
##                     NA's   :10          NA's   :2           NA's   :9      
##     Workers          Agg_Veh          Empl              HU        
##  Min.   :   0.0   Min.   : 115   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.: 414.0   1st Qu.: 822   1st Qu.: 451.0   1st Qu.: 485.0  
##  Median : 574.0   Median :1107   Median : 626.0   Median : 649.0  
##  Mean   : 642.7   Mean   :1213   Mean   : 701.9   Mean   : 703.4  
##  3rd Qu.: 787.0   3rd Qu.:1482   3rd Qu.: 860.0   3rd Qu.: 852.0  
##  Max.   :4892.0   Max.   :6817   Max.   :5725.0   Max.   :4179.0  
##                   NA's   :1799                                    
##       OHU              Pop            C_Pop           C_HU     
##  Min.   :   0.0   Min.   :    0   Min.   :   0   Min.   :   0  
##  1st Qu.: 393.0   1st Qu.:  974   1st Qu.:1017   1st Qu.: 486  
##  Median : 521.0   Median : 1320   Median :1338   Median : 636  
##  Mean   : 565.9   Mean   : 1464   Mean   :1460   Mean   : 690  
##  3rd Qu.: 689.0   3rd Qu.: 1785   3rd Qu.:1769   3rd Qu.: 821  
##  Max.   :3552.0   Max.   :11220   Max.   :9305   Max.   :3597  
##                                                                
##       WAC               RAC          WAC_RAC         Pop_Empl    
##  Min.   :    1.0   Min.   :   4   Min.   :    4   Min.   :    0  
##  1st Qu.:   76.0   1st Qu.: 399   1st Qu.:  543   1st Qu.: 1434  
##  Median :  163.0   Median : 537   Median :  766   Median : 1953  
##  Mean   :  313.2   Mean   : 595   Mean   :  908   Mean   : 2166  
##  3rd Qu.:  361.0   3rd Qu.: 723   3rd Qu.: 1090   3rd Qu.: 2626  
##  Max.   :14334.0   Max.   :4231   Max.   :15733   Max.   :16945  
##  NA's   :21                                                      
##   Agg_Earn_Den        Agg_Inc_Den         Agg_Room_Den       Worker_Den     
##  Min.   :     2598   Min.   :     6093   Min.   :    1.0   Min.   :   0.00  
##  1st Qu.:   510452   1st Qu.:   750207   1st Qu.:   98.0   1st Qu.:  13.00  
##  Median :  1137832   Median :  1638896   Median :  191.0   Median :  29.00  
##  Mean   :  2536385   Mean   :  3522216   Mean   :  397.3   Mean   :  61.21  
##  3rd Qu.:  2487753   3rd Qu.:  3452413   3rd Qu.:  378.0   3rd Qu.:  61.00  
##  Max.   :109686755   Max.   :160292056   Max.   :18404.0   Max.   :3195.00  
##  NA's   :       10   NA's   :        2   NA's   :9                          
##   Agg_Veh_Den        Empl_Den           HU_Den           OHU_Den       
##  Min.   :   0.0   Min.   :   0.00   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:  27.0   1st Qu.:  15.00   1st Qu.:  16.00   1st Qu.:  13.00  
##  Median :  56.0   Median :  32.00   Median :  32.00   Median :  27.00  
##  Mean   : 108.5   Mean   :  67.06   Mean   :  66.52   Mean   :  55.97  
##  3rd Qu.: 112.0   3rd Qu.:  67.00   3rd Qu.:  63.00   3rd Qu.:  53.00  
##  Max.   :4659.0   Max.   :3583.00   Max.   :3476.00   Max.   :2691.00  
##  NA's   :1799                                                          
##     Pop_Den       C_Pop_Den         C_HU_Den         WAC_Den       
##  Min.   :   0   Min.   :   0.0   Min.   :   0.0   Min.   :   0.00  
##  1st Qu.:  32   1st Qu.:  34.0   1st Qu.:  16.0   1st Qu.:   2.00  
##  Median :  69   Median :  71.0   Median :  32.0   Median :   7.00  
##  Mean   : 140   Mean   : 139.5   Mean   :  65.5   Mean   :  45.98  
##  3rd Qu.: 139   3rd Qu.: 139.0   3rd Qu.:  61.0   3rd Qu.:  24.00  
##  Max.   :7303   Max.   :5607.0   Max.   :2839.0   Max.   :5400.00  
##                                                   NA's   :21       
##     RAC_Den        WAC_RAC_Den      Pop_Empl_Den      Dist_IH       
##  Min.   :   0.0   Min.   :   0.0   Min.   :    0   Min.   :  0.000  
##  1st Qu.:  13.0   1st Qu.:  17.0   1st Qu.:   48   1st Qu.:  8.451  
##  Median :  28.0   Median :  38.0   Median :  102   Median : 18.281  
##  Mean   :  58.2   Mean   : 104.2   Mean   :  207   Mean   : 28.244  
##  3rd Qu.:  57.0   3rd Qu.:  86.0   3rd Qu.:  206   3rd Qu.: 35.387  
##  Max.   :2618.0   Max.   :5733.0   Max.   :10886   Max.   :215.189  
##                                                    NA's   :6603     
##     Dist_US          V86         
##  Min.   :  0.000   Mode:logical  
##  1st Qu.:  1.208   NA's:26141    
##  Median :  3.739                 
##  Mean   :  6.023                 
##  3rd Qu.:  7.724                 
##  Max.   :158.150                 
##  NA's   :6386
# Row counts per State in dat1.
table(dat1[["State"]])
## 
##    GA    MN    MT    NC    NM    PA 
##  1769  3908   439 13646   103  6276
# Exclude the "PA" and "NC" rows from dat3. The two states must be tested
# together: `State != "PA" | State != "NC"` is TRUE for every row (no row can
# equal both values), so it would keep the whole table.
dat4 <- subset(dat3, !(State %in% c("PA", "NC")))

### FIGURE 1
# Default_AADT compared across State using dat1, i.e. without any cleaning.
ggstatsplot::ggbetweenstats(
  data = dat1,
  x = State,
  y = Default_AADT,
  nboot = 10,
  messages = FALSE
)

# The same comparison using dat3, the cleaned table.
ggstatsplot::ggbetweenstats(
  data = dat3,
  x = State,
  y = Default_AADT,
  nboot = 10,
  messages = FALSE
)

Regular

## FIGURE 2
# Rebuild dat1, dat2 and dat3 from the State == "MN" rows; this overwrites
# the objects of the same names created earlier in the script.
dat1 <- dplyr::filter(it01, State == "MN")
dim(dat1)
## [1] 11498    86
# Keep the response and the grouping column, then drop incomplete rows.
dat2 <- dat1[, c("Default_AADT", "FC_RU")]
dat3 <- na.omit(dat2)
dim(dat3)
## [1] 11482     2
# Fix the random seed so the plots that follow are reproducible.
set.seed(123)

# p1: parametric comparison (type = "p") of Default_AADT across the FC_RU
# groups, drawn as box plots, with effect size "d" and a 0.99 confidence
# level.
p1 <- ggstatsplot::ggbetweenstats(
  data = dat3,
  x = FC_RU,
  y = Default_AADT,
  type = "p",
  effsize.type = "d",
  conf.level = 0.99,
  plot.type = "box",
  package = "ggsci",
  palette = "nrc_npg",
  title = "Parametric test",
  xlab = "Facility",
  ylab = "AADT",
  messages = FALSE
)

# p2: non-parametric comparison (type = "np") of Default_AADT across the
# FC_RU groups, drawn as violin plots, with a 0.99 confidence level.
p2 <- ggstatsplot::ggbetweenstats(
  data = dat3,
  x = FC_RU,
  y = Default_AADT,
  type = "np",
  conf.level = 0.99,
  plot.type = "violin",
  package = "ggsci",
  palette = "uniform_startrek",
  title = "Non-parametric Test (violin plot)",
  xlab = "Facility",
  ylab = "AADT",
  messages = FALSE
)

# p3: robust comparison (type = "r") of Default_AADT across the FC_RU groups,
# drawn as combined box and violin plots, with a 0.99 confidence level.
# NOTE(review): tr, nboot and k are presumably the trimming level, the number
# of bootstrap samples and the number of decimals shown; confirm against the
# installed ggstatsplot version.
p3 <- ggstatsplot::ggbetweenstats(
  data = dat3,
  x = FC_RU,
  y = Default_AADT,
  type = "r",
  tr = 0.005,
  nboot = 15,
  conf.level = 0.99,
  k = 3,
  plot.type = "boxviolin",
  package = "wesanderson",
  palette = "Royal2",
  title = "Robust Test (box & violin plot)",
  xlab = "Facility",
  ylab = "AADT",
  messages = FALSE
)

# p4: Bayes-factor comparison (type = "bf") of Default_AADT across the FC_RU
# groups, drawn as box plots.
p4 <- ggstatsplot::ggbetweenstats(
  data = dat3,
  x = FC_RU,
  y = Default_AADT,
  type = "bf",
  plot.type = "box",
  package = "ggsci",
  palette = "nrc_npg",
  title = "Bayesian Test (box plot)",
  xlab = "Facility",
  ylab = "AADT",
  messages = FALSE
)

# Lay out p1-p4 on a two-row grid, labelled (a)-(d), under one shared title
# and caption.
ggstatsplot::combine_plots(
  p1,
  p2,
  p3,
  p4,
  nrow = 2,
  labels = c("(a)", "(b)", "(c)", "(d)"),
  title.text = "Comparison of traffic volume by facility type",
  title.size = 14,
  caption.text = "Source: State DOTs",
  caption.size = 12
)

# List the columns available in dat1 before picking variables to correlate.
colnames(dat1)
##  [1] "State"                 "SD_ID"                 "Route_ID"             
##  [4] "S_DFO"                 "E_DFO"                 "Seg_Length"           
##  [7] "Counted_Uncounted_Seg" "Route_Name"            "FC"                   
## [10] "RU"                    "FC_RU"                 "URBAN_CODE"           
## [13] "Route_Sys"             "Paved_Unpaved"         "District_ID"          
## [16] "County_ID"             "County_Name"           "Count_ID"             
## [19] "Count_Lat"             "Count_Long"            "AADT_1995"            
## [22] "AADT_1996"             "AADT_1997"             "AADT_1998"            
## [25] "AADT_1999"             "AADT_2000"             "AADT_2001"            
## [28] "AADT_2002"             "AADT_2003"             "AADT_2004"            
## [31] "AADT_2005"             "AADT_2006"             "AADT_2007"            
## [34] "AADT_2008"             "AADT_2009"             "AADT_2010"            
## [37] "AADT_2011"             "AADT_2012"             "AADT_2013"            
## [40] "AADT_2014"             "AADT_2015"             "AADT_2016"            
## [43] "AADT_2017"             "AADT_2018"             "Latest_AADT"          
## [46] "Stratum"               "Default_AADT"          "Tract_Number"         
## [49] "BG_Number"             "GEOID_US"              "GEOID"                
## [52] "BG_Area_SqMet"         "BG_Area_SqMi"          "Agg_Earn"             
## [55] "Agg_Inc"               "Agg_Rooms"             "Workers"              
## [58] "Agg_Veh"               "Empl"                  "HU"                   
## [61] "OHU"                   "Pop"                   "C_Pop"                
## [64] "C_HU"                  "WAC"                   "RAC"                  
## [67] "WAC_RAC"               "Pop_Empl"              "Agg_Earn_Den"         
## [70] "Agg_Inc_Den"           "Agg_Room_Den"          "Worker_Den"           
## [73] "Agg_Veh_Den"           "Empl_Den"              "HU_Den"               
## [76] "OHU_Den"               "Pop_Den"               "C_Pop_Den"            
## [79] "C_HU_Den"              "WAC_Den"               "RAC_Den"              
## [82] "WAC_RAC_Den"           "Pop_Empl_Den"          "Dist_IH"              
## [85] "Dist_US"               "V86"
# Correlation matrix computed on all rows of dat1 (no subsampling is done
# here). cor.vars picks Default_AADT plus the contiguous column range
# Agg_Rooms through RAC by bare column name.
ggstatsplot::ggcorrmat(
  data = dat1,
  cor.vars = c(Default_AADT, Agg_Rooms:RAC),
  ggcorrplot.args = list(hc.order = TRUE, outline.color = "black")
)