Between Test
# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)
library(data.table)
library(gapminder)
library(ggstatsplot)
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- fread("IT_aadtMaster.csv")
names(it01)
## [1] "State" "SD_ID" "Route_ID"
## [4] "S_DFO" "E_DFO" "Seg_Length"
## [7] "Counted_Uncounted_Seg" "Route_Name" "FC"
## [10] "RU" "FC_RU" "URBAN_CODE"
## [13] "Route_Sys" "Paved_Unpaved" "District_ID"
## [16] "County_ID" "County_Name" "Count_ID"
## [19] "Count_Lat" "Count_Long" "AADT_1995"
## [22] "AADT_1996" "AADT_1997" "AADT_1998"
## [25] "AADT_1999" "AADT_2000" "AADT_2001"
## [28] "AADT_2002" "AADT_2003" "AADT_2004"
## [31] "AADT_2005" "AADT_2006" "AADT_2007"
## [34] "AADT_2008" "AADT_2009" "AADT_2010"
## [37] "AADT_2011" "AADT_2012" "AADT_2013"
## [40] "AADT_2014" "AADT_2015" "AADT_2016"
## [43] "AADT_2017" "AADT_2018" "Latest_AADT"
## [46] "Stratum" "Default_AADT" "Tract_Number"
## [49] "BG_Number" "GEOID_US" "GEOID"
## [52] "BG_Area_SqMet" "BG_Area_SqMi" "Agg_Earn"
## [55] "Agg_Inc" "Agg_Rooms" "Workers"
## [58] "Agg_Veh" "Empl" "HU"
## [61] "OHU" "Pop" "C_Pop"
## [64] "C_HU" "WAC" "RAC"
## [67] "WAC_RAC" "Pop_Empl" "Agg_Earn_Den"
## [70] "Agg_Inc_Den" "Agg_Room_Den" "Worker_Den"
## [73] "Agg_Veh_Den" "Empl_Den" "HU_Den"
## [76] "OHU_Den" "Pop_Den" "C_Pop_Den"
## [79] "C_HU_Den" "WAC_Den" "RAC_Den"
## [82] "WAC_RAC_Den" "Pop_Empl_Den" "Dist_IH"
## [85] "Dist_US" "V86"
it01 %>% mutate_if(is.character, as.factor) -> it01
glimpse(it01)
## Observations: 55,970
## Variables: 86
## $ State <fct> MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, MT, M...
## $ SD_ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
## $ Route_ID <fct> C032131S, C002652N, C002650N, C002650N, C0026...
## $ S_DFO <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.0...
## $ E_DFO <dbl> 0.949, 0.796, 1.145, 1.145, 0.423, 0.435, 0.4...
## $ Seg_Length <dbl> 0.94872127, 0.79549011, 1.14513904, 1.1451390...
## $ Counted_Uncounted_Seg <fct> Counted, Counted, Counted, Counted, Counted, ...
## $ Route_Name <fct> COTE LN, 1ST ST SE, RAILWAY ST, RAILWAY ST, S...
## $ FC <int> 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 7, 7, 6, ...
## $ RU <fct> U, R, R, R, R, R, R, R, R, R, R, R, R, R, R, ...
## $ FC_RU <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ URBAN_CODE <int> 57736, 99999, 99999, 99999, 99999, 99999, 999...
## $ Route_Sys <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Paved_Unpaved <fct> PAVED, PAVED, PAVED, PAVED, PAVED, PAVED, PAV...
## $ District_ID <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ County_ID <int> 63, 35, 35, 35, 35, 35, 35, 35, 61, 61, 61, 6...
## $ County_Name <fct> MISSOULA, GLACIER, GLACIER, GLACIER, GLACIER,...
## $ Count_ID <fct> 32-3A-038, 18-5-018, 18-5-016, 18-5-015, 18-5...
## $ Count_Lat <dbl> 46.89283, 48.63417, 48.63583, 48.63782, 48.63...
## $ Count_Long <dbl> -114.1078, -112.3319, -112.3297, -112.3333, -...
## $ AADT_1995 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1996 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1997 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1998 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_1999 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2000 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2001 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2002 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2003 <int> 2630, NA, NA, NA, NA, NA, NA, 4240, 170, NA, ...
## $ AADT_2004 <int> NA, 980, 3240, 2650, 1040, NA, NA, NA, NA, NA...
## $ AADT_2005 <int> 2810, 820, 2870, 2270, 920, NA, NA, 4090, NA,...
## $ AADT_2006 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2007 <int> 3100, NA, NA, NA, NA, NA, NA, NA, 110, NA, NA...
## $ AADT_2008 <int> NA, NA, NA, 3330, 1020, NA, NA, 3860, NA, NA,...
## $ AADT_2009 <int> 2960, 1360, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2010 <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AADT_2011 <int> 2640, NA, 3250, 3140, 940, NA, NA, 3880, 90, ...
## $ AADT_2012 <int> NA, 410, NA, NA, NA, NA, NA, NA, 100, NA, NA,...
## $ AADT_2013 <int> NA, NA, 3380, 2450, 860, NA, NA, 3150, NA, NA...
## $ AADT_2014 <int> 2720, 460, 2880, 2410, 730, NA, NA, 2700, NA,...
## $ AADT_2015 <int> NA, NA, NA, NA, NA, 3341, 3341, NA, 120, 60, ...
## $ AADT_2016 <int> NA, NA, NA, NA, NA, NA, 2776, NA, NA, NA, NA,...
## $ AADT_2017 <int> 2669, 922, 2893, 2388, 791, NA, NA, 3054, NA,...
## $ AADT_2018 <int> NA, 377, NA, NA, 1381, NA, NA, NA, NA, NA, NA...
## $ Latest_AADT <int> 2669, 377, 2893, 2388, 1381, 3341, 2776, 3054...
## $ Stratum <fct> 7U, 7R, 7R, 7R, 7R, 7R, 7R, 7R, 6R, 6R, 7R, 7...
## $ Default_AADT <dbl> 736, 58, 58, 58, 58, 58, 58, 58, 124, 124, 58...
## $ Tract_Number <int> 202, 976000, 976000, 976000, 976000, 976000, ...
## $ BG_Number <int> 4, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, ...
## $ GEOID_US <fct> 15000US300630002024, 15000US300359760003, 150...
## $ GEOID <int64> 300630002024, 300359760003, 300359760003, 3...
## $ BG_Area_SqMet <int64> 7831315, 1028636, 1028636, 1028636, 1028636...
## $ BG_Area_SqMi <dbl> 3.0236876, 0.3971586, 0.3971586, 0.3971586, 0...
## $ Agg_Earn <int> 32912900, 13060600, 13060600, 13060600, 13060...
## $ Agg_Inc <int> 41450100, 14921700, 14921700, 14921700, 14921...
## $ Agg_Rooms <int> 2474, 1283, 1283, 1283, 1283, 1283, 1283, 128...
## $ Workers <dbl> 522, 170, 170, 170, 170, 170, 170, 170, 391, ...
## $ Agg_Veh <int> 905, 353, 353, 353, 353, 353, 353, 353, 964, ...
## $ Empl <int> 540, 190, 190, 190, 190, 190, 190, 190, 429, ...
## $ HU <int> 426, 265, 265, 265, 265, 265, 265, 265, 869, ...
## $ OHU <int> 426, 194, 194, 194, 194, 194, 194, 194, 468, ...
## $ Pop <int> 1406, 433, 433, 433, 433, 433, 433, 433, 1217...
## $ C_Pop <int> 1375, 654, 654, 654, 654, 654, 654, 654, 1216...
## $ C_HU <int> 492, 343, 343, 343, 343, 343, 343, 343, 812, ...
## $ WAC <int> 112, 826, 826, 826, 826, 826, 826, 826, 414, ...
## $ RAC <int> 730, 258, 258, 258, 258, 258, 258, 258, 344, ...
## $ WAC_RAC <int> 842, 1084, 1084, 1084, 1084, 1084, 1084, 1084...
## $ Pop_Empl <int> 1946, 623, 623, 623, 623, 623, 623, 623, 1646...
## $ Agg_Earn_Den <int64> 10885020, 32885101, 32885101, 32885101, 328...
## $ Agg_Inc_Den <int64> 13708460, 37571138, 37571138, 37571138, 375...
## $ Agg_Room_Den <int> 818, 3230, 3230, 3230, 3230, 3230, 3230, 3230...
## $ Worker_Den <int> 173, 428, 428, 428, 428, 428, 428, 428, 1, 1,...
## $ Agg_Veh_Den <int> 299, 889, 889, 889, 889, 889, 889, 889, 2, 2,...
## $ Empl_Den <int> 179, 478, 478, 478, 478, 478, 478, 478, 1, 1,...
## $ HU_Den <int> 141, 667, 667, 667, 667, 667, 667, 667, 2, 2,...
## $ OHU_Den <int> 141, 488, 488, 488, 488, 488, 488, 488, 1, 1,...
## $ Pop_Den <int> 465, 1090, 1090, 1090, 1090, 1090, 1090, 1090...
## $ C_Pop_Den <int> 455, 1647, 1647, 1647, 1647, 1647, 1647, 1647...
## $ C_HU_Den <int> 163, 864, 864, 864, 864, 864, 864, 864, 2, 2,...
## $ WAC_Den <int> 37, 2080, 2080, 2080, 2080, 2080, 2080, 2080,...
## $ RAC_Den <int> 241, 650, 650, 650, 650, 650, 650, 650, 1, 1,...
## $ WAC_RAC_Den <int> 278, 2729, 2729, 2729, 2729, 2729, 2729, 2729...
## $ Pop_Empl_Den <int> 644, 1569, 1569, 1569, 1569, 1569, 1569, 1569...
## $ Dist_IH <dbl> 5.50023402, 22.78645620, 22.67396564, 22.7340...
## $ Dist_US <dbl> 3.54804381, 0.14351486, 0.08519120, 0.1304387...
## $ V86 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## FIGURE 2
dat1= filter(it01, State=="MN")
dim(dat1)
## [1] 11498 86
dat2= dat1[,c("Default_AADT", "FC_RU")]
dat3=na.omit(dat2)
dim(dat3)
## [1] 11482 2
# for reproducibility
set.seed(123)
ggstatsplot::ggscatterstats(
data = filter(dat1, FC_RU=="7U"),
x = WAC, # predictor/independent variable
y = Default_AADT, # dependent variable
xlab = "WAC", # label for the x-axis
ylab = "AADT", # label for the y-axis
label.expression = "Default_AADT < 300 & WAC > 100", # expression for deciding which points to label
point.alpha = 0.7,
point.size = 4,
point.color = "grey50",
marginal = TRUE, # show marginal distribution
marginal.type = "density", # type of plot for marginal distribution
centrality.parameter = "mean", # centrality parameter to be plotted
margins = "both", # marginal distribution on both axes
xfill = "#CC79A7", # fill for marginals on the x-axis
yfill = "#009E73", # fill for marginals on the y-axis
type = "pearson", # type of linear association
title = "WAC and AADT",
messages = FALSE
)
