#Load necessary packages here

#install.packages('janitor')

#Load necessary libraries here

library(tidyverse)
library(readxl)
library(janitor)
library(readr)
library(sjPlot)

#Load data from local repository

# Step 1: Read the survey workbook, drop the column named `...112`, and list
# the names of the columns that remain.
tnbs_raw <- select(
  read_excel("Label_TNBS_July_all_modified.xlsx"),
  -`...112`
)
names(tnbs_raw)
##   [1] "StartDate"             "EndDate"               "IPAddress"            
##   [4] "Duration (in seconds)" "RecordedDate"          "ResponseId"           
##   [7] "LocationLatitude"      "LocationLongitude"     "Q3_1"                 
##  [10] "Q3_2"                  "Q3_3"                  "Q3_4"                 
##  [13] "Q3_5"                  "Q3_6"                  "Q3_7"                 
##  [16] "Q4_1"                  "Q4_2"                  "Q4_3"                 
##  [19] "Q4_4"                  "Q4_5"                  "Q4_6"                 
##  [22] "Q4_7"                  "Q5_1"                  "Q5_2"                 
##  [25] "Q5_3"                  "Q5_4"                  "Q5_5"                 
##  [28] "Q5_6"                  "Q5_7"                  "Q6"                   
##  [31] "Q7"                    "Q7_7_TEXT"             "Q9"                   
##  [34] "Q9_7_TEXT"             "Q10"                   "Q11"                  
##  [37] "Q12"                   "Q13"                   "Q14"                  
##  [40] "Q14_10_TEXT"           "Q15_1"                 "Q16_1"                
##  [43] "Q18_1"                 "Q18_2"                 "Q18_3"                
##  [46] "Q18_4"                 "Q19_1"                 "Q19_2"                
##  [49] "Q19_3"                 "Q19_4"                 "Q19_5"                
##  [52] "Q19_6"                 "Q19_7"                 "Q19_8"                
##  [55] "Q20_1"                 "Q21"                   "Q23"                  
##  [58] "Q24"                   "Q25"                   "Q26_1"                
##  [61] "Q27"                   "Q28"                   "Q28_7_TEXT"           
##  [64] "Q29"                   "Q29_8_TEXT"            "Q30"                  
##  [67] "Q31"                   "Q32"                   "Q33"                  
##  [70] "Q34"                   "Q35"                   "Q36"                  
##  [73] "Q37"                   "Q38"                   "Q38_4_TEXT"           
##  [76] "Q39"                   "Q41"                   "Q42_1"                
##  [79] "Q42_3"                 "Q42_4"                 "Q43_1"                
##  [82] "Q44"                   "Q45"                   "Q46"                  
##  [85] "Q47"                   "Q47_12_TEXT"           "Q48"                  
##  [88] "Q49"                   "Q50"                   "Q51"                  
##  [91] "Q52_1"                 "Q52_2"                 "Q52_3"                
##  [94] "Q52_4"                 "Q52_12"                "Q52_5"                
##  [97] "Q52_11"                "Q52_6"                 "Q52_10"               
## [100] "Q52_9"                 "Q52_7"                 "Q52_8"                
## [103] "Q52_8_TEXT"            "Q52_13"                "Q53"                  
## [106] "Q54_1"                 "_os_api_gender"        "_os_api_age"          
## [109] "_os_api_ethnicity"     "_os_api_region"        "_os_api_hispanic"
# Count missing values per column. vapply() is used instead of sapply() so the
# result is guaranteed to be a named integer vector (one count per column),
# whatever the shape of the input.
na_count <- vapply(tnbs_raw, function(col) sum(is.na(col)), integer(1))

# One row per variable; row names are inherited from the names of `na_count`.
na_df <- data.frame(Variable = names(na_count), NA_Count = na_count)

# Show the first rows of the per-column missing-value table
head(na_df)
##                                    Variable NA_Count
## StartDate                         StartDate        0
## EndDate                             EndDate        0
## IPAddress                         IPAddress        0
## Duration (in seconds) Duration (in seconds)        0
## RecordedDate                   RecordedDate        0
## ResponseId                       ResponseId        0
# Report the class vector of every column. The result prints as a list here
# because the date-time columns carry two classes ("POSIXct", "POSIXt").
sapply(tnbs_raw, class)  # returns class of each column
## $StartDate
## [1] "POSIXct" "POSIXt" 
## 
## $EndDate
## [1] "POSIXct" "POSIXt" 
## 
## $IPAddress
## [1] "character"
## 
## $`Duration (in seconds)`
## [1] "numeric"
## 
## $RecordedDate
## [1] "POSIXct" "POSIXt" 
## 
## $ResponseId
## [1] "character"
## 
## $LocationLatitude
## [1] "numeric"
## 
## $LocationLongitude
## [1] "numeric"
## 
## $Q3_1
## [1] "character"
## 
## $Q3_2
## [1] "character"
## 
## $Q3_3
## [1] "character"
## 
## $Q3_4
## [1] "character"
## 
## $Q3_5
## [1] "character"
## 
## $Q3_6
## [1] "character"
## 
## $Q3_7
## [1] "character"
## 
## $Q4_1
## [1] "character"
## 
## $Q4_2
## [1] "character"
## 
## $Q4_3
## [1] "character"
## 
## $Q4_4
## [1] "character"
## 
## $Q4_5
## [1] "character"
## 
## $Q4_6
## [1] "character"
## 
## $Q4_7
## [1] "character"
## 
## $Q5_1
## [1] "character"
## 
## $Q5_2
## [1] "character"
## 
## $Q5_3
## [1] "character"
## 
## $Q5_4
## [1] "character"
## 
## $Q5_5
## [1] "character"
## 
## $Q5_6
## [1] "character"
## 
## $Q5_7
## [1] "character"
## 
## $Q6
## [1] "character"
## 
## $Q7
## [1] "character"
## 
## $Q7_7_TEXT
## [1] "character"
## 
## $Q9
## [1] "character"
## 
## $Q9_7_TEXT
## [1] "character"
## 
## $Q10
## [1] "character"
## 
## $Q11
## [1] "character"
## 
## $Q12
## [1] "character"
## 
## $Q13
## [1] "character"
## 
## $Q14
## [1] "character"
## 
## $Q14_10_TEXT
## [1] "character"
## 
## $Q15_1
## [1] "numeric"
## 
## $Q16_1
## [1] "numeric"
## 
## $Q18_1
## [1] "character"
## 
## $Q18_2
## [1] "character"
## 
## $Q18_3
## [1] "character"
## 
## $Q18_4
## [1] "character"
## 
## $Q19_1
## [1] "character"
## 
## $Q19_2
## [1] "character"
## 
## $Q19_3
## [1] "character"
## 
## $Q19_4
## [1] "character"
## 
## $Q19_5
## [1] "character"
## 
## $Q19_6
## [1] "character"
## 
## $Q19_7
## [1] "character"
## 
## $Q19_8
## [1] "character"
## 
## $Q20_1
## [1] "numeric"
## 
## $Q21
## [1] "character"
## 
## $Q23
## [1] "character"
## 
## $Q24
## [1] "character"
## 
## $Q25
## [1] "character"
## 
## $Q26_1
## [1] "character"
## 
## $Q27
## [1] "character"
## 
## $Q28
## [1] "character"
## 
## $Q28_7_TEXT
## [1] "logical"
## 
## $Q29
## [1] "character"
## 
## $Q29_8_TEXT
## [1] "character"
## 
## $Q30
## [1] "character"
## 
## $Q31
## [1] "character"
## 
## $Q32
## [1] "character"
## 
## $Q33
## [1] "character"
## 
## $Q34
## [1] "character"
## 
## $Q35
## [1] "character"
## 
## $Q36
## [1] "character"
## 
## $Q37
## [1] "character"
## 
## $Q38
## [1] "character"
## 
## $Q38_4_TEXT
## [1] "character"
## 
## $Q39
## [1] "character"
## 
## $Q41
## [1] "character"
## 
## $Q42_1
## [1] "character"
## 
## $Q42_3
## [1] "character"
## 
## $Q42_4
## [1] "character"
## 
## $Q43_1
## [1] "numeric"
## 
## $Q44
## [1] "character"
## 
## $Q45
## [1] "character"
## 
## $Q46
## [1] "character"
## 
## $Q47
## [1] "character"
## 
## $Q47_12_TEXT
## [1] "character"
## 
## $Q48
## [1] "character"
## 
## $Q49
## [1] "character"
## 
## $Q50
## [1] "character"
## 
## $Q51
## [1] "character"
## 
## $Q52_1
## [1] "numeric"
## 
## $Q52_2
## [1] "numeric"
## 
## $Q52_3
## [1] "numeric"
## 
## $Q52_4
## [1] "numeric"
## 
## $Q52_12
## [1] "numeric"
## 
## $Q52_5
## [1] "numeric"
## 
## $Q52_11
## [1] "numeric"
## 
## $Q52_6
## [1] "numeric"
## 
## $Q52_10
## [1] "numeric"
## 
## $Q52_9
## [1] "numeric"
## 
## $Q52_7
## [1] "numeric"
## 
## $Q52_8
## [1] "numeric"
## 
## $Q52_8_TEXT
## [1] "character"
## 
## $Q52_13
## [1] "numeric"
## 
## $Q53
## [1] "character"
## 
## $Q54_1
## [1] "character"
## 
## $`_os_api_gender`
## [1] "character"
## 
## $`_os_api_age`
## [1] "numeric"
## 
## $`_os_api_ethnicity`
## [1] "character"
## 
## $`_os_api_region`
## [1] "character"
## 
## $`_os_api_hispanic`
## [1] "character"
# Frequency tables, sent to the viewer, for a fixed set of survey items and
# `_os_api_*` fields. Columns are tabulated in the order listed.
sjmisc::frq(
  x = dplyr::select(
    tnbs_raw,
    dplyr::all_of(c(
      "Q28", "Q29", "Q30", "Q32", "Q35", "Q36", "Q38", "Q23", "Q24", "Q50",
      "Q51", "Q25", "_os_api_gender", "_os_api_age", "_os_api_ethnicity",
      "_os_api_region"
    ))
  ),
  out = "viewer"
)
Q28 <character>
val label frq raw.prc valid.prc cum.prc
Female 312 50.65 50.90 50.90
Male 293 47.56 47.80 98.69
Prefer not to say 7 1.14 1.14 99.84
Prefer to self-describe 1 0.16 0.16 100.00
NA NA 3 0.49 NA NA
total N=616 · valid N=613 · x̄=1.51 · σ=0.53

 

Q29 <character>
val label frq raw.prc valid.prc cum.prc
American Indian or Alaska Native 10 1.62 1.62 1.62
American Indian or Alaska Native,Black or African American 1 0.16 0.16 1.79
American Indian or Alaska Native,Black or African American,White 1 0.16 0.16 1.95
American Indian or Alaska Native,Hispanic or Latino 1 0.16 0.16 2.11
American Indian or Alaska Native,White 3 0.49 0.49 2.60
Another category, please provide: 1 0.16 0.16 2.76
Asian 47 7.63 7.63 10.39
Asian,American Indian or Alaska Native,Black or African American,White 1 0.16 0.16 10.55
Asian,Black or African American 2 0.32 0.32 10.88
Asian,Black or African American,Hispanic or Latino 1 0.16 0.16 11.04
Asian,Hispanic or Latino 4 0.65 0.65 11.69
Asian,Hispanic or Latino,White 1 0.16 0.16 11.85
Asian,White 3 0.49 0.49 12.34
Black or African American 169 27.44 27.44 39.77
Black or African American,Hispanic or Latino 7 1.14 1.14 40.91
Black or African American,Hispanic or Latino,White 1 0.16 0.16 41.07
Black or African American,Middle Eastern or North Arfican 2 0.32 0.32 41.40
Black or African American,Native Hawaiian or other Pacific Islander 1 0.16 0.16 41.56
Black or African American,White 6 0.97 0.97 42.53
Hispanic or Latino 37 6.01 6.01 48.54
Hispanic or Latino,Middle Eastern or North Arfican,Native Hawaiian or other Pacific Islander 1 0.16 0.16 48.70
Hispanic or Latino,Native Hawaiian or other Pacific Islander 1 0.16 0.16 48.86
Hispanic or Latino,White 13 2.11 2.11 50.97
Middle Eastern or North Arfican 4 0.65 0.65 51.62
Native Hawaiian or other Pacific Islander 3 0.49 0.49 52.11
White 295 47.89 47.89 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=19.60 · σ=7.15

 

Q30 <character>
val label frq raw.prc valid.prc cum.prc
Bachelor’s degree or higher 143 23.21 23.33 23.33
Graduate or professional degree 67 10.88 10.93 34.26
High school graduate (includes equivalency) 181 29.38 29.53 63.78
Less than high school graduate 39 6.33 6.36 70.15
Some college or associate’s degree 183 29.71 29.85 100.00
NA NA 3 0.49 NA NA
total N=616 · valid N=613 · x̄=3.08 · σ=1.52

 

Q32 <character>
val label frq raw.prc valid.prc cum.prc
No 79 12.82 12.85 12.85
Prefer not to answer 4 0.65 0.65 13.50
Yes 532 86.36 86.50 100.00
NA NA 1 0.16 NA NA
total N=616 · valid N=615 · x̄=2.74 · σ=0.67

 

Q35 <character>
val label frq raw.prc valid.prc cum.prc
I don’t know 15 2.44 2.64 2.64
I prefer not to answer 11 1.79 1.94 4.58
Own 273 44.32 48.06 52.64
Rent 269 43.67 47.36 100.00
NA NA 48 7.79 NA NA
total N=616 · valid N=568 · x̄=3.40 · σ=0.66

 

Q36 <character>
val label frq raw.prc valid.prc cum.prc
Excellent 130 21.10 21.17 21.17
Fair 93 15.10 15.15 36.32
Good 200 32.47 32.57 68.89
Poor 16 2.60 2.61 71.50
Very good 175 28.41 28.50 100.00
NA NA 2 0.32 NA NA
total N=616 · valid N=614 · x̄=3.02 · σ=1.47

 

Q38 <character>
val label frq raw.prc valid.prc cum.prc
English 543 88.15 89.90 89.90
English,Others (specify) 9 1.46 1.49 91.39
English,Spanish 30 4.87 4.97 96.36
English,Spanish,Others (specify) 1 0.16 0.17 96.52
I prefer not to answer 2 0.32 0.33 96.85
Others (specify) 2 0.32 0.33 97.19
Spanish 15 2.44 2.48 99.67
Spanish,I prefer not to answer 2 0.32 0.33 100.00
NA NA 12 1.95 NA NA
total N=616 · valid N=604 · x̄=1.32 · σ=1.15

 

Q23 <character>
val label frq raw.prc valid.prc cum.prc
Rural 98 15.91 15.96 15.96
Suburban 255 41.40 41.53 57.49
Urban 261 42.37 42.51 100.00
NA NA 2 0.32 NA NA
total N=616 · valid N=614 · x̄=2.27 · σ=0.72

 

Q24 <character>
val label frq raw.prc valid.prc cum.prc
No 125 20.29 20.29 20.29
Yes 491 79.71 79.71 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=1.80 · σ=0.40

 

Q50 <character>
val label frq raw.prc valid.prc cum.prc
Neutral 118 19.16 22.26 22.26
Somewhat affordable 181 29.38 34.15 56.42
Somewhat unaffordable 52 8.44 9.81 66.23
Very affordable 158 25.65 29.81 96.04
Very unaffordable 21 3.41 3.96 100.00
NA NA 86 13.96 NA NA
total N=616 · valid N=530 · x̄=2.59 · σ=1.23

 

Q51 <character>
val label frq raw.prc valid.prc cum.prc
Neither satisfied nor unsatsified 97 15.75 16.67 16.67
Somewhat satisfied 178 28.90 30.58 47.25
Somewhat unsatisfied 77 12.50 13.23 60.48
Very satisfied 166 26.95 28.52 89.00
Very unsatisfied 64 10.39 11.00 100.00
NA NA 34 5.52 NA NA
total N=616 · valid N=582 · x̄=2.87 · σ=1.30

 

Q25 <character>
val label frq raw.prc valid.prc cum.prc
Employed full time 261 42.37 42.79 42.79
Employed full time,Employed part time 1 0.16 0.16 42.95
Employed full time,Not working (retired, disabled, stay-at-home parent, etc) 1 0.16 0.16 43.11
Employed full time,Student 4 0.65 0.66 43.77
Employed part time 84 13.64 13.77 57.54
Employed part time,Not working (retired, disabled, stay-at-home parent, etc) 2 0.32 0.33 57.87
Employed part time,Student 10 1.62 1.64 59.51
Not working (retired, disabled, stay-at-home parent, etc) 218 35.39 35.74 95.25
Student 27 4.38 4.43 99.67
Student,Not working (retired, disabled, stay-at-home parent, etc) 2 0.32 0.33 100.00
NA NA 6 0.97 NA NA
total N=616 · valid N=610 · x̄=4.58 · σ=3.28

 

_os_api_gender <character>
val label frq raw.prc valid.prc cum.prc
Female 310 50.32 50.32 50.32
Male 306 49.68 49.68 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=1.50 · σ=0.50

 

_os_api_age <numeric>
val label frq raw.prc valid.prc cum.prc
18 8 1.30 1.30 1.30
19 10 1.62 1.62 2.92
20 10 1.62 1.62 4.55
21 12 1.95 1.95 6.49
22 17 2.76 2.76 9.25
23 25 4.06 4.06 13.31
24 15 2.44 2.44 15.75
25 19 3.08 3.08 18.83
26 12 1.95 1.95 20.78
27 16 2.60 2.60 23.38
28 12 1.95 1.95 25.32
29 14 2.27 2.27 27.60
30 13 2.11 2.11 29.71
31 3 0.49 0.49 30.19
32 12 1.95 1.95 32.14
33 13 2.11 2.11 34.25
34 18 2.92 2.92 37.18
35 11 1.79 1.79 38.96
36 12 1.95 1.95 40.91
37 9 1.46 1.46 42.37
38 5 0.81 0.81 43.18
39 16 2.60 2.60 45.78
40 13 2.11 2.11 47.89
41 8 1.30 1.30 49.19
42 9 1.46 1.46 50.65
43 5 0.81 0.81 51.46
44 10 1.62 1.62 53.08
45 12 1.95 1.95 55.03
46 10 1.62 1.62 56.66
47 7 1.14 1.14 57.79
48 7 1.14 1.14 58.93
49 10 1.62 1.62 60.55
50 15 2.44 2.44 62.99
51 5 0.81 0.81 63.80
52 11 1.79 1.79 65.58
53 13 2.11 2.11 67.69
54 18 2.92 2.92 70.62
55 10 1.62 1.62 72.24
56 10 1.62 1.62 73.86
57 8 1.30 1.30 75.16
58 9 1.46 1.46 76.62
59 6 0.97 0.97 77.60
60 13 2.11 2.11 79.71
61 5 0.81 0.81 80.52
62 10 1.62 1.62 82.14
63 8 1.30 1.30 83.44
64 8 1.30 1.30 84.74
65 8 1.30 1.30 86.04
66 12 1.95 1.95 87.99
67 10 1.62 1.62 89.61
68 6 0.97 0.97 90.58
69 3 0.49 0.49 91.07
70 5 0.81 0.81 91.88
71 4 0.65 0.65 92.53
72 3 0.49 0.49 93.02
73 4 0.65 0.65 93.67
74 9 1.46 1.46 95.13
75 5 0.81 0.81 95.94
76 6 0.97 0.97 96.92
77 4 0.65 0.65 97.56
78 2 0.32 0.32 97.89
79 1 0.16 0.16 98.05
80 4 0.65 0.65 98.70
81 1 0.16 0.16 98.86
82 1 0.16 0.16 99.03
83 2 0.32 0.32 99.35
84 1 0.16 0.16 99.51
90 2 0.32 0.32 99.84
91 1 0.16 0.16 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=44.07 · σ=17.38

 

_os_api_ethnicity <character>
val label frq raw.prc valid.prc cum.prc
American Indian 6 0.97 0.97 0.97
Asian 67 10.88 10.88 11.85
Black 194 31.49 31.49 43.34
Other 25 4.06 4.06 47.40
Pacific Islander 1 0.16 0.16 47.56
White 323 52.44 52.44 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=4.49 · σ=1.64

 

_os_api_region <character>
val label frq raw.prc valid.prc cum.prc
Midwest 136 22.08 22.08 22.08
Northeast 109 17.69 17.69 39.77
South 231 37.50 37.50 77.27
West 140 22.73 22.73 100.00
NA NA 0 0.00 NA NA
total N=616 · valid N=616 · x̄=2.61 · σ=1.07
# Collapse the multi-select employment item (Q25) into one status per
# respondent. Rules are tried top to bottom, so when several options were
# ticked the first matching rule wins. A missing Q25 stays missing, and any
# other unmatched answer becomes "Other".
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    employment = dplyr::case_when(
      is.na(Q25) ~ NA_character_,
      grepl("Employed full time", Q25) ~ "Employed full time",
      grepl("Employed part time", Q25) ~ "Employed part time",
      grepl("Student", Q25) ~ "Student",
      grepl("Not working", Q25) ~ "Not working",
      TRUE ~ "Other"
    )
  )

sjmisc::frq(tnbs_raw$employment)
## x <character> 
## # total N=616 valid N=610 mean=2.01 sd=0.99
## 
## Value              |   N | Raw % | Valid % | Cum. %
## ---------------------------------------------------
## Employed full time | 267 | 43.34 |   43.77 |  43.77
## Employed part time |  96 | 15.58 |   15.74 |  59.51
## Not working        | 218 | 35.39 |   35.74 |  95.25
## Student            |  29 |  4.71 |    4.75 | 100.00
## <NA>               |   6 |  0.97 |    <NA> |   <NA>
# Bin `_os_api_age` into age bands with inclusive bounds. Anything that falls
# in no band (including a missing age) is labelled "Not provided".
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    age = dplyr::case_when(
      dplyr::between(`_os_api_age`, 18, 24) ~ "18-24",
      dplyr::between(`_os_api_age`, 25, 34) ~ "25-34",
      dplyr::between(`_os_api_age`, 35, 44) ~ "35-44",
      dplyr::between(`_os_api_age`, 45, 54) ~ "45-54",
      dplyr::between(`_os_api_age`, 55, 64) ~ "55-64",
      `_os_api_age` >= 65 ~ "65+",
      TRUE ~ "Not provided"
    )
  )
sjmisc::frq(tnbs_raw$age)
## x <character> 
## # total N=616 valid N=616 mean=3.39 sd=1.68
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## 18-24 |  97 | 15.75 |   15.75 |  15.75
## 25-34 | 132 | 21.43 |   21.43 |  37.18
## 35-44 |  98 | 15.91 |   15.91 |  53.08
## 45-54 | 108 | 17.53 |   17.53 |  70.62
## 55-64 |  87 | 14.12 |   14.12 |  84.74
## 65+   |  94 | 15.26 |   15.26 | 100.00
## <NA>  |   0 |  0.00 |    <NA> |   <NA>
# Reduce the multi-select language item (Q38) to one label. Matching is
# case-insensitive and the first rule that hits wins, so "prefer not to
# answer" outranks Spanish, which outranks English, which outranks Others.
# A missing Q38 is stored as the literal string "NA" (so it is tabulated as a
# category); a non-missing answer that matches nothing is left as a real NA.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    language = dplyr::case_when(
      is.na(Q38) ~ "NA",
      grepl("I prefer not to answer", Q38, ignore.case = TRUE) ~
        "Prefer not to answer",
      grepl("Spanish", Q38, ignore.case = TRUE) ~ "Spanish",
      grepl("English", Q38, ignore.case = TRUE) ~ "English",
      grepl("Others", Q38, ignore.case = TRUE) ~ "Other languages"
    )
  )

sjmisc::frq(tnbs_raw$language)
## x <character> 
## # total N=616 valid N=616 mean=1.34 sd=1.08
## 
## Value                |   N | Raw % | Valid % | Cum. %
## -----------------------------------------------------
## English              | 552 | 89.61 |   89.61 |  89.61
## NA                   |  12 |  1.95 |    1.95 |  91.56
## Other languages      |   2 |  0.32 |    0.32 |  91.88
## Prefer not to answer |   4 |  0.65 |    0.65 |  92.53
## Spanish              |  46 |  7.47 |    7.47 | 100.00
## <NA>                 |   0 |  0.00 |    <NA> |   <NA>
# Frequency table of the raw Q12 responses (full answer text), inspected
# before the item is shortened into traveler types
sjmisc::frq(tnbs_raw$Q12)
## x <character> 
## # total N=616 valid N=570 mean=2.10 sd=0.66
## 
## Value                                                                                                                         
## ------------------------------------------------------------------------------------------------------------------------------
## Car-less Traveler: I rarely or never use a car and mostly rely on walking, biking, or public transportation.                  
## Daily Driver: I use a car for nearly every trip.                                                                              
## Multimodal Traveler: I use a mix of transportation modes—car for some trips, and walk, bike, rideshare, or transit for others.
## <NA>                                                                                                                          
## 
##   N | Raw % | Valid % | Cum. %
## ------------------------------
##  97 | 15.75 |   17.02 |  17.02
## 318 | 51.62 |   55.79 |  72.81
## 155 | 25.16 |   27.19 | 100.00
##  46 |  7.47 |    <NA> |   <NA>
# Shorten the long Q12 answer text to a compact traveler-type label
# (case-insensitive match on the leading phrase). Missing answers stay
# missing, as does any answer that matches none of the phrases.
# NOTE(review): the label "Multimodal travelr" is misspelled; it is kept
# unchanged here because later code may match on the exact string.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    traveler_type = dplyr::case_when(
      is.na(Q12) ~ NA_character_,
      grepl("Car-less Traveler", Q12, ignore.case = TRUE) ~ "Car-less",
      grepl("Daily Driver", Q12, ignore.case = TRUE) ~ "Daily driver",
      grepl("Multimodal Traveler", Q12, ignore.case = TRUE) ~
        "Multimodal travelr"
    )
  )

sjmisc::frq(tnbs_raw$traveler_type)
## x <character> 
## # total N=616 valid N=570 mean=2.10 sd=0.66
## 
## Value              |   N | Raw % | Valid % | Cum. %
## ---------------------------------------------------
## Car-less           |  97 | 15.75 |   17.02 |  17.02
## Daily driver       | 318 | 51.62 |   55.79 |  72.81
## Multimodal travelr | 155 | 25.16 |   27.19 | 100.00
## <NA>               |  46 |  7.47 |    <NA> |   <NA>
# Frequency table of the raw Q24 responses (Yes / No)
sjmisc::frq(tnbs_raw$Q24)
## x <character> 
## # total N=616 valid N=616 mean=1.80 sd=0.40
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## No    | 125 | 20.29 |   20.29 |  20.29
## Yes   | 491 | 79.71 |   79.71 | 100.00
## <NA>  |   0 |  0.00 |    <NA> |   <NA>
# Frequency table of the raw Q32 responses (Yes / No / Prefer not to answer)
sjmisc::frq(tnbs_raw$Q32)
## x <character> 
## # total N=616 valid N=615 mean=2.74 sd=0.67
## 
## Value                |   N | Raw % | Valid % | Cum. %
## -----------------------------------------------------
## No                   |  79 | 12.82 |   12.85 |  12.85
## Prefer not to answer |   4 |  0.65 |    0.65 |  13.50
## Yes                  | 532 | 86.36 |   86.50 | 100.00
## <NA>                 |   1 |  0.16 |    <NA> |   <NA>
# Frequency table of the raw Q41 responses. Q41 is stored as text, so this
# shows every distinct entry, including non-numeric ones, before it is
# converted to a number.
sjmisc::frq(tnbs_raw$Q41)
## x <character> 
## # total N=616 valid N=581 mean=11.24 sd=5.25
## 
## Value                               |   N | Raw % | Valid % | Cum. %
## --------------------------------------------------------------------
## ,3                                  |   1 |  0.16 |    0.17 |   0.17
## ..                                  |   1 |  0.16 |    0.17 |   0.34
## .2                                  |   1 |  0.16 |    0.17 |   0.52
## 0                                   |   8 |  1.30 |    1.38 |   1.89
## 1                                   | 129 | 20.94 |   22.20 |  24.10
## 1@                                  |   1 |  0.16 |    0.17 |  24.27
## 100000                              |   1 |  0.16 |    0.17 |  24.44
## 12                                  |   1 |  0.16 |    0.17 |  24.61
## 2                                   | 163 | 26.46 |   28.06 |  52.67
## 200                                 |   1 |  0.16 |    0.17 |  52.84
## 2000                                |   1 |  0.16 |    0.17 |  53.01
## 2100                                |   1 |  0.16 |    0.17 |  53.18
## 3                                   | 104 | 16.88 |   17.90 |  71.08
## 3 of my children and that it is all |   1 |  0.16 |    0.17 |  71.26
## 3500                                |   1 |  0.16 |    0.17 |  71.43
## 4                                   |  87 | 14.12 |   14.97 |  86.40
## 5                                   |  38 |  6.17 |    6.54 |  92.94
## 6                                   |  18 |  2.92 |    3.10 |  96.04
## 7                                   |   2 |  0.32 |    0.34 |  96.39
## 8                                   |   4 |  0.65 |    0.69 |  97.07
## 9                                   |   1 |  0.16 |    0.17 |  97.25
## E                                   |   1 |  0.16 |    0.17 |  97.42
## Housing                             |   1 |  0.16 |    0.17 |  97.59
## Husband                             |   1 |  0.16 |    0.17 |  97.76
## I don’t know                        |   1 |  0.16 |    0.17 |  97.93
## None                                |   4 |  0.65 |    0.69 |  98.62
## Not for sure                        |   1 |  0.16 |    0.17 |  98.80
## Roomate                             |   1 |  0.16 |    0.17 |  98.97
## T                                   |   1 |  0.16 |    0.17 |  99.14
## Thank you                           |   1 |  0.16 |    0.17 |  99.31
## Timing                              |   1 |  0.16 |    0.17 |  99.48
## Unknown                             |   1 |  0.16 |    0.17 |  99.66
## w                                   |   1 |  0.16 |    0.17 |  99.83
## Wert                                |   1 |  0.16 |    0.17 | 100.00
## <NA>                                |  35 |  5.68 |    <NA> |   <NA>
# Derive a numeric household size from the text item Q41. Entries that
# as.numeric() cannot parse become NA; the resulting coercion warning is
# silenced deliberately.
tnbs_raw <- dplyr::mutate(
  tnbs_raw,
  household_size = suppressWarnings(as.numeric(Q41))
)
# List the distinct raw Q41 values that ended up as NA after conversion
with(tnbs_raw, unique(Q41[is.na(household_size)]))
##  [1] NA                                    ",3"                                 
##  [3] ".."                                  "Unknown"                            
##  [5] "Not for sure"                        "w"                                  
##  [7] "None"                                "T"                                  
##  [9] "Roomate"                             "Housing"                            
## [11] "Thank you"                           "I don’t know"                       
## [13] "Wert"                                "1@"                                 
## [15] "Husband"                             "3 of my children and that it is all"
## [17] "Timing"                              "E"
sjmisc::frq(tnbs_raw$household_size)
## x <numeric> 
## # total N=616 valid N=561 mean=194.80 sd=4225.65
## 
##    Value |   N | Raw % | Valid % | Cum. %
## -----------------------------------------
##     0.00 |   8 |  1.30 |    1.43 |   1.43
##     0.20 |   1 |  0.16 |    0.18 |   1.60
##     1.00 | 129 | 20.94 |   22.99 |  24.60
##     2.00 | 163 | 26.46 |   29.06 |  53.65
##     3.00 | 104 | 16.88 |   18.54 |  72.19
##     4.00 |  87 | 14.12 |   15.51 |  87.70
##     5.00 |  38 |  6.17 |    6.77 |  94.47
##     6.00 |  18 |  2.92 |    3.21 |  97.68
##     7.00 |   2 |  0.32 |    0.36 |  98.04
##     8.00 |   4 |  0.65 |    0.71 |  98.75
##     9.00 |   1 |  0.16 |    0.18 |  98.93
##    12.00 |   1 |  0.16 |    0.18 |  99.11
##   200.00 |   1 |  0.16 |    0.18 |  99.29
##  2000.00 |   1 |  0.16 |    0.18 |  99.47
##  2100.00 |   1 |  0.16 |    0.18 |  99.64
##  3500.00 |   1 |  0.16 |    0.18 |  99.82
## 1.00e+05 |   1 |  0.16 |    0.18 | 100.00
##     <NA> |  55 |  8.93 |    <NA> |   <NA>
# Recode implausible household sizes in one pass:
#   * 0 and 0.2 are counted as a one-person household
#   * anything above 12 is set to missing
#   * every other value (including existing NAs) is kept as is
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    household_size = dplyr::case_when(
      household_size %in% c(0, 0.2) ~ 1,
      household_size > 12 ~ NA_real_,
      TRUE ~ household_size
    )
  )
sjmisc::frq(tnbs_raw$household_size)
## x <numeric> 
## # total N=616 valid N=556 mean=2.68 sd=1.53
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
##     1 | 138 | 22.40 |   24.82 |  24.82
##     2 | 163 | 26.46 |   29.32 |  54.14
##     3 | 104 | 16.88 |   18.71 |  72.84
##     4 |  87 | 14.12 |   15.65 |  88.49
##     5 |  38 |  6.17 |    6.83 |  95.32
##     6 |  18 |  2.92 |    3.24 |  98.56
##     7 |   2 |  0.32 |    0.36 |  98.92
##     8 |   4 |  0.65 |    0.72 |  99.64
##     9 |   1 |  0.16 |    0.18 |  99.82
##    12 |   1 |  0.16 |    0.18 | 100.00
##  <NA> |  60 |  9.74 |    <NA> |   <NA>
# Household size as a category: exact sizes 1 to 5 keep their own label,
# six or more are pooled into "6+". Missing sizes, and any size matching
# neither rule, come out as NA.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    hh_size_cat = dplyr::case_when(
      household_size %in% 1:5 ~ as.character(household_size),
      household_size >= 6 ~ "6+"
    )
  )

# Tabulate the new categories
sjmisc::frq(tnbs_raw$hh_size_cat)
## x <character> 
## # total N=616 valid N=556 mean=2.64 sd=1.42
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## 1     | 138 | 22.40 |   24.82 |  24.82
## 2     | 163 | 26.46 |   29.32 |  54.14
## 3     | 104 | 16.88 |   18.71 |  72.84
## 4     |  87 | 14.12 |   15.65 |  88.49
## 5     |  38 |  6.17 |    6.83 |  95.32
## 6+    |  26 |  4.22 |    4.68 | 100.00
## <NA>  |  60 |  9.74 |    <NA> |   <NA>
# Frequency table of the raw Q42_1 responses (stored as text; shows the
# non-numeric entries alongside the counts)
sjmisc::frq(tnbs_raw$Q42_1)
## x <character> 
## # total N=616 valid N=556 mean=2.68 sd=0.81
## 
## Value        |   N | Raw % | Valid % | Cum. %
## ---------------------------------------------
## ...          |   1 |  0.16 |    0.18 |   0.18
## 0            | 240 | 38.96 |   43.17 |  43.35
## 1            | 273 | 44.32 |   49.10 |  92.45
## 2            |  36 |  5.84 |    6.47 |  98.92
## 4            |   1 |  0.16 |    0.18 |  99.10
## 48           |   1 |  0.16 |    0.18 |  99.28
## Not for sure |   1 |  0.16 |    0.18 |  99.46
## Q            |   1 |  0.16 |    0.18 |  99.64
## T            |   1 |  0.16 |    0.18 |  99.82
## Unknown      |   1 |  0.16 |    0.18 | 100.00
## <NA>         |  60 |  9.74 |    <NA> |   <NA>
# Frequency table of the raw Q42_3 responses (stored as text; shows the
# non-numeric entries alongside the counts)
sjmisc::frq(tnbs_raw$Q42_3)
## x <character> 
## # total N=616 valid N=539 mean=3.16 sd=1.63
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## ..    |   1 |  0.16 |    0.19 |   0.19
## 0     | 319 | 51.79 |   59.18 |  59.37
## 0p    |   1 |  0.16 |    0.19 |  59.55
## 1     | 108 | 17.53 |   20.04 |  79.59
## 2     |  69 | 11.20 |   12.80 |  92.39
## 3     |  26 |  4.22 |    4.82 |  97.22
## 4     |   8 |  1.30 |    1.48 |  98.70
## 5     |   2 |  0.32 |    0.37 |  99.07
## 6     |   1 |  0.16 |    0.19 |  99.26
## I     |   1 |  0.16 |    0.19 |  99.44
## No    |   1 |  0.16 |    0.19 |  99.63
## T     |   1 |  0.16 |    0.19 |  99.81
## Yes   |   1 |  0.16 |    0.19 | 100.00
## <NA>  |  77 | 12.50 |    <NA> |   <NA>
# Frequency table of the raw Q42_4 responses (stored as text; shows the
# non-numeric entries alongside the counts)
sjmisc::frq(tnbs_raw$Q42_4)
## x <character> 
## # total N=616 valid N=529 mean=3.66 sd=2.19
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## ...   |   1 |  0.16 |    0.19 |   0.19
## 0     | 238 | 38.64 |   44.99 |  45.18
## 1     | 138 | 22.40 |   26.09 |  71.27
## 18    |   1 |  0.16 |    0.19 |  71.46
## 19    |   1 |  0.16 |    0.19 |  71.64
## 2     |  68 | 11.04 |   12.85 |  84.50
## 3     |  58 |  9.42 |   10.96 |  95.46
## 36    |   1 |  0.16 |    0.19 |  95.65
## 4     |  17 |  2.76 |    3.21 |  98.87
## 5     |   4 |  0.65 |    0.76 |  99.62
## No    |   1 |  0.16 |    0.19 |  99.81
## T     |   1 |  0.16 |    0.19 | 100.00
## <NA>  |  87 | 14.12 |    <NA> |   <NA>
# Convert survey text to numbers: surrounding whitespace is stripped first,
# and anything that still cannot be parsed comes back as NA. The coercion
# warning is suppressed, so unparseable input is dropped silently.
clean_numeric <- function(x) {
  stripped <- trimws(x)
  parsed <- suppressWarnings(as.numeric(stripped))
  parsed
}
# Build numeric counts from the three Q42 text items, then blank out values
# outside the accepted range for each one (spouse 0-2, children 0-8,
# adults 0-12). Values that were already NA stay NA.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    spouse   = clean_numeric(Q42_1),
    children = clean_numeric(Q42_3),
    adults   = clean_numeric(Q42_4),
    spouse   = ifelse(spouse >= 0 & spouse <= 2, spouse, NA),
    children = ifelse(children >= 0 & children <= 8, children, NA),
    adults   = ifelse(adults >= 0 & adults <= 12, adults, NA)
  )
sjmisc::frq(tnbs_raw$children)
## x <numeric> 
## # total N=616 valid N=533 mean=0.70 sd=1.04
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
##     0 | 319 | 51.79 |   59.85 |  59.85
##     1 | 108 | 17.53 |   20.26 |  80.11
##     2 |  69 | 11.20 |   12.95 |  93.06
##     3 |  26 |  4.22 |    4.88 |  97.94
##     4 |   8 |  1.30 |    1.50 |  99.44
##     5 |   2 |  0.32 |    0.38 |  99.81
##     6 |   1 |  0.16 |    0.19 | 100.00
##  <NA> |  83 | 13.47 |    <NA> |   <NA>
# Among respondents whose child count is missing, count how many fall into
# each household-size category
dplyr::count(
  dplyr::filter(tnbs_raw, is.na(children)),
  hh_size_cat
)
## # A tibble: 6 × 2
##   hh_size_cat     n
##   <chr>       <int>
## 1 1              13
## 2 2              28
## 3 3              14
## 4 4               5
## 5 5               2
## 6 <NA>           21
# Number of children as a category: "0", "1" or "2+". A missing count is
# stored as the literal string "NA", so it is tabulated as its own category.
# The rules are mutually exclusive, so their order does not matter.
tnbs_raw$child_cat <- with(
  tnbs_raw,
  dplyr::case_when(
    children >= 2 ~ "2+",
    children == 1 ~ "1",
    children == 0 ~ "0",
    is.na(children) ~ "NA"
  )
)

sjmisc::frq(tnbs_raw$child_cat)
## x <character> 
## # total N=616 valid N=616 mean=1.92 sd=1.11
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## 0     | 319 | 51.79 |   51.79 |  51.79
## 1     | 108 | 17.53 |   17.53 |  69.32
## 2+    | 106 | 17.21 |   17.21 |  86.53
## NA    |  83 | 13.47 |   13.47 | 100.00
## <NA>  |   0 |  0.00 |    <NA> |   <NA>
# Derive household type from the cleaned `children` and `spouse` counts.
#
# Rules, applied top to bottom:
#   * child count unknown             -> NA (type cannot be determined)
#   * children, and spouse missing/0  -> "Single parent household"
#   * children, and spouse present    -> "HH with children"
#   * no children                     -> "HH without children"
#
# Fixes two misclassifications of the previous catch-all branch, which
# labelled as "HH without children and spouse" (a) households that do have
# children but reported a spouse count of 0, and (b) households whose child
# count is missing.
# NOTE(review): "HH without children and spouse" is no longer produced. If
# childless households should be split by presence of a spouse, add that rule
# explicitly - confirm the intended definition with the analyst.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(
    HH_type = dplyr::case_when(
      is.na(children) ~ NA_character_,
      children > 0 & (is.na(spouse) | spouse == 0) ~ "Single parent household",
      children > 0 & spouse > 0 ~ "HH with children",
      children == 0 ~ "HH without children"
    )
  )

# View frequency table
sjmisc::frq(tnbs_raw$HH_type)
## x <character> 
## # total N=616 valid N=616 mean=1.99 sd=0.74
## 
## Value                          |   N | Raw % | Valid % | Cum. %
## ---------------------------------------------------------------
## HH with children               | 160 | 25.97 |   25.97 |  25.97
## HH without children            | 319 | 51.79 |   51.79 |  77.76
## HH without children and spouse | 123 | 19.97 |   19.97 |  97.73
## Single parent household        |  14 |  2.27 |    2.27 | 100.00
## <NA>                           |   0 |  0.00 |    <NA> |   <NA>
#sjmisc::frq(tnbs_raw$Q43_1)

# Pass Q43_1 through as.numeric(); anything that cannot be parsed becomes NA
# and the coercion warning is silenced.
tnbs_raw <- tnbs_raw |>
  dplyr::mutate(income_raw = suppressWarnings(as.numeric(Q43_1)))

# Step 2: Bin income_raw into bands. Rules are tried in order, so each
# "less than" test only sees values not already claimed by a lower band.
# A missing income is stored as the literal string "NA".
tnbs_raw$income_cat <- with(
  tnbs_raw,
  dplyr::case_when(
    is.na(income_raw) ~ "NA",
    income_raw < 1000 ~ "<$1,000",
    income_raw < 3000 ~ "$1,000–2,999",
    income_raw < 5000 ~ "$3,000–4,999",
    income_raw < 8000 ~ "$5,000–7,999",
    income_raw < 12000 ~ "$8,000–11,999",
    income_raw >= 12000 ~ "$12,000+"
  )
)

sjmisc::frq(tnbs_raw$income_cat)
## x <character> 
## # total N=616 valid N=616 mean=3.63 sd=1.56
## 
## Value         |   N | Raw % | Valid % | Cum. %
## ----------------------------------------------
## <$1,000       |  38 |  6.17 |    6.17 |   6.17
## $1,000–2,999  | 155 | 25.16 |   25.16 |  31.33
## $12,000+      | 110 | 17.86 |   17.86 |  49.19
## $3,000–4,999  | 106 | 17.21 |   17.21 |  66.40
## $5,000–7,999  | 106 | 17.21 |   17.21 |  83.60
## $8,000–11,999 | 100 | 16.23 |   16.23 |  99.84
## NA            |   1 |  0.16 |    0.16 | 100.00
## <NA>          |   0 |  0.00 |    <NA> |   <NA>
# Step 3: Convert monthly income to annual income (income_raw * 12) and bin
# it into annual bands.
tnbs_raw$annual_income <- tnbs_raw$income_raw * 12

# A missing income must stay missing. It was previously assigned to
# "$150,000+", which inflated the top bracket with respondents whose income
# is unknown.
tnbs_raw <- tnbs_raw %>%
  mutate(annual_income_cat = case_when(
    is.na(annual_income)            ~ NA_character_,
    annual_income < 15000           ~ "<$15,000",
    annual_income >= 15000 & annual_income < 30000 ~ "$15,000–29,999",
    annual_income >= 30000 & annual_income < 50000 ~ "$30,000–49,999",
    annual_income >= 50000 & annual_income < 75000 ~ "$50,000–74,999",
    annual_income >= 75000 & annual_income < 100000 ~ "$75,000–99,999",
    annual_income >= 100000 & annual_income < 150000 ~ "$100,000–149,999",
    annual_income >= 150000          ~ "$150,000+"
  ))
sjmisc::frq(tnbs_raw$annual_income_cat)
## x <character> 
## # total N=616 valid N=616 mean=4.01 sd=1.86
## 
## Value            |   N | Raw % | Valid % | Cum. %
## -------------------------------------------------
## <$15,000         |  65 | 10.55 |   10.55 |  10.55
## $100,000–149,999 |  94 | 15.26 |   15.26 |  25.81
## $15,000–29,999   |  95 | 15.42 |   15.42 |  41.23
## $150,000+        | 107 | 17.37 |   17.37 |  58.60
## $30,000–49,999   |  92 | 14.94 |   14.94 |  73.54
## $50,000–74,999   |  98 | 15.91 |   15.91 |  89.45
## $75,000–99,999   |  65 | 10.55 |   10.55 | 100.00
## <NA>             |   0 |  0.00 |    <NA> |   <NA>
# Frequency table of the raw Q44 responses (duration bands, listed in
# alphabetical rather than chronological order)
sjmisc::frq(tnbs_raw$Q44)
## x <character> 
## # total N=616 valid N=615 mean=3.05 sd=1.54
## 
## Value              |   N | Raw % | Valid % | Cum. %
## ---------------------------------------------------
## 1 to 3 years       | 137 | 22.24 |   22.28 |  22.28
## 3-5 years          | 127 | 20.62 |   20.65 |  42.93
## 5-10 years         | 102 | 16.56 |   16.59 |  59.51
## Less than 1 year   |  69 | 11.20 |   11.22 |  70.73
## More than 10 years | 180 | 29.22 |   29.27 | 100.00
## <NA>               |   1 |  0.16 |    <NA> |   <NA>
# Frequency table of the raw Q49 responses (stored as text: "0" to "3" and
# "4 or more")
sjmisc::frq(tnbs_raw$Q49)
## x <character> 
## # total N=616 valid N=613 mean=2.32 sd=0.93
## 
## Value     |   N | Raw % | Valid % | Cum. %
## ------------------------------------------
## 0         | 104 | 16.88 |   16.97 |  16.97
## 1         | 285 | 46.27 |   46.49 |  63.46
## 2         | 161 | 26.14 |   26.26 |  89.72
## 3         |  47 |  7.63 |    7.67 |  97.39
## 4 or more |  16 |  2.60 |    2.61 | 100.00
## <NA>      |   3 |  0.49 |    <NA> |   <NA>