# quantiles of MPG for the 2024 model year vehicles by for different powertrains and vehicle types. Data source: EPA or fuel_economic_gov
<-as.data.frame(read.csv(here("..", "..","survey_updated_pretest","data","mpg_by_segment_fuel.csv"))) %>%
dt_mpg# kwh per 100 miles --> kwh per mile
mutate(kwh_q10=kwh_q10/100,
kwh_q25=kwh_q25/100,
kwh_q50=kwh_q50/100,
kwh_q75=kwh_q75/100,
kwh_q90=kwh_q90/100
)
# electricity rate per kwh https://www.chooseenergy.com/electricity-rates-by-state/ March 2025
<- 0.11
electricity_rate_low <- 0.17
electricity_rate_avg <-0.33
electricity_rate_high
# gas rate per gallon https://www.chooseenergy.com/data-center/cost-of-driving-by-state May 2025
<- 2.66
gasoline_rate_low <- 3.18
gasoline_rate_avg <- 4.77
gasoline_rate_high
# PHEV utility factors https://docs.nrel.gov/docs/gen/fy07/41341.pdf
<- 0.668
phev_uf_car<- 0.487
phev_uf_suv
# MPGe- One gallon of gasoline is equivalent to 33.7 kilowatt-hours (kWh) of electricity https://www.bluegrassauto.com/hybrid-and-electric-vehicle-comparisons/
<- 33.7
gas_electricity
# # PHEV: Assume total range = 300 miles, electric range = 40 miles,
# phev_total_range<- 300
# phev_e_range<- 40
Power Analysis - Vehicle Choices
Step 0: Data set up (Operation costs)
Assumptions
All levels
<-dt_mpg %>%
dt_mpgmutate(
cents_mile_min=case_when(
=="bev" ~ electricity_rate_low*(kwh_q10)*100,
powertrain%in% c("cv","hev") ~ gasoline_rate_low/mpg_q90*100,
powertrain
=="phev" & vehicle_type=="car" ~ (phev_uf_car*(electricity_rate_low*(kwh_q10))+(1-phev_uf_car)*(gasoline_rate_low/mpg_q90))*100,
powertrain=="phev" & vehicle_type=="suv" ~ (phev_uf_suv*(electricity_rate_low*(kwh_q10))+(1-phev_uf_suv)*(gasoline_rate_low/mpg_q90))*100
powertrain
),cents_mile_max=case_when(
=="bev" ~ electricity_rate_high*(kwh_q90)*100,
powertrain%in% c("cv","hev") ~ gasoline_rate_high/mpg_q10*100,
powertrain
=="phev" & vehicle_type=="car" ~ (phev_uf_car*(electricity_rate_high*(kwh_q90))+(1-phev_uf_car)*(gasoline_rate_high/mpg_q10))*100,
powertrain=="phev" & vehicle_type=="suv" ~ (phev_uf_suv*(electricity_rate_high*(kwh_q90))+(1-phev_uf_suv)*(gasoline_rate_high/mpg_q10))*100
powertrain
)
%>%
) mutate(MPGe_min=case_when(
%in% c("bev","cv","hev") ~ mpg_q10,
powertrain
=="phev" & vehicle_type=="car" ~ (1 / (phev_uf_car * (kwh_q10 / gas_electricity) + (1 - phev_uf_car) * (1 / mpg_q90))),
powertrain=="phev" & vehicle_type=="suv" ~ (1 / (phev_uf_suv * (kwh_q10 / gas_electricity) + (1 - phev_uf_suv) * (1 / mpg_q90)))
powertrain
),
MPGe_max=case_when(
%in% c("bev","cv","hev") ~ mpg_q90,
powertrain
=="phev" & vehicle_type=="car" ~ (1 / (phev_uf_car * (kwh_q90 / gas_electricity) + (1 - phev_uf_car) * (1 / mpg_q10))),
powertrain=="phev" & vehicle_type=="suv" ~ (1 / (phev_uf_suv * (kwh_q90 / gas_electricity) + (1 - phev_uf_suv) * (1 / mpg_q10)))
powertrain
)
)
<-dt_mpg %>%
dt_mpgrowwise() %>%
mutate(
quintiles = list(seq(from = cents_mile_min, to = cents_mile_max, length.out = 5))
%>%
) unnest_wider(quintiles, names_sep = "_") %>%
rename_with(~ paste0("cents_mile_value_", 1:5), starts_with("quintiles"))
<-dt_mpg%>%
dt_mpgrowwise() %>%
mutate(
quintiles = list(seq(from = MPGe_max, to = MPGe_min, length.out = 5))
%>%
) unnest_wider(quintiles, names_sep = "_") %>%
rename_with(~ paste0("MPGe_value_", 1:5), starts_with("quintiles"))
# write.csv(dt_mpg, here("survey_updated_pretest","data","mpg_by_segment_fuel_cost_final.csv"),row.names = F)
<- dt_mpg %>%
dt_mpg_expanded ## Display all possible values
# mutate(
# cents_mile_value = map2(cents_mile_min_round, cents_mile_max_round, ~ .x:.y)
# ) %>%
# unnest(cents_mile_value) %>%
## Only display min, avg, max values
pivot_longer(starts_with("cents_mile_value_") | starts_with("MPGe_value_"),
names_to = c(".value", "rank"),
names_pattern = "(.*)_value_(\\d+)"
%>%
) mutate(cents_mile=round(cents_mile,0),
MPGe=format(round(MPGe,1), nsmall=1)) %>%
select(vehicle_type,powertrain,cents_mile, MPGe) %>%
mutate(operating_cost_text = paste0(cents_mile, " cents per mile",
" (" ,MPGe, " MPG equivalent)")) %>%
select(-MPGe)
<- dt_mpg_expanded %>%
cost_list group_by(vehicle_type, powertrain) %>%
summarise(cents_mile_list = list(cents_mile), .groups = 'drop')
Operation costs by fuel type among cars
%>%
dt_mpg_expanded filter(vehicle_type=="car") %>%
count(powertrain, cents_mile)
#> # A tibble: 20 × 3
#> powertrain cents_mile n
#> <chr> <dbl> <int>
#> 1 bev 3 1
#> 2 bev 5 1
#> 3 bev 7 1
#> 4 bev 9 1
#> 5 bev 12 1
#> 6 cv 8 1
#> 7 cv 12 1
#> 8 cv 16 1
#> 9 cv 20 1
#> 10 cv 24 1
#> 11 hev 5 1
#> 12 hev 7 1
#> 13 hev 9 1
#> 14 hev 11 1
#> 15 hev 12 1
#> 16 phev 3 1
#> 17 phev 6 1
#> 18 phev 8 1
#> 19 phev 10 1
#> 20 phev 12 1
Operation costs by fuel type among SUVs
%>%
dt_mpg_expanded filter(vehicle_type=="suv") %>%
count(powertrain, cents_mile)
#> # A tibble: 20 × 3
#> powertrain cents_mile n
#> <chr> <dbl> <int>
#> 1 bev 3 1
#> 2 bev 6 1
#> 3 bev 8 1
#> 4 bev 11 1
#> 5 bev 14 1
#> 6 cv 9 1
#> 7 cv 13 1
#> 8 cv 17 1
#> 9 cv 21 1
#> 10 cv 25 1
#> 11 hev 7 1
#> 12 hev 10 1
#> 13 hev 13 1
#> 14 hev 17 1
#> 15 hev 20 1
#> 16 phev 8 1
#> 17 phev 12 1
#> 18 phev 16 1
#> 19 phev 19 1
#> 20 phev 23 1
—————————-
For car
Step 1: Generate Profiles
<- cbc_profiles(
profiles powertrain = c('Conventional','Gas hybrid', 'Plug-in hybrid','Battery electric'),
price = seq(0.8, 1.2, 0.1), # unit: maximum buget
range = seq(0.5, 3, 0.25), # unit: 100
mileage = seq(1.5, 5, 0.5), # unit: 10000
make_year = seq(2, 10), # make_year 2015-2023 --> vehicle age 2-10
operating_cost = seq(dt_mpg_expanded %>%
filter(vehicle_type == "car") %>%
select(starts_with("cents")) %>%
unlist() %>%
min(na.rm = TRUE),
%>%
dt_mpg_expanded filter(vehicle_type == "car") %>%
select(starts_with("cents")) %>%
unlist() %>%
max(na.rm = TRUE)+1) # unit: cents/mile
)
nrow(profiles)
#> [1] 364320
Resrictions
Only include profiles with feasible operation costs
<-profiles %>%
profiles_restricted filter(
== "Conventional" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "cv") %>% pull(cents_mile_list))) |
(powertrain == "Gas hybrid" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "hev") %>% pull(cents_mile_list))) |
(powertrain == "Plug-in hybrid" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "phev") %>% pull(cents_mile_list))) |
(powertrain == "Battery electric" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "bev") %>% pull(cents_mile_list)))
(powertrain
)
nrow(profiles_restricted)
#> [1] 79200
%>%
profiles_restricted group_by(powertrain, operating_cost) %>%
summarise(n=n())
#> # A tibble: 20 × 3
#> # Groups: powertrain [4]
#> powertrain operating_cost n
#> <fct> <int> <int>
#> 1 Conventional 8 3960
#> 2 Conventional 12 3960
#> 3 Conventional 16 3960
#> 4 Conventional 20 3960
#> 5 Conventional 24 3960
#> 6 Gas hybrid 5 3960
#> 7 Gas hybrid 7 3960
#> 8 Gas hybrid 9 3960
#> 9 Gas hybrid 11 3960
#> 10 Gas hybrid 12 3960
#> 11 Plug-in hybrid 3 3960
#> 12 Plug-in hybrid 6 3960
#> 13 Plug-in hybrid 8 3960
#> 14 Plug-in hybrid 10 3960
#> 15 Plug-in hybrid 12 3960
#> 16 Battery electric 3 3960
#> 17 Battery electric 5 3960
#> 18 Battery electric 7 3960
#> 19 Battery electric 9 3960
#> 20 Battery electric 12 3960
# table(profiles_restricted$operating_cost)
Step 2: Set up priors
Attribute | Expectation | Theory | Suggested Sign |
---|---|---|---|
powertrain | ICEV > HEV > PHEV > BEV | Concerns about battery health | Negative (−) |
price | Lower price preferred | Standard economic theory (price disutility) | Negative (−) |
range | More range preferred | Reduces range anxiety | Positive (+) |
mileage | Lower mileage preferred | Higher mileage = older/worn vehicle | Negative (−) |
make_year | Newer car preferred | Risk aversion, resale concerns | Negative (−) |
operating_cost | Lower cost preferred | Standard economic theory (price disutility) | Negative (−) |
Fixed Parameters
<- cbc_priors(
priors_fixed_parameter profiles = profiles_restricted,
# powertrain: categorical (effects coded or dummy)
powertrain = c( -0.1, -0.2, -0.3),
# price: scaled by 1,000s
price = -0.1,
# range: scaled by 100s → positive effect
range = 0.5,
# mileage: scaled by 10,000s → negative effect
mileage = -0.5,
# make_year: higher means older → negative effect
make_year = -0.2,
# operating cost: higher means worse → negative effect
operating_cost = -0.3
)
# priors_fixed_parameter
Random Parameters
<- cbc_priors(
priors_random_parameter profiles = profiles_restricted,
# powertrain: categorical (effects coded or dummy)
powertrain = rand_spec("n", c(-0.1, -0.2, -0.3), c( 0.1, 0.1, 0.1)),
price = -0.1, # Assume $1,000 price increase = -0.1 utility
# range: scaled by 100s → positive effect
range = 0.5,
# mileage: scaled by 10,000s → negative effect
mileage = -0.5,
# make_year: higher means older → negative effect
make_year = -0.2,
# operating cost: higher means worse → negative effect
operating_cost = -0.3
)
# priors_random_parameter
Step 3: Generate Designs
Different designs
Random Method
- random: randomly samples profiles for each respondent independently; maximum diversity but may be less statistically efficient
## Error- not run
# design_random_fixed_parameter <- cbc_design(
# profiles = profiles_restricted,
# priors = priors_fixed_parameter,
# method = "random", # randomized full-factorial design
# n_resp = 3000, # Number of respondents
# n_alts = 3, # Number of alternatives per question
# n_q = 6, # Number of questions per respondent #6
# remove_dominant = TRUE
# )
<- cbc_design(
design_random_fixed_parameter # profiles = profiles_restricted,
profiles = profiles,
priors = priors_fixed_parameter,
method = "random", # randomized full-factorial design
n_resp = 3000, # Number of respondents
n_alts = 3, # Number of alternatives per question
n_q = 6, # Number of questions per respondent #6
remove_dominant = TRUE
)
Step 4: Inspect Design
cbc_inspect(design_random_fixed_parameter)
#> DESIGN SUMMARY
#> =========================
#>
#> STRUCTURE
#> ================
#> Method: random
#> Created: 2025-07-18 13:15:37
#> Respondents: 3000
#> Questions per respondent: 6
#> Alternatives per question: 3
#> Total choice sets: 18000
#> Profile usage: 50057/364320 (13.7%)
#> Special features:
#> • Dominance removal: total, partial
#>
#> SUMMARY METRICS
#> =================
#> D-error calculation not available for random designs
#> Overall balance score: 0.822 (higher is better)
#> Overall overlap score: 0.175 (lower is better)
#>
#> VARIABLE ENCODING
#> =================
#> Format: Dummy-coded (powertrain)
#> 💡 Use cbc_decode_design() to convert to categorical format
#>
#> ATTRIBUTE BALANCE
#> =================
#> Overall balance score: 0.822 (higher is better)
#>
#> Individual attribute level counts:
#>
#> powertrainGas hybrid:
#>
#> 0 1
#> 40587 13413
#> Balance score: 0.584 (higher is better)
#>
#> powertrainPlug-in hybrid:
#>
#> 0 1
#> 40659 13341
#> Balance score: 0.583 (higher is better)
#>
#> powertrainBattery electric:
#>
#> 0 1
#> 40366 13634
#> Balance score: 0.588 (higher is better)
#>
#> price:
#>
#> 0.8 0.9 1 1.1 1.2
#> 10735 10831 10735 10832 10867
#> Balance score: 0.994 (higher is better)
#>
#> range:
#>
#> 0.5 0.75 1 1.25 1.5 1.75 2 2.25 2.5 2.75 3
#> 4765 4881 4899 4803 5054 4935 5029 4901 4966 4870 4897
#> Balance score: 0.983 (higher is better)
#>
#> mileage:
#>
#> 1.5 2 2.5 3 3.5 4 4.5 5
#> 6683 6758 6741 6722 6758 6848 6816 6674
#> Balance score: 0.991 (higher is better)
#>
#> make_year:
#>
#> 2 3 4 5 6 7 8 9 10
#> 6051 5962 5903 6072 5950 6106 6006 5978 5972
#> Balance score: 0.989 (higher is better)
#>
#> operating_cost:
#>
#> 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
#> 1788 2090 2179 2469 2591 2683 2722 2735 2883 2807 2688 2710 2624 2497 2451 2330
#> 19 20 21 22 23 24 25
#> 2169 2000 2056 2016 1901 1766 1845
#> Balance score: 0.867 (higher is better)
#>
#> ATTRIBUTE OVERLAP
#> =================
#> Overall overlap score: 0.175 (lower is better)
#>
#> Counts of attribute overlap:
#> (# of questions with N unique levels)
#>
#> powertrainGas hybrid: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 43.9% (7902 / 18000 questions)
#> 2 (no overlap): 56.1% (10098 / 18000 questions)
#> Average unique levels per question: 1.56
#>
#> powertrainPlug-in hybrid: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 44.0% (7914 / 18000 questions)
#> 2 (no overlap): 56.0% (10086 / 18000 questions)
#> Average unique levels per question: 1.56
#>
#> powertrainBattery electric: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 43.7% (7861 / 18000 questions)
#> 2 (no overlap): 56.3% (10139 / 18000 questions)
#> Average unique levels per question: 1.56
#>
#> price: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 4.0% (722 / 18000 questions)
#> 2 (partial overlap): 47.6% (8576 / 18000 questions)
#> 3 (partial overlap): 48.3% (8702 / 18000 questions)
#> 4 (partial overlap): 0.0% (0 / 18000 questions)
#> 5 (no overlap): 0.0% (0 / 18000 questions)
#> Average unique levels per question: 2.44
#>
#> range: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 0.9% (163 / 18000 questions)
#> 2 (partial overlap): 25.2% (4529 / 18000 questions)
#> 3 (partial overlap): 73.9% (13308 / 18000 questions)
#> 4 (partial overlap): 0.0% (0 / 18000 questions)
#> 5 (partial overlap): 0.0% (0 / 18000 questions)
#> 6 (partial overlap): 0.0% (0 / 18000 questions)
#> 7 (partial overlap): 0.0% (0 / 18000 questions)
#> 8 (partial overlap): 0.0% (0 / 18000 questions)
#> 9 (partial overlap): 0.0% (0 / 18000 questions)
#> 10 (partial overlap): 0.0% (0 / 18000 questions)
#> 11 (no overlap): 0.0% (0 / 18000 questions)
#> Average unique levels per question: 2.73
#>
#> mileage: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 1.7% (304 / 18000 questions)
#> 2 (partial overlap): 32.9% (5916 / 18000 questions)
#> 3 (partial overlap): 65.4% (11780 / 18000 questions)
#> 4 (partial overlap): 0.0% (0 / 18000 questions)
#> 5 (partial overlap): 0.0% (0 / 18000 questions)
#> 6 (partial overlap): 0.0% (0 / 18000 questions)
#> 7 (partial overlap): 0.0% (0 / 18000 questions)
#> 8 (no overlap): 0.0% (0 / 18000 questions)
#> Average unique levels per question: 2.64
#>
#> make_year: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 1.3% (234 / 18000 questions)
#> 2 (partial overlap): 29.1% (5238 / 18000 questions)
#> 3 (partial overlap): 69.6% (12528 / 18000 questions)
#> 4 (partial overlap): 0.0% (0 / 18000 questions)
#> 5 (partial overlap): 0.0% (0 / 18000 questions)
#> 6 (partial overlap): 0.0% (0 / 18000 questions)
#> 7 (partial overlap): 0.0% (0 / 18000 questions)
#> 8 (partial overlap): 0.0% (0 / 18000 questions)
#> 9 (no overlap): 0.0% (0 / 18000 questions)
#> Average unique levels per question: 2.68
#>
#> operating_cost: Continuous variable
#> Questions by # unique levels:
#> 1 (complete overlap): 0.4% (74 / 18000 questions)
#> 2 (partial overlap): 16.0% (2878 / 18000 questions)
#> 3 (partial overlap): 83.6% (15048 / 18000 questions)
#> 4 (partial overlap): 0.0% (0 / 18000 questions)
#> 5 (partial overlap): 0.0% (0 / 18000 questions)
#> 6 (partial overlap): 0.0% (0 / 18000 questions)
#> 7 (partial overlap): 0.0% (0 / 18000 questions)
#> 8 (partial overlap): 0.0% (0 / 18000 questions)
#> 9 (partial overlap): 0.0% (0 / 18000 questions)
#> 10 (partial overlap): 0.0% (0 / 18000 questions)
#> 11 (partial overlap): 0.0% (0 / 18000 questions)
#> 12 (partial overlap): 0.0% (0 / 18000 questions)
#> 13 (partial overlap): 0.0% (0 / 18000 questions)
#> 14 (partial overlap): 0.0% (0 / 18000 questions)
#> 15 (partial overlap): 0.0% (0 / 18000 questions)
#> 16 (partial overlap): 0.0% (0 / 18000 questions)
#> 17 (partial overlap): 0.0% (0 / 18000 questions)
#> 18 (partial overlap): 0.0% (0 / 18000 questions)
#> 19 (partial overlap): 0.0% (0 / 18000 questions)
#> 20 (partial overlap): 0.0% (0 / 18000 questions)
#> 21 (partial overlap): 0.0% (0 / 18000 questions)
#> 22 (partial overlap): 0.0% (0 / 18000 questions)
#> 23 (no overlap): 0.0% (0 / 18000 questions)
#> Average unique levels per question: 2.83