Power Analysis - Vehicle Choices

Step 0: Data set up (Operation costs)

Assumptions

# quantiles of MPG for the 2024 model year vehicles by for different powertrains and vehicle types. Data source: EPA or fuel_economic_gov
dt_mpg<-as.data.frame(read.csv(here("..", "..","survey_updated_pretest","data","mpg_by_segment_fuel.csv"))) %>% 
  # kwh per 100 miles --> kwh per mile
  mutate(kwh_q10=kwh_q10/100,
         kwh_q25=kwh_q25/100,
         kwh_q50=kwh_q50/100,
         kwh_q75=kwh_q75/100,
         kwh_q90=kwh_q90/100
         )


# electricity rate per kwh https://www.chooseenergy.com/electricity-rates-by-state/ March 2025
electricity_rate_low <- 0.11
electricity_rate_avg <- 0.17
electricity_rate_high <-0.33

# gas rate per gallon https://www.chooseenergy.com/data-center/cost-of-driving-by-state May 2025
gasoline_rate_low <- 2.66
gasoline_rate_avg <- 3.18
gasoline_rate_high <- 4.77

# PHEV utility factors https://docs.nrel.gov/docs/gen/fy07/41341.pdf
phev_uf_car<- 0.668
phev_uf_suv<- 0.487

# MPGe- One gallon of gasoline is equivalent to 33.7 kilowatt-hours (kWh) of electricity https://www.bluegrassauto.com/hybrid-and-electric-vehicle-comparisons/
gas_electricity<- 33.7

# # PHEV: Assume total range = 300 miles, electric range = 40 miles,
# phev_total_range<- 300
# phev_e_range<- 40

All levels

dt_mpg<-dt_mpg %>% 
  mutate(
    cents_mile_min=case_when(
      powertrain=="bev" ~ electricity_rate_low*(kwh_q10)*100,
      powertrain %in% c("cv","hev") ~ gasoline_rate_low/mpg_q90*100,
      
      powertrain=="phev" & vehicle_type=="car" ~ (phev_uf_car*(electricity_rate_low*(kwh_q10))+(1-phev_uf_car)*(gasoline_rate_low/mpg_q90))*100,
      powertrain=="phev" & vehicle_type=="suv" ~ (phev_uf_suv*(electricity_rate_low*(kwh_q10))+(1-phev_uf_suv)*(gasoline_rate_low/mpg_q90))*100
    ),
    cents_mile_max=case_when(
      powertrain=="bev" ~ electricity_rate_high*(kwh_q90)*100,
      powertrain %in% c("cv","hev") ~ gasoline_rate_high/mpg_q10*100,
      
      powertrain=="phev" & vehicle_type=="car" ~ (phev_uf_car*(electricity_rate_high*(kwh_q90))+(1-phev_uf_car)*(gasoline_rate_high/mpg_q10))*100,
      powertrain=="phev" & vehicle_type=="suv" ~ (phev_uf_suv*(electricity_rate_high*(kwh_q90))+(1-phev_uf_suv)*(gasoline_rate_high/mpg_q10))*100
    )
 
    ) %>%
  mutate(MPGe_min=case_when(
    powertrain %in% c("bev","cv","hev") ~ mpg_q10,
    
    powertrain=="phev" & vehicle_type=="car" ~ (1 / (phev_uf_car * (kwh_q10 / gas_electricity) + (1 - phev_uf_car) * (1 / mpg_q90))),
    powertrain=="phev" & vehicle_type=="suv" ~ (1 / (phev_uf_suv * (kwh_q10 / gas_electricity) + (1 - phev_uf_suv) * (1 / mpg_q90)))
  ),
  
  MPGe_max=case_when(
    powertrain %in% c("bev","cv","hev") ~ mpg_q90,
    
    powertrain=="phev" & vehicle_type=="car" ~ (1 / (phev_uf_car * (kwh_q90 / gas_electricity) + (1 - phev_uf_car) * (1 / mpg_q10))),
    powertrain=="phev" & vehicle_type=="suv" ~ (1 / (phev_uf_suv * (kwh_q90 / gas_electricity) + (1 - phev_uf_suv) * (1 / mpg_q10)))
  )
    
  ) 


dt_mpg<-dt_mpg %>% 
  rowwise() %>%
  mutate(
    quintiles = list(seq(from = cents_mile_min, to = cents_mile_max, length.out = 5))
  ) %>%
  unnest_wider(quintiles, names_sep = "_") %>%
  rename_with(~ paste0("cents_mile_value_", 1:5), starts_with("quintiles")) 

dt_mpg<-dt_mpg%>% 
  rowwise() %>%
  mutate(
    quintiles = list(seq(from = MPGe_max, to = MPGe_min, length.out = 5))
  ) %>%
  unnest_wider(quintiles, names_sep = "_") %>%
  rename_with(~ paste0("MPGe_value_", 1:5), starts_with("quintiles"))
  

# write.csv(dt_mpg, here("survey_updated_pretest","data","mpg_by_segment_fuel_cost_final.csv"),row.names = F)    
  
dt_mpg_expanded <- dt_mpg %>%
  ## Display all possible values
  # mutate(
  #   cents_mile_value = map2(cents_mile_min_round, cents_mile_max_round, ~ .x:.y)
  # ) %>%
  # unnest(cents_mile_value) %>%
  
  ## Only display min, avg, max values
  pivot_longer(starts_with("cents_mile_value_") | starts_with("MPGe_value_"),
               names_to = c(".value", "rank"),
               names_pattern = "(.*)_value_(\\d+)"
  ) %>%
  mutate(cents_mile=round(cents_mile,0),
         MPGe=format(round(MPGe,1), nsmall=1)) %>% 
  select(vehicle_type,powertrain,cents_mile, MPGe) %>%
  mutate(operating_cost_text =  paste0(cents_mile, " cents per mile", 
                                  " ("  ,MPGe, " MPG equivalent)")) %>% 
  select(-MPGe)
  
  
cost_list <- dt_mpg_expanded %>%
  group_by(vehicle_type, powertrain) %>%
  summarise(cents_mile_list = list(cents_mile), .groups = 'drop')

Operation costs by fuel type among cars

dt_mpg_expanded %>%
  filter(vehicle_type=="car") %>%
  count(powertrain, cents_mile)

#> # A tibble: 20 × 3
#>    powertrain cents_mile     n
#>    <chr>           <dbl> <int>
#>  1 bev                 3     1
#>  2 bev                 5     1
#>  3 bev                 7     1
#>  4 bev                 9     1
#>  5 bev                12     1
#>  6 cv                  8     1
#>  7 cv                 12     1
#>  8 cv                 16     1
#>  9 cv                 20     1
#> 10 cv                 24     1
#> 11 hev                 5     1
#> 12 hev                 7     1
#> 13 hev                 9     1
#> 14 hev                11     1
#> 15 hev                12     1
#> 16 phev                3     1
#> 17 phev                6     1
#> 18 phev                8     1
#> 19 phev               10     1
#> 20 phev               12     1

Operation costs by fuel type among SUVs

dt_mpg_expanded %>%
  filter(vehicle_type=="suv") %>%
  count(powertrain, cents_mile)

#> # A tibble: 20 × 3
#>    powertrain cents_mile     n
#>    <chr>           <dbl> <int>
#>  1 bev                 3     1
#>  2 bev                 6     1
#>  3 bev                 8     1
#>  4 bev                11     1
#>  5 bev                14     1
#>  6 cv                  9     1
#>  7 cv                 13     1
#>  8 cv                 17     1
#>  9 cv                 21     1
#> 10 cv                 25     1
#> 11 hev                 7     1
#> 12 hev                10     1
#> 13 hev                13     1
#> 14 hev                17     1
#> 15 hev                20     1
#> 16 phev                8     1
#> 17 phev               12     1
#> 18 phev               16     1
#> 19 phev               19     1
#> 20 phev               23     1

—————————-

For car

Step 1: Generate Profiles

profiles <- cbc_profiles(
  powertrain     = c('Conventional','Gas hybrid', 'Plug-in hybrid','Battery electric'),
  price          = seq(0.8, 1.2, 0.1), # unit: maximum buget
  range          = seq(0.5, 3, 0.25), # unit: 100
  mileage        = seq(1.5, 5, 0.5), # unit: 10000
  make_year      = seq(2, 10), # make_year 2015-2023 --> vehicle age 2-10
  operating_cost = seq(dt_mpg_expanded %>%
                         filter(vehicle_type == "car") %>%
                         select(starts_with("cents")) %>%
                         unlist() %>%
                         min(na.rm = TRUE),
                       dt_mpg_expanded %>%
                         filter(vehicle_type == "car") %>%
                         select(starts_with("cents")) %>%
                         unlist() %>%
                         max(na.rm = TRUE)+1) # unit: cents/mile
) 

nrow(profiles)

#> [1] 364320

Resrictions

Only include profiles with feasible operation costs

profiles_restricted <-profiles %>% 
  filter(
    (powertrain == "Conventional"   & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "cv") %>% pull(cents_mile_list))) |
    (powertrain == "Gas hybrid"     & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "hev")  %>% pull(cents_mile_list))) |
    (powertrain == "Plug-in hybrid" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "phev") %>% pull(cents_mile_list))) |
    (powertrain == "Battery electric" & operating_cost %in% unlist(cost_list %>% filter(vehicle_type == "car", powertrain == "bev") %>% pull(cents_mile_list)))
  )

nrow(profiles_restricted)

#> [1] 79200

profiles_restricted %>%
  group_by(powertrain, operating_cost) %>% 
  summarise(n=n())

#> # A tibble: 20 × 3
#> # Groups:   powertrain [4]
#>    powertrain       operating_cost     n
#>    <fct>                     <int> <int>
#>  1 Conventional                  8  3960
#>  2 Conventional                 12  3960
#>  3 Conventional                 16  3960
#>  4 Conventional                 20  3960
#>  5 Conventional                 24  3960
#>  6 Gas hybrid                    5  3960
#>  7 Gas hybrid                    7  3960
#>  8 Gas hybrid                    9  3960
#>  9 Gas hybrid                   11  3960
#> 10 Gas hybrid                   12  3960
#> 11 Plug-in hybrid                3  3960
#> 12 Plug-in hybrid                6  3960
#> 13 Plug-in hybrid                8  3960
#> 14 Plug-in hybrid               10  3960
#> 15 Plug-in hybrid               12  3960
#> 16 Battery electric              3  3960
#> 17 Battery electric              5  3960
#> 18 Battery electric              7  3960
#> 19 Battery electric              9  3960
#> 20 Battery electric             12  3960

# table(profiles_restricted$operating_cost)

Step 2: Set up priors

Attribute	Expectation	Theory	Suggested Sign
powertrain	ICEV > HEV > PHEV > BEV	Concerns about battery health	Negative (−)
price	Lower price preferred	Standard economic theory (price disutility)	Negative (−)
range	More range preferred	Reduces range anxiety	Positive (+)
mileage	Lower mileage preferred	Higher mileage = older/worn vehicle	Negative (−)
make_year	Newer car preferred	Risk aversion, resale concerns	Negative (−)
operating_cost	Lower cost preferred	Standard economic theory (price disutility)	Negative (−)

Fixed Parameters

priors_fixed_parameter <- cbc_priors(
  profiles = profiles_restricted,
  # powertrain: categorical (effects coded or dummy)
  powertrain = c( -0.1, -0.2, -0.3),
  # price: scaled by 1,000s
  price = -0.1,
  # range: scaled by 100s → positive effect
  range = 0.5,
  # mileage: scaled by 10,000s → negative effect
  mileage = -0.5,
  # make_year: higher means older → negative effect
  make_year = -0.2,
  # operating cost: higher means worse → negative effect
  operating_cost = -0.3
  
)

# priors_fixed_parameter

Random Parameters

priors_random_parameter <- cbc_priors(
  profiles = profiles_restricted,
  # powertrain: categorical (effects coded or dummy)
  powertrain = rand_spec("n", c(-0.1, -0.2, -0.3), c( 0.1, 0.1, 0.1)),
  price = -0.1, # Assume $1,000 price increase = -0.1 utility
  # range: scaled by 100s → positive effect
  range = 0.5,
  # mileage: scaled by 10,000s → negative effect
  mileage = -0.5,
  # make_year: higher means older → negative effect
  make_year = -0.2,
  # operating cost: higher means worse → negative effect
  operating_cost = -0.3
  
)

# priors_random_parameter

Step 3: Generate Designs

Different designs

Random Method

random: randomly samples profiles for each respondent independently; maximum diversity but may be less statistically efficient

## Error- not run

# design_random_fixed_parameter <- cbc_design(
#   profiles = profiles_restricted,
#   priors = priors_fixed_parameter,
#   method = "random", # randomized full-factorial design
#   n_resp   = 3000, # Number of respondents
#   n_alts   = 3,    # Number of alternatives per question
#   n_q      = 6,    # Number of questions per respondent #6
#   remove_dominant = TRUE
# )

design_random_fixed_parameter <- cbc_design(
  # profiles = profiles_restricted,
  profiles = profiles,
  priors = priors_fixed_parameter,
  method = "random", # randomized full-factorial design
  n_resp   = 3000, # Number of respondents
  n_alts   = 3,    # Number of alternatives per question
  n_q      = 6,    # Number of questions per respondent #6
  remove_dominant = TRUE
)

Step 4: Inspect Design

cbc_inspect(design_random_fixed_parameter)

#> DESIGN SUMMARY
#> =========================
#> 
#> STRUCTURE
#> ================
#> Method: random
#> Created: 2025-07-18 13:15:37
#> Respondents: 3000
#> Questions per respondent: 6
#> Alternatives per question: 3
#> Total choice sets: 18000
#> Profile usage: 50057/364320 (13.7%)
#> Special features:
#>   • Dominance removal: total, partial
#> 
#> SUMMARY METRICS
#> =================
#> D-error calculation not available for random designs
#> Overall balance score: 0.822 (higher is better)
#> Overall overlap score: 0.175 (lower is better)
#> 
#> VARIABLE ENCODING
#> =================
#> Format: Dummy-coded (powertrain)
#> 💡 Use cbc_decode_design() to convert to categorical format
#> 
#> ATTRIBUTE BALANCE
#> =================
#> Overall balance score: 0.822 (higher is better)
#> 
#> Individual attribute level counts:
#> 
#> powertrainGas hybrid:
#> 
#>     0     1 
#> 40587 13413 
#>   Balance score: 0.584 (higher is better)
#> 
#> powertrainPlug-in hybrid:
#> 
#>     0     1 
#> 40659 13341 
#>   Balance score: 0.583 (higher is better)
#> 
#> powertrainBattery electric:
#> 
#>     0     1 
#> 40366 13634 
#>   Balance score: 0.588 (higher is better)
#> 
#> price:
#> 
#>   0.8   0.9     1   1.1   1.2 
#> 10735 10831 10735 10832 10867 
#>   Balance score: 0.994 (higher is better)
#> 
#> range:
#> 
#>  0.5 0.75    1 1.25  1.5 1.75    2 2.25  2.5 2.75    3 
#> 4765 4881 4899 4803 5054 4935 5029 4901 4966 4870 4897 
#>   Balance score: 0.983 (higher is better)
#> 
#> mileage:
#> 
#>  1.5    2  2.5    3  3.5    4  4.5    5 
#> 6683 6758 6741 6722 6758 6848 6816 6674 
#>   Balance score: 0.991 (higher is better)
#> 
#> make_year:
#> 
#>    2    3    4    5    6    7    8    9   10 
#> 6051 5962 5903 6072 5950 6106 6006 5978 5972 
#>   Balance score: 0.989 (higher is better)
#> 
#> operating_cost:
#> 
#>    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18 
#> 1788 2090 2179 2469 2591 2683 2722 2735 2883 2807 2688 2710 2624 2497 2451 2330 
#>   19   20   21   22   23   24   25 
#> 2169 2000 2056 2016 1901 1766 1845 
#>   Balance score: 0.867 (higher is better)
#> 
#> ATTRIBUTE OVERLAP
#> =================
#> Overall overlap score: 0.175 (lower is better)
#> 
#> Counts of attribute overlap:
#> (# of questions with N unique levels)
#> 
#> powertrainGas hybrid: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):  43.9%  (7902 / 18000 questions)
#>   2 (no overlap):        56.1%  (10098 / 18000 questions)
#>   Average unique levels per question: 1.56
#> 
#> powertrainPlug-in hybrid: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):  44.0%  (7914 / 18000 questions)
#>   2 (no overlap):        56.0%  (10086 / 18000 questions)
#>   Average unique levels per question: 1.56
#> 
#> powertrainBattery electric: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):  43.7%  (7861 / 18000 questions)
#>   2 (no overlap):        56.3%  (10139 / 18000 questions)
#>   Average unique levels per question: 1.56
#> 
#> price: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):   4.0%  (722 / 18000 questions)
#>   2 (partial overlap):   47.6%  (8576 / 18000 questions)
#>   3 (partial overlap):   48.3%  (8702 / 18000 questions)
#>   4 (partial overlap):    0.0%  (0 / 18000 questions)
#>   5 (no overlap):         0.0%  (0 / 18000 questions)
#>   Average unique levels per question: 2.44
#> 
#> range: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):   0.9%  (163 / 18000 questions)
#>   2 (partial overlap):   25.2%  (4529 / 18000 questions)
#>   3 (partial overlap):   73.9%  (13308 / 18000 questions)
#>   4 (partial overlap):    0.0%  (0 / 18000 questions)
#>   5 (partial overlap):    0.0%  (0 / 18000 questions)
#>   6 (partial overlap):    0.0%  (0 / 18000 questions)
#>   7 (partial overlap):    0.0%  (0 / 18000 questions)
#>   8 (partial overlap):    0.0%  (0 / 18000 questions)
#>   9 (partial overlap):    0.0%  (0 / 18000 questions)
#>   10 (partial overlap):    0.0%  (0 / 18000 questions)
#>   11 (no overlap):         0.0%  (0 / 18000 questions)
#>   Average unique levels per question: 2.73
#> 
#> mileage: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):   1.7%  (304 / 18000 questions)
#>   2 (partial overlap):   32.9%  (5916 / 18000 questions)
#>   3 (partial overlap):   65.4%  (11780 / 18000 questions)
#>   4 (partial overlap):    0.0%  (0 / 18000 questions)
#>   5 (partial overlap):    0.0%  (0 / 18000 questions)
#>   6 (partial overlap):    0.0%  (0 / 18000 questions)
#>   7 (partial overlap):    0.0%  (0 / 18000 questions)
#>   8 (no overlap):         0.0%  (0 / 18000 questions)
#>   Average unique levels per question: 2.64
#> 
#> make_year: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):   1.3%  (234 / 18000 questions)
#>   2 (partial overlap):   29.1%  (5238 / 18000 questions)
#>   3 (partial overlap):   69.6%  (12528 / 18000 questions)
#>   4 (partial overlap):    0.0%  (0 / 18000 questions)
#>   5 (partial overlap):    0.0%  (0 / 18000 questions)
#>   6 (partial overlap):    0.0%  (0 / 18000 questions)
#>   7 (partial overlap):    0.0%  (0 / 18000 questions)
#>   8 (partial overlap):    0.0%  (0 / 18000 questions)
#>   9 (no overlap):         0.0%  (0 / 18000 questions)
#>   Average unique levels per question: 2.68
#> 
#> operating_cost: Continuous variable
#>   Questions by # unique levels:
#>   1 (complete overlap):   0.4%  (74 / 18000 questions)
#>   2 (partial overlap):   16.0%  (2878 / 18000 questions)
#>   3 (partial overlap):   83.6%  (15048 / 18000 questions)
#>   4 (partial overlap):    0.0%  (0 / 18000 questions)
#>   5 (partial overlap):    0.0%  (0 / 18000 questions)
#>   6 (partial overlap):    0.0%  (0 / 18000 questions)
#>   7 (partial overlap):    0.0%  (0 / 18000 questions)
#>   8 (partial overlap):    0.0%  (0 / 18000 questions)
#>   9 (partial overlap):    0.0%  (0 / 18000 questions)
#>   10 (partial overlap):    0.0%  (0 / 18000 questions)
#>   11 (partial overlap):    0.0%  (0 / 18000 questions)
#>   12 (partial overlap):    0.0%  (0 / 18000 questions)
#>   13 (partial overlap):    0.0%  (0 / 18000 questions)
#>   14 (partial overlap):    0.0%  (0 / 18000 questions)
#>   15 (partial overlap):    0.0%  (0 / 18000 questions)
#>   16 (partial overlap):    0.0%  (0 / 18000 questions)
#>   17 (partial overlap):    0.0%  (0 / 18000 questions)
#>   18 (partial overlap):    0.0%  (0 / 18000 questions)
#>   19 (partial overlap):    0.0%  (0 / 18000 questions)
#>   20 (partial overlap):    0.0%  (0 / 18000 questions)
#>   21 (partial overlap):    0.0%  (0 / 18000 questions)
#>   22 (partial overlap):    0.0%  (0 / 18000 questions)
#>   23 (no overlap):         0.0%  (0 / 18000 questions)
#>   Average unique levels per question: 2.83