OPUF Analysis code

Load packages

library(dplyr)
library(knitr)
library(mice)

Import the raw data

data <- read.csv("./Data/OPUF_DATA.csv") # Replace this with the final data download from OPUF

Remove demo/practice responses

demo_responses <- read.csv("./Data/Demo_IDs.csv")
data <- anti_join(data, demo_responses, by = "sessionId")

Initialize a list to store the resulting matrices

results_list <- list()
domain_weighting <- list()

Main analysis

The main outputs from this forloop are results_list and domain_weighting. They are lists containing matrices and vectors respectively. The number of matrices n is equal to your analysis sample size (length(results_list) = n). Each i of results_list represents the latent personal utility function for individual i. If this was anchored with that individuals anchoring factor then this would represent the anchored utility score for individual i.

# Loop through each row of the data
for (i in 1:nrow(data)) {
  
  # Manipulate data into vectors to take the outer product as per paper
  v_tired <- as.numeric(data[i, c("opuf_levelRatings_tired_0", "opuf_levelRatings_tired_1", 
                                  "opuf_levelRatings_tired_2", "opuf_levelRatings_tired_3", 
                                  "opuf_levelRatings_tired_4")])
  
  v_walk <- as.numeric(data[i, c("opuf_levelRatings_walk_0", "opuf_levelRatings_walk_1", 
                                 "opuf_levelRatings_walk_2", "opuf_levelRatings_walk_3", 
                                 "opuf_levelRatings_walk_4")])
  
  v_sport <- as.numeric(data[i, c("opuf_levelRatings_sports_0", "opuf_levelRatings_sports_1", 
                                  "opuf_levelRatings_sports_2", "opuf_levelRatings_sports_3", 
                                  "opuf_levelRatings_sports_4")])
  
  v_conc <- as.numeric(data[i, c("opuf_levelRatings_concentration_0", "opuf_levelRatings_concentration_1", 
                                 "opuf_levelRatings_concentration_2", "opuf_levelRatings_concentration_3", 
                                 "opuf_levelRatings_concentration_4")])
  
  v_embarrass <- as.numeric(data[i, c("opuf_levelRatings_embarrassment_0", "opuf_levelRatings_embarrassment_1", 
                                      "opuf_levelRatings_embarrassment_2", "opuf_levelRatings_embarrassment_3", 
                                      "opuf_levelRatings_embarrassment_4")])
  
  v_unhappy <- as.numeric(data[i, c("opuf_levelRatings_unhappiness_0", "opuf_levelRatings_unhappiness_1", 
                                    "opuf_levelRatings_unhappiness_2", "opuf_levelRatings_unhappiness_3", 
                                    "opuf_levelRatings_unhappiness_4")])
  
  v_treat <- as.numeric(data[i, c("opuf_levelRatings_treat_0", "opuf_levelRatings_treat_1", 
                                  "opuf_levelRatings_treat_2", "opuf_levelRatings_treat_3", 
                                  "opuf_levelRatings_treat_4")])

  # Assign level labels
  lvl_labels <- c("Never", "Almost Never", "Sometimes", "Often", "Always")
  names(v_tired) <- lvl_labels
  names(v_walk) <- lvl_labels
  names(v_sport) <- lvl_labels
  names(v_conc) <- lvl_labels
  names(v_embarrass) <- lvl_labels
  names(v_unhappy) <- lvl_labels
  names(v_treat) <- lvl_labels
  
  # Dimension labels
  dim_labels <- c("Tired", "Walking", "Sports", "Concentration", "Embarrassment", "Unhappiness", "Treated Differently")
  
  # Dimension weights as vector
  dimension_weights <- as.numeric(data[i, c("opuf_swingWeights_tired", "opuf_swingWeights_walk", 
                                            "opuf_swingWeights_sports", "opuf_swingWeights_concentration", 
                                            "opuf_swingWeights_embarrassment", "opuf_swingWeights_unhappiness", 
                                            "opuf_swingWeights_treat")])
  
  names(dimension_weights) <- dim_labels

  # Rescale levels to 0-1
  v_levels <- list(v_tired, v_walk, v_sport, v_conc, v_embarrass, v_unhappy, v_treat)
  v_scaled <- lapply(v_levels, function(x) x / 100)

  # Normalising dimension weights  
  v_dim <- dimension_weights / sum(dimension_weights) # Referred to as w in article
  # sum(v_dim) # Uncomment this if you need to check the sum
  
  # Combine dimension weights with level ratings
  m <- mapply(function(v, w) as.numeric(v) * as.numeric(w), v_scaled, v_dim)
  colnames(m) <- dim_labels
  rownames(m) <- lvl_labels

  # Store the resulting matrix in the list
  results_list[[i]] <- m

  # Store domain weighting in matrix
  domain_weighting[[i]] <- v_dim
}

Store the domain weightings in a matrix

domain_matrix <- matrix(0, nrow = 0, ncol = 7)
for (i in domain_weighting){
  domain_matrix <- domain_matrix + i
}

Estimating the mean latent coefficient matrix

Initialize a matrix to store the sum of all latent coefficient matrices

sum_matrix <- matrix(0, nrow = 5, ncol = 7)

Store the sum of all latent coefficient matrices

for (m in results_list) {
  sum_matrix <- sum_matrix + m
}

Divide the sum by the number of matrices to get the average

average_matrix <- sum_matrix / length(results_list)
kable(round(average_matrix, 3))

	Tired	Walking	Sports	Concentration	Embarrassment	Unhappiness	Treated Differently
Never	0.000	0.000	0.000	0.000	0.000	0.000	0.000
Almost Never	0.040	0.030	0.017	0.037	0.016	0.035	0.026
Sometimes	0.072	0.062	0.032	0.071	0.030	0.075	0.048
Often	0.122	0.105	0.053	0.111	0.047	0.116	0.073
Always	0.195	0.161	0.096	0.168	0.085	0.173	0.121

The latent coefficients estimated here are estimated on the 0-1 scale where sum(average_matrix[5,]) = 1

Anchoring

Estimate your anchoring factor using mean PITS utility value. I have a lot of code here and really all you need is anchor = mean(data$opuf_pitsUtility). I had some missing and skewed data in data$opuf_pitsUtility and so conducted winsorisation and imputation.

#----------------------------------------------------------------------------
# Winsorize pits data
#----------------------------------------------------------------------------
# New PITS variable
data$PITS <- data$opuf_pitsUtility

# Calculate the cutoff values
lower_cutoff <- quantile(data$PITS, 0.005, na.rm=T) # lower % of 0.5%
upper_cutoff <- quantile(data$PITS, 0.995, na.rm=T) # upper % of 99.5%

# Winsorize the vector
data$PITS <- pmin(pmax(data$PITS, lower_cutoff), upper_cutoff)

#----------------------------------------------------------------------------
# Multiple imputation on pits data (n=5)
#----------------------------------------------------------------------------
library(mice) # install MI package 

MICE_data <- select(data, PITS, opuf_swingWeights_tired, opuf_swingWeights_walk, opuf_swingWeights_sports, opuf_swingWeights_concentration, opuf_swingWeights_embarrassment, opuf_swingWeights_unhappiness, opuf_swingWeights_treat, AdditionalDemographicQuestions_weightstatus_value, AdditionalDemographicQuestions_education_value, AdditionalDemographicQuestions_employment_value, AdditionalDemographicQuestions_ethnicity_value, AdditionalDemographicQuestions_gender_value)
imputed_data <- mice(MICE_data, m = 1, method = "pmm", seed = 1998)

# Replace NAs with imputed values from the pmm model
data$PITS[29] <- 0.2
data$PITS[98] <- 0.7
data$PITS[138] <- 0.3
data$PITS[216] <- 0.3
data$PITS[298] <- 0.9

anchor = mean(data$PITS)

The anchoring value obtained for my study was: 0.2817

Anchoring the latent coefficient matrix

Anchoring the average latent coefficient matrix to yield the value set is run with the fairly simple command below.

anchored_matrix <- average_matrix * (1-anchor)

Social utility function

You can now see that the sum of the PITS state (Always for all attributes) generates a disutility value of 0.7183. This is the compliment of the PITS utility value.

kable(round(anchored_matrix, 3))

	Tired	Walking	Sports	Concentration	Embarrassment	Unhappiness	Treated Differently
Never	0.000	0.000	0.000	0.000	0.000	0.000	0.000
Almost Never	0.029	0.021	0.012	0.026	0.012	0.025	0.019
Sometimes	0.052	0.045	0.023	0.051	0.022	0.054	0.035
Often	0.088	0.075	0.038	0.080	0.034	0.083	0.052
Always	0.140	0.116	0.069	0.121	0.061	0.124	0.087

Valueset

Function to calculate health state score

calculate_score <- function(code, score_matrix) {
  # Convert code string into numeric vector (one digit per dimension)
  levels <- as.numeric(strsplit(code, "")[[1]])
  
  # Lookup scores in the score matrix
  scores <- mapply(function(level, dim) score_matrix[level, dim], levels, 1:7)
  
  # Calculate health state score
  health_state_score <- 1 - sum(scores)
  return(health_state_score)
}

# Example
calculate_score("5555555", anchored_matrix)

[1] 0.2817

# Estimate health state score for person 1 this is unanchored and just based upon their individual latent coefficients
calculate_score("5555555", results_list[[1]])

[1] 0

# Generate all possible health state codes
levels <- 1:5
dimensions <- 7
combinations <- expand.grid(rep(list(levels), dimensions))
health_state_codes <- apply(combinations, 1, paste0, collapse = "")
codes_df <- cbind(combinations, health_state_codes)

# Estimating the final value set
social_valueset <- sapply(health_state_codes, calculate_score, anchored_matrix)

These below functions present the first and last 6 health state utility values in the from the social valueset object.

head(social_valueset)

  1111111   2111111   3111111   4111111   5111111   1211111 
1.0000000 0.9712073 0.9479533 0.9124579 0.8598369 0.9787379

tail(social_valueset)

  5455555   1555555   2555555   3555555   4555555   5555555 
0.3220465 0.4218631 0.3930704 0.3698164 0.3343210 0.2817000