## Purpose flags: toggle which stages of this notebook are run
purp.gather   <- TRUE
purp.analysis <- FALSE
purp.debug    <- FALSE
purp.mcmc     <- FALSE
library(tidyverse)
library(psych)
library(mascutils)
library(printr)
#library(lavaan)
options(mc.cores = 5)


if(!purp.gather) {load("D_1.Rda")} ## when not gathering anew, reuse the prepared data set


## Functions

## Collapse the Part x Design x Item response cuboid into a psychometric
## Part x Item response matrix by averaging over designs
rm_psycho <- function(Data) 
  Data %>% 
  group_by(Part, Item) %>% 
  summarize(mean_resp = mean(response)) %>% 
  ungroup() %>% 
  arrange(Item) %>% 
  spread(Item, value = mean_resp) %>% 
  select(-Part)

## Collapse the cuboid into a designometric Design x Item response
## matrix by averaging over persons
rm_design <- function(Data) 
  Data %>% 
  group_by(Design, Item) %>%
  summarize(mean_resp = mean(response)) %>% 
  ungroup() %>% 
  spread(Item, value = mean_resp) %>% 
  select(-Design)

## Bootstrapped standardized Cronbach alpha with 95% CI under both
## perspectives; assumes Data holds the responses of a single scale
alpha_ci <- function(Data){
  Scale <- str_c(distinct(Data, Scale)$Scale) 
  model_psych <- 
    psych::alpha(rm_psycho(Data), check.keys = FALSE, n.iter = 100)$boot %>% 
    as_tibble() %>% 
    mutate(Perspective = "psychometric")
  model_design <- 
    psych::alpha(rm_design(Data), check.keys = FALSE, n.iter = 100)$boot %>% 
    as_tibble() %>% 
    mutate(Perspective = "designometric")
  out <- 
    bind_rows(model_psych,
              model_design) %>% 
    select(Perspective, std.alpha) %>% 
    group_by(Perspective) %>% 
    summarize(center = mean(std.alpha),
              lower = quantile(std.alpha, .025),
              upper = quantile(std.alpha, .975)) %>% 
    mutate(Scale = Scale) %>% 
    go_first(Scale, Perspective)
  out
}

# D_1 %>% 
#   filter(Scale == "Attractiveness") %>% 
#   alpha_ci()


## Item-whole statistics (from psych::alpha) under both perspectives;
## assumes Data holds the responses of a single scale
item_rel <- function(Data){
  Scale <- str_c(distinct(Data, Scale)$Scale)
  model_psych <- 
    psych::alpha(rm_psycho(Data), check.keys = FALSE)$item.stats %>% 
    as_tibble(rownames = "Item") %>% 
    mutate(Perspective = "psychometric")
  model_design <- 
    psych::alpha(rm_design(Data), check.keys = FALSE)$item.stats %>% 
    as_tibble(rownames = "Item") %>% 
    mutate(Perspective = "designometric")
  
  out <- 
    bind_rows(model_psych,
              model_design) %>% 
    mutate(Scale = Scale) %>% 
    go_first(Scale, Item, Perspective) %>% 
    arrange(Scale, Item, Perspective)
  out
}

# D_1 %>% 
#   filter(Scale == "Attractiveness") %>% 
#   item_rel()

1 The psychometric fallacy in design research

The ISO 9241-11 standard defines usability by three components: effectiveness, efficiency and satisfaction. The first two are rooted in a well-established human-performance perspective. The third, satisfaction, with its vaguely emotional frame of reference, remained poorly understood by many researchers and practitioners and was often reduced to "this you have to measure with a rating scale". Then the UX age dawned and painted a more detailed picture of elusive concepts, such as users' feelings (e.g. Eeriness), their aesthetic judgments and even their dreams (Hedonic Quality). It was a wonder to observe how the pale definition of user satisfaction had to make space for a big party. And everybody was bringing their own rating scales! (Bargas-Avila & Hornbæk, Old wine in new bottles)

In industrial practice, rating scales have their place as an always-available and cheap method for comparing designs and benchmarking all kinds of systems. In contrast to this convenience, developing a valid and reliable rating scale is a project no smaller than a doctoral thesis. However, most people in design research are primarily busy with learning something about designs, so most of these scales have been designed with minimal effort for a transient purpose. Only a few rating scale inventories pervade industry and research, and these have been designed with best psychometric effort (AttrakDiff, UEQ, TAM).

Psychometrics is the science of assigning meaningful numbers to persons. Traditionally, these numbers served to measure skills, such as mathematical intelligence or comprehension of language. With time, researchers became interested in more elusive properties of persons, such as how much they watch themselves in social situations (self-monitoring). Always devoted to the queen of the social sciences, design researchers adopted more or less sophisticated methods from psychometrics to improve and validate their shiny new rating scales.

Unfortunately, practically all of them failed to recognize that measuring a person with a rating scale is structurally different from measuring a design. The psychometric fallacy is to validate designometric rating scales as if they were psychometric. It comes in two forms, each rooted in this structural difference:

  1. A psychometric measurement is two-dimensional: a flat matrix of persons by items. The designometric perspective is three-dimensional: persons by items by designs. Psychometricians usually employ large person samples, as they must show that their instrument can adequately discern between persons. By analogy, a designometric rating scale claims to discern between designs, and validating such a claim requires a large sample of designs. The level 1 psychometric fallacy is to use just one or a few designs in the validation study.

  2. Designometric observations are three-way encounters that include the psychometric two-way encounter. That means you can create a psychometric response matrix from a designometric response cuboid by averaging over designs. In the same manner, you can create a designometric response matrix from the cuboid by averaging over persons, and that is the correct way (see the sketch below). The level 2 psychometric fallacy is to do the analysis on a psychometric response matrix rather than a designometric matrix.
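
Both collapsing operations take only a few lines of dplyr. The following sketch assumes a long-format tibble D with one row per Part-by-Design-by-Item encounter and a response column; the helper functions rm_psycho() and rm_design() defined above implement the same idea using spread():

## collapse the cuboid to a designometric Design x Item response matrix
RM_design <- D %>%
  group_by(Design, Item) %>%
  summarize(response = mean(response), .groups = "drop") %>%
  pivot_wider(names_from = Item, values_from = response)

## the psychometric Part x Item matrix is the same collapse over persons
RM_psycho <- D %>%
  group_by(Part, Item) %>%
  summarize(response = mean(response), .groups = "drop") %>%
  pivot_wider(names_from = Item, values_from = response)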

The aim of this study is to:

  • elaborate on how the psychometric fallacy can compromise the construction and validation of designometric rating scales,
  • provide examples of published rating scales with level 1 and level 2 psychometric fallacies, together with an informal inquiry into how pervasive the problem is in the literature and in practice, and
  • make a first exploration of the actual consequences of the psychometric fallacy using real data.

1.1 Psychometrics

A typical psychometric measurement situation arises when person attributes are assessed by a set of ordinal-scaled items, such as the following:

1.1.0.1 [HERE]

a set of tasks (i.e. items), where the response \(y_{ij}\) on any encounter between person \(i\) and item \(j\) is either correct (\(y_{ij} = 1\)) or incorrect (\(y_{ij} = 0\)). A test validation study for a Rasch scale therefore results in a dichotomous response matrix.
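
As a minimal sketch with randomly drawn person and item parameters (not data from this study), such a dichotomous response matrix can be simulated from the Rasch model, where the log-odds of a correct response is the difference between person ability \(\theta_i\) and item difficulty \(\beta_j\):

set.seed(1)
theta <- rnorm(6)                         ## person abilities theta_i
beta  <- rnorm(4)                         ## item difficulties beta_j
P     <- plogis(outer(theta, beta, "-"))  ## P(y_ij = 1) = logit^-1(theta_i - beta_j)
Y     <- matrix(rbinom(length(P), 1, P), nrow = 6)  ## dichotomous Person x Item matrix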

1.1.1 Reliability

Recall that in CTT the measurement error is reduced by the law of large numbers. The more items are added to estimate the latent person variable \(\theta_i\), the more the common structure among the items dominates the noise, resulting in a more reliable measure. However, in designometric studies, researchers are interested in a latent design variable, so the law of large numbers has to operate on the design-by-item response matrix.
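
The Spearman-Brown prophecy formula makes the effect of adding items concrete. The snippet below is a plain transcription of the textbook formula, not a result derived from our data:

## Spearman-Brown: reliability of a scale lengthened by factor k,
## given the reliability rel_1 of the original scale
spearman_brown <- function(rel_1, k) k * rel_1 / (1 + (k - 1) * rel_1)
spearman_brown(.6, 2)  ## doubling the number of items yields 0.75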

1.1.2 Validity

1.1.3 Factor structures

1.2 Designometrics

1.2.1 Simulation on Reliability

When a scale validation study in design research falls into the psychometric fallacy by using a psychometric response matrix for reliability analysis, what is shown is that the scale reliably measures a person's tendency to judge websites beautiful or robot faces spine-tingling. This is obviously not the same as measuring a website's perceived beauty or a robot face's eeriness. The following example demonstrates the difference by simulating an extreme situation, where a fictive four-item scale of Coolness is highly reliable for persons, but has no reliability at all for discerning the tested designs. Such a pattern can occur for the trivial reason that the sampled designs have little or no variance with respect to Coolness. In the following simulation, we assume that the Coolness scale is tested on a sample of 20 undertaker company websites, with 20 participants.

set.seed(42)

n_Design = 20
n_Part   = 20
n_Item  =  4
n_Obs = n_Design * n_Part * n_Item

Designs <- tibble(Design      = as.factor(1:n_Design),
                  cool_Design = rnorm(n_Design, 0, .02)) ## little variance in Coolness

Parts   <- tibble(Part        = as.factor(1:n_Part),
                  cool_Part   = rnorm(n_Part, 0, .2)) ## strong variance in tendency to judge sth. cool

Items   <- tibble(Scale       = "Coolness",
                  Item        = as.factor(1:n_Item),
                  cool_Item   = rnorm(n_Item,  0, .2)) ## item strength: understating items get lower values

Coolness     <- expand_grid(Design = Designs$Design,
                       Part   = Parts$Part,
                       Item   = Items$Item) %>% 
  left_join(Designs) %>% 
  left_join(Parts) %>% 
  left_join(Items) %>% 
  mutate(response = mascutils::rescale_zero_one(cool_Design + cool_Part - cool_Item + rnorm(n_Obs, 0, .5)))
## Joining with `by = join_by(Design)`
## Joining with `by = join_by(Part)`
## Joining with `by = join_by(Item)`
Coolness %>% 
  ggplot(aes(y = response, x = Design)) +
  geom_violin()

Coolness %>% 
  ggplot(aes(y = response, x = Part)) +
  geom_violin()
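
The variance pattern behind the two plots can also be read off directly. The following is a quick descriptive check (its values are not reported here), contrasting the spread of the design means with the spread of the person means:

## spread of design means vs. spread of person means
Coolness %>%
  group_by(Design) %>%
  summarize(mean_resp = mean(response)) %>%
  summarize(sd_design_means = sd(mean_resp))

Coolness %>%
  group_by(Part) %>%
  summarize(mean_resp = mean(response)) %>%
  summarize(sd_part_means = sd(mean_resp))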

alpha_ci(Coolness)
## `summarise()` has grouped output by 'Part'. You can override using the
## `.groups` argument.
## Number of categories should be increased in order to count frequencies.
## `summarise()` has grouped output by 'Design'. You can override using the
## `.groups` argument.
## Number of categories should be increased in order to count frequencies.
## Warning in psych::alpha(rm_design(Data), check.keys = FALSE, n.iter = 100): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( 1 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
Scale Perspective center lower upper
Coolness designometric 0.1179021 -0.5375240 0.4680096
Coolness psychometric 0.9307102 0.8793905 0.9598878

This simple example demonstrates that a scale can produce excellent reliability when measuring persons, but rather poor reliability on designs. In any study committing the psychometric fallacy, this could have happened to some degree and would go completely unnoticed. The way we constructed this simulation, producing a sample of designs with little difference in Coolness, also highlights the importance of carefully sampling the designs in a designometric validation study. In many classes of designs, we can expect some properties to vary strongly and others to be relatively stable across designs. In our example, undertaker websites probably do not differ much in how much they enthuse users, which makes them a poor sample for a Coolness scale, but they could still vary a lot in visual simplicity.

Still, falling into the psychometric fallacy does not necessarily mean that a scale is unreliable under the designometric perspective. It is not too unlikely that appreciating coolness (psychometric perspective) and discerning coolness (designometric perspective) share some mental processes and therefore result in sufficient reliability (or factor structure) under both perspectives. It is even possible that the real situation is the opposite of the previous simulation: persons vary little in appreciation, whereas designs vary strongly. In the following, we explore on several real designometric data sets how psychometric and designometric scale and item reliabilities compare.

2 Methods

From a theoretical perspective the psychometric fallacy is obvious, and we have demonstrated by simulation that the worst case is possible, but little is known about how the fallacy affects the quality of rating scales. Here, we explore only the basic psychometric qualities: scale consistency and item reliability.

2.1 Data sets

The data used for the analysis originates from six experiments (AH, DK, DN, PS, QB, SP). In all experiments, participants saw pictures of designs, such as websites (QB, SP) or robot faces (AH, DK, PS), and responded to one item at a time. The original experiments tested the influence of presentation times (generally between 17 ms and 5 s). For the analysis here, we only used responses at the longer presentation times (500 ms and above).

As in these experiments only single items were used per presented design, the designometric cuboid is very sparse. However, when collapsing the cuboid to either the psychometric or the designometric response matrix, the result is a completely filled matrix.
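
With the helpers defined above, this completeness can be verified per scale; the following is a quick sanity check, not part of the reported analysis:

## complete response matrices contain no missing cells
D_1 %>% filter(Scale == "HQI") %>% rm_psycho() %>% is.na() %>% sum()  ## expected: 0
D_1 %>% filter(Scale == "HQI") %>% rm_design() %>% is.na() %>% sum()  ## expected: 0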

## Harmonize column types across studies and rescale responses to the unit interval
norm_cols <- 
  function(Data) 
    Data %>% 
    mutate(Part = str_c(Study, as.character(Part)),
           Item = as.character(Item),
           Scale = as.character(Scale),
           Design = as.character(Design)) %>% 
    mutate(response = mascutils::rescale_unit(response)) %>% 
    arrange(Study, Part, Scale, Item, Anchor, Design)

## Reduce a study's data to the common set of columns
mini_cols <- 
  function(Data)
    Data %>% select(Study, Part, Scale, Item, Anchor, Design, response)
    

Items <- readxl::read_excel("Items.xlsx")

load("DK1.Rda")
DK <- 
  DK1 %>% 
  filter(Condition == "long") %>% 
  mutate(Study = "DK",
         Design = str_remove(Stimulus, "^c")) %>% 
  left_join(select(Items, Scale, Item, AnchorLow_EN, AnchorHigh_EN)) %>% 
  mutate(Anchor = str_c(AnchorHigh_EN, AnchorLow_EN, sep = " - ")) %>% 
  norm_cols()  
## Joining with `by = join_by(Item, Scale)`
load("PS.Rda")
PS <- 
  PS_1 %>% 
  filter(Condition == 2) %>% 
  mutate(Study = "PS",
         Design = Stimulus) %>% 
  left_join(select(Items, Scale, Item, AnchorLow_EN, AnchorHigh_EN)) %>% 
  mutate(Anchor = str_c(AnchorHigh_EN, AnchorLow_EN, sep = " - ")) %>% 
  norm_cols()
## Joining with `by = join_by(Item, Scale)`
load("AH.Rda")
## Warning: namespace 'MCMCglmm' is not available and has been replaced
## by .GlobalEnv when processing object 'M'
AH <- D$AH1 %>% 
  rename(Part = Participant,
         trial = Trial,
         Design = Face,
         Gender = sex) %>% 
  mutate(Study = "AH",
         Scale = "nEeriness",
         Item = str_c("n", Item),
         Gender = as.factor(Gender),
         response = 1 - mascutils::rescale_unit(response)) %>% 
  left_join(select(Items, Scale, Item, AnchorLow_EN, AnchorHigh_EN)) %>% 
  mutate(Anchor = str_c(AnchorHigh_EN, AnchorLow_EN, sep = " - ")) %>%  
  norm_cols()
## Joining with `by = join_by(Scale, Item)`
load("Tuch1.Rda")
QB <- 
  Tuch1 %>% 
  mutate(Anchor = str_c(AnchorLow, AnchorHi, sep = "_"),
         Study = "QB") %>%
  group_by(Scale) %>% 
  mutate(Item = str_c(Scale, as.integer(as.factor(Anchor)))) %>% 
  ungroup() %>% 
  filter(Condition == "0.5s") %>% 
  rename(Part = Subj, 
         Design = Stimulus, 
         response = Judgement) %>%  
  norm_cols()


load("Tuch2.Rda")
SP <- 
  Tuch2 %>% 
  rename(Part = Participant, Design = Stimulus, response = Response) %>% 
  mutate(Study = "SP") %>% 
  filter(Inventory == "UEQ" &
         Scale == "Attractiveness" &
         Condition %in% c("unlimited", "500ms")) %>% 
  mutate(response = mascutils::rescale_unit(response)) %>% 
  mutate(response = if_else(Item %in% c("Att3", "Att4"), 
                            1 - response, 
                            response)) %>% 
  left_join(select(Items, Scale, Item, AnchorLow_EN, AnchorHigh_EN)) %>% 
  mutate(Anchor = str_c(AnchorHigh_EN, AnchorLow_EN, sep = " - ")) %>% 
  norm_cols()
## Joining with `by = join_by(Scale, Item)`
load("DN.Rda")
DN <- 
  DN %>% 
  mutate(Study = "DN",
         response = mascutils::rescale_zero_one(Response),
         Anchor = "") %>% 
  rename(Part = subject_nr,
         Design = SSName) %>% 
  norm_cols()



D_1 <- bind_rows(mini_cols(PS), mini_cols(DK), mini_cols(QB), mini_cols(SP), mini_cols(DN), mini_cols(AH)) |> 
  mutate(Scale = if_else(Scale %in% c("beauty", "hedonism", "usability"), str_to_sentence(Scale), Scale))

D_Eer <- 
  D_1 %>% 
  filter(Scale == "nEeriness",
         str_detect(Item, "[[:digit:]]+")) ## only M&R stimuli

distinct(D_1, Scale, Study) |> 
  arrange(Scale)
Scale Study
Attractiveness SP
Beauty DN
Credib QB
HQI QB
HQS QB
Hedonism DN
Usability DN
nEeriness PS
nEeriness DK
nEeriness AH
D_Att <- 
  D_1 %>% 
  filter(Scale %in% c("HQI", "HQS", "Credib") )
  

D_HUB <- 
  D_1 %>% 
  filter(Study == "DN")

save(D_1, D_Eer, PS, DK, AH, QB, SP, DN, file = "DMX_data.Rda")
load("DMX_data.Rda")
D_1 %>% 
  ggplot(aes(x = response)) +
  geom_histogram() +
  facet_wrap(~Study, scales = "free_y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

D_Eer %>% 
  ggplot(aes(x = Item, y = response, color = Study)) +
  geom_violin()

## Joining with `by = join_by(Study)`
## Joining with `by = join_by(Study)`
Study n_Design n_Part n_Obs
AH 20 45 10800
DK 80 35 2800
DN 48 42 8064
PS 87 39 2808
QB 76 25 1900
SP 66 40 1440
## Joining with `by = join_by(Scale)`
## Joining with `by = join_by(Scale)`
Scale n_Design n_Part n_Obs
Attractiveness 66 40 1440
Beauty 48 42 2688
Credib 76 25 500
HQI 76 25 700
HQS 76 25 700
Hedonism 48 42 2688
Usability 48 42 2688
nEeriness 127 119 16408

2.1.1 Scales

For the following rating scales, responses have been extracted from the original experimental data:

The Eeriness scale has been developed for measuring negative emotional responses towards robot faces and is primarily used in research on the Uncanny Valley phenomenon. Ho & MacDorman (2017) present an advanced psychometric validation of the scale. Their study made use of 12 animated characters (designs), avoiding the level 1 fallacy to some degree, but the data analysis was done under the psychometric perspective (level 2 fallacy).

The Attractiveness scale is part of the User Experience Questionnaire (UEQ) inventory. It has been validated by Laugwitz, Held, and Schrepp [Bettina Laugwitz, Theo Held, and Martin Schrepp. 2008. Construction and Evaluation of a User Experience Questionnaire. 63–76. https://doi.org/10.1007/978-3-540-89350-9_6]. The UEQ has undergone basic psychometric evaluation in six studies with a single design each (level 1 fallacy).

The two scales Hedonic Quality - Identity (HQI) and Hedonic Quality - Stimulation (HQS) are from the AttrakDiff2 inventory. AttrakDiff2 underwent basic evaluation using only three designs under the psychometric perspective (level 1 fallacy) [Hassenzahl, M., Burmester, M., Koller, F., AttrakDiff: Ein Fragebogen zur Messung wahrgenommener hedonischer und pragmatischer Qualität].

The Credibility scale … [HERE]

The following table gives an overview of the scales and their number of items per study:

## `summarise()` has grouped output by 'Study'. You can override using the
## `.groups` argument.
Study Scale n_Items
AH nEeriness 8
DK nEeriness 8
DN Beauty 4
DN Hedonism 4
DN Usability 4
PS nEeriness 8
QB Credib 5
QB HQI 7
QB HQS 7
SP Attractiveness 6

2.1.2 Data analysis

2.2 Results

2.2.1 Scale consistency

Scale_rel <-
  D_1 %>% 
  #mutate(Scale = str_c(Study, Scale, sep = "_")) %>% 
  split(.$Scale) %>% 
  map_df(alpha_ci)
## Some items ( Att4 Att6 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
Scale_rel
Scale Perspective center lower upper
Attractiveness designometric 0.6351084 0.4139821 0.7675680
Attractiveness psychometric 0.3607549 0.1025158 0.6041318
Beauty designometric 0.9692454 0.9524973 0.9789909
Beauty psychometric 0.5872196 0.3211394 0.7246443
Credib designometric 0.5717053 0.3137612 0.7250947
Credib psychometric 0.4243274 -0.2559442 0.6789972
Hedonism designometric 0.9681432 0.9519016 0.9783777
Hedonism psychometric 0.6229097 0.4330071 0.7679728
HQI designometric 0.6737763 0.5425565 0.7830014
HQI psychometric 0.6496321 0.2090802 0.8473008
HQS designometric 0.7394482 0.6378606 0.8019733
HQS psychometric 0.6896385 0.2702103 0.8681698
nEeriness designometric 0.8834215 0.8351128 0.9156898
nEeriness psychometric 0.8139584 0.7283786 0.8756691
Usability designometric 0.8685692 0.7983001 0.9208187
Usability psychometric 0.6550002 0.4810185 0.7966900
Scale_rel %>% 
  ggplot(aes(color = Scale,
             label = Scale,
             x = Perspective,
             y = center,
             ymin = lower,
             ymax = upper)) +
  geom_point() +
  geom_line(aes(group = Scale)) +
  ylab("std. Cronbach alpha") +
  geom_label() +
  ylim(0,1)

2.2.2 Item reliability

Item_rel <-
  D_1 %>% 
  split(.$Scale) %>% 
  map_df(item_rel)
## Some items ( Att4 Att6 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
Item_rel
Scale Item Perspective n raw.r std.r r.cor r.drop mean sd
Attractiveness Att1 designometric 59 0.6935717 0.6221672 0.5374563 0.4152139 0.5578079 0.2357714
Attractiveness Att1 psychometric 40 0.6556074 0.6582254 0.5850407 0.3760495 0.5602917 0.1860536
Attractiveness Att2 designometric 56 0.6803049 0.6949713 0.6711486 0.5044168 0.5051086 0.2363659
Attractiveness Att2 psychometric 40 0.6499161 0.6604626 0.6482586 0.3950330 0.4927083 0.1721726
Attractiveness Att3 designometric 54 0.7224083 0.7110001 0.6214230 0.5291541 0.5478719 0.2477413
Attractiveness Att3 psychometric 40 0.4999924 0.5096504 0.3655711 0.1976182 0.5500833 0.1733970
Attractiveness Att4 designometric 57 0.3314863 0.3210092 0.0856613 0.0477289 0.5078553 0.2118101
Attractiveness Att4 psychometric 40 0.2379467 0.2590127 0.0002670 -0.0557086 0.4937083 0.1556704
Attractiveness Att5 designometric 53 0.7734007 0.7701590 0.7479344 0.6165928 0.6050833 0.2344340
Attractiveness Att5 psychometric 40 0.7220290 0.7143748 0.6884718 0.4761634 0.5999583 0.1851124
Attractiveness Att6 designometric 55 0.5071367 0.4898179 0.3320168 0.2360847 0.5461803 0.2407393
Attractiveness Att6 psychometric 40 0.2038149 0.1716358 -0.1026968 -0.1800056 0.5563333 0.2040698
Beauty 1 designometric 48 0.9715208 0.9705167 0.9620374 0.9469555 0.4817018 0.2405092
Beauty 1 psychometric 42 0.5634238 0.5646013 0.2839047 0.2265541 0.4763787 0.1232866
Beauty 2 designometric 48 0.9512628 0.9509864 0.9304837 0.9141279 0.5581468 0.2252406
Beauty 2 psychometric 42 0.7005367 0.7291817 0.6326197 0.4630836 0.5518076 0.1088177
Beauty 3 designometric 48 0.9697443 0.9684071 0.9595165 0.9437284 0.5264599 0.2403634
Beauty 3 psychometric 42 0.7725085 0.7719752 0.7087504 0.5186978 0.5387326 0.1285998
Beauty 4 designometric 48 0.9328297 0.9355976 0.9008122 0.8865251 0.4910026 0.2133545
Beauty 4 psychometric 42 0.6487324 0.6222997 0.3868669 0.3061015 0.5009746 0.1341732
Credib Credib1 designometric 59 0.6044867 0.5932103 0.4228984 0.3185598 0.5694068 0.2211135
Credib Credib1 psychometric 25 0.6959111 0.6899259 0.6056964 0.4315671 0.5667000 0.1462139
Credib Credib2 designometric 59 0.6551251 0.6087126 0.4511409 0.3364046 0.5215226 0.2560754
Credib Credib2 psychometric 25 0.7337153 0.7095985 0.6784585 0.4579035 0.5105000 0.1592414
Credib Credib3 designometric 59 0.6514878 0.6506570 0.5467255 0.4086193 0.5228249 0.2228835
Credib Credib3 psychometric 25 0.5507187 0.5569437 0.3896841 0.2571936 0.5160000 0.1363455
Credib Credib4 designometric 57 0.7123939 0.7130302 0.6322551 0.4802624 0.5217398 0.2564079
Credib Credib4 psychometric 25 0.4871192 0.4900538 0.2266055 0.1664315 0.5203000 0.1409701
Credib Credib5 designometric 53 0.5370583 0.4951819 0.2553971 0.1843925 0.4953751 0.2240947
Credib Credib5 psychometric 25 0.3886878 0.4118324 0.1330502 0.0676140 0.4957000 0.1353824
Hedonism 1 designometric 48 0.9754944 0.9729531 0.9661373 0.9489504 0.4949129 0.2486562
Hedonism 1 psychometric 42 0.5736808 0.6387250 0.4451207 0.3110343 0.4989629 0.0873503
Hedonism 2 designometric 48 0.9265228 0.9346126 0.9044443 0.8870970 0.5479773 0.1613367
Hedonism 2 psychometric 42 0.6375588 0.6812583 0.5040558 0.3795473 0.5420985 0.0918896
Hedonism 3 designometric 48 0.9539940 0.9513429 0.9322637 0.9167024 0.4675383 0.2110448
Hedonism 3 psychometric 42 0.7611141 0.7352989 0.6478569 0.5326241 0.4721382 0.1008703
Hedonism 4 designometric 48 0.9644170 0.9626921 0.9502177 0.9360589 0.4408734 0.2070048
Hedonism 4 psychometric 42 0.7802483 0.7127237 0.6195312 0.4669670 0.4360133 0.1286006
HQI HQI1 designometric 54 0.5650463 0.5677040 0.5199326 0.3768710 0.4896019 0.2501074
HQI HQI1 psychometric 25 0.8131976 0.7897839 0.7924689 0.6859397 0.4950000 0.2026234
HQI HQI2 designometric 60 0.6225986 0.5693149 0.4603014 0.3755944 0.5781833 0.2724666
HQI HQI2 psychometric 25 0.4140515 0.4199140 0.2907244 0.2108103 0.5604000 0.1667609
HQI HQI3 designometric 58 0.6975315 0.6478708 0.5525440 0.4689579 0.5020259 0.2574881
HQI HQI3 psychometric 25 0.5941852 0.6053038 0.5068226 0.4222744 0.4941000 0.1680958
HQI HQI4 designometric 57 0.6658530 0.6966137 0.6226209 0.5353069 0.5549269 0.2150086
HQI HQI4 psychometric 25 0.5258622 0.5417442 0.4966752 0.3680606 0.5709000 0.1448704
HQI HQI5 designometric 59 0.4338197 0.3596435 0.2005093 0.1251196 0.5912429 0.2374892
HQI HQI5 psychometric 25 0.7795347 0.7634402 0.7937611 0.6220707 0.5843000 0.2158794
HQI HQI6 designometric 63 0.5748955 0.5786184 0.4471956 0.3832471 0.5124471 0.2509687
HQI HQI6 psychometric 25 0.5138991 0.5250801 0.3944244 0.3154123 0.5037000 0.1751995
HQI HQI7 designometric 57 0.7367313 0.7424558 0.7285225 0.6019045 0.4723392 0.2803627
HQI HQI7 psychometric 25 0.5576736 0.5584739 0.4336605 0.3685304 0.4573000 0.1750787
HQS HQS1 designometric 53 0.6810616 0.6611287 0.5768370 0.5024558 0.5541855 0.2449457
HQS HQS1 psychometric 25 0.6168159 0.6070060 0.5477540 0.4332504 0.5197000 0.1599859
HQS HQS2 designometric 55 0.7521635 0.7630558 0.7209064 0.6402145 0.3685152 0.2426397
HQS HQS2 psychometric 25 0.6922924 0.7099388 0.6440410 0.5659545 0.3813000 0.1321243
HQS HQS3 designometric 56 0.5881881 0.5977326 0.5303986 0.4181500 0.4187946 0.2243515
HQS HQS3 psychometric 25 0.7746096 0.7598930 0.7075289 0.6311833 0.4277000 0.1744156
HQS HQS4 designometric 61 0.6705848 0.6308341 0.5702840 0.4614471 0.4811749 0.2632831
HQS HQS4 psychometric 25 0.4887681 0.4886682 0.3808049 0.2910595 0.4654000 0.1498588
HQS HQS5 designometric 56 0.6387364 0.5227271 0.4352969 0.3392942 0.3966518 0.2557008
HQS HQS5 psychometric 25 0.5210489 0.5184179 0.4270064 0.3229098 0.3899000 0.1542091
HQS HQS6 designometric 61 0.7125647 0.6549950 0.5735686 0.4876641 0.4197322 0.2625926
HQS HQS6 psychometric 25 0.5634879 0.5772406 0.4567243 0.3922267 0.4003000 0.1417273
HQS HQS7 designometric 61 0.5840752 0.5848721 0.4958604 0.3960248 0.4128005 0.2460413
HQS HQS7 psychometric 25 0.6966990 0.6955517 0.6761135 0.5369481 0.4267000 0.1611009
nEeriness nE1 designometric 127 0.7408118 0.7334399 0.6848541 0.6440660 0.4678868 0.1740001
nEeriness nE1 psychometric 119 0.5160214 0.5343692 0.4265733 0.3863068 0.4556709 0.0952187
nEeriness nE2 designometric 126 0.7428029 0.7397503 0.6929683 0.6406685 0.4809049 0.1821338
nEeriness nE2 psychometric 119 0.6873760 0.6728223 0.6097037 0.5412618 0.4710938 0.1331084
nEeriness nE3 designometric 125 0.7363558 0.7016361 0.6419461 0.5971234 0.4606730 0.2197593
nEeriness nE3 psychometric 119 0.5663081 0.5477541 0.4419186 0.3869046 0.4442411 0.1334954
nEeriness nE4 designometric 126 0.6349151 0.6613301 0.5886438 0.5480713 0.5499998 0.1425516
nEeriness nE4 psychometric 119 0.6343404 0.6317383 0.5547755 0.4991363 0.5278561 0.1144561
nEeriness nE5 designometric 127 0.7501838 0.7615488 0.7206447 0.6736243 0.5490203 0.1549981
nEeriness nE5 psychometric 119 0.7423577 0.7477601 0.7092518 0.6486944 0.5340442 0.1036929
nEeriness nE6 designometric 127 0.7180501 0.7173765 0.6617658 0.6239836 0.5236777 0.1610552
nEeriness nE6 psychometric 119 0.6432526 0.6575557 0.5929158 0.5329360 0.5006405 0.0974882
nEeriness nE7 designometric 125 0.8272383 0.8246236 0.8079900 0.7584556 0.4967029 0.1799651
nEeriness nE7 psychometric 119 0.7977637 0.7956693 0.7821557 0.7006352 0.4942902 0.1245290
nEeriness nE8 designometric 126 0.7624603 0.7703229 0.7417685 0.6787708 0.5527972 0.1646881
nEeriness nE8 psychometric 119 0.7030982 0.7086295 0.6631192 0.5909101 0.5344227 0.1103392
Usability 1 designometric 48 0.8954695 0.8852177 0.8496008 0.8072696 0.5537115 0.1698535
Usability 1 psychometric 42 0.7059783 0.6963695 0.5421684 0.4300962 0.5353763 0.1208224
Usability 2 designometric 48 0.8949999 0.8727338 0.8517108 0.8006296 0.5615960 0.1771572
Usability 2 psychometric 42 0.6888980 0.7014512 0.5453210 0.4359088 0.5574644 0.1107410
Usability 3 designometric 48 0.6282174 0.6911338 0.5121547 0.4845593 0.5668973 0.1168843
Usability 3 psychometric 42 0.6876127 0.6760119 0.4882153 0.4002469 0.5600007 0.1214538
Usability 4 designometric 48 0.9622228 0.9443958 0.9610464 0.9097243 0.5221123 0.2167186
Usability 4 psychometric 42 0.7434466 0.7530564 0.6456878 0.5171022 0.5242849 0.1112598
G_Item_rel <-
  Item_rel %>% 
  ggplot(aes(color = Scale,
             x = Perspective,
             y = r.cor)) +
  # geom_point() +
  geom_line(aes(group = Item)) +
  ylab("Item-whole correlation")

G_Item_rel

G_Item_rel +
  geom_label(aes( label = Item)) +
  facet_wrap(~Scale, ncol = 2) +
  geom_point(data = rename(Scale_rel, alpha = center),
             aes(x = Perspective, 
                 y = alpha,
                 col = "Whole Cronbach alpha")) +
  geom_line(data = rename(Scale_rel, alpha = center),
             aes(x = Perspective, 
                 y = alpha,
                 group = Scale,
                 col = "Whole Cronbach alpha"))

2.3 Number of factors

Often, different scales are used in combination to create a more complete picture. Usually, the aim is that every scale measures exactly one construct (or latent variable) and that different scales measure different constructs. As a counter-example, Ho & MacDorman found that the Eeriness scale decomposes into two slightly different aspects, summarized as "eerie" and "spine-tingling". In contrast, the AttrakDiff2 questionnaire comprises two scales to capture supposedly different aspects.

Given a response matrix, the number of factors can be estimated using parallel analysis. Ideally, this procedure returns exactly as many factors as there are separate scales. Here, we use parallel analysis to assess whether the two perspectives produce the expected number of factors.

## Run a parallel analysis (psych::fa.parallel) on either the
## designometric ("D") or the psychometric ("P") response matrix
parallel_analysis <- function(data, n, persp, scales){
  if (persp == "D") {
    data <- rm_design(data)
    main <- str_c("Designometric Parallel Analysis of ", scales)
  }
  if (persp == "P") {
    data <- rm_psycho(data)
    main <- str_c("Psychometric Parallel Analysis of ", scales)
  }
  psych::fa.parallel(data,
                   fa = "fa",
                   fm = "minres",
                   nfactors=n,
                   main=main)
    
}

2.4 Eeriness scale

Eeriness is usually considered a one-dimensional construct. Nevertheless, it has been suggested that it comprises two slightly different factors.

parallel_analysis(D_Eer, 2, "D", "Eeriness")
## `summarise()` has grouped output by 'Design'. You can override using the
## `.groups` argument.
## Loading required namespace: GPArotation
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
parallel_analysis(D_Eer, 2, "P", "Eeriness")
## `summarise()` has grouped output by 'Part'. You can override using the
## `.groups` argument.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

The results suggest that under the designometric perspective there is only one latent variable, whereas the psychometric perspective suggests two.

E_psycho <- tibble(Perspective = "psychometric",
                   Item = str_c("nE", 1:8),
                   loading = as.numeric(psych::fa(rm_psycho(D_Eer))$loadings))
## `summarise()` has grouped output by 'Part'. You can override using the
## `.groups` argument.
E_design <- tibble(Perspective = "designometric",
                   Item = str_c("nE", 1:8),
                   loading = as.numeric(psych::fa(rm_design(D_Eer))$loadings))
## `summarise()` has grouped output by 'Design'. You can override using the
## `.groups` argument.
# bind_rows(E_psycho, E_design) %>%
#   ggplot(aes(x = Perspective, y = loading, group = Item)) +
#   geom_line() +
#   geom_point()

2.5 AttrakDiff and Credibility

The AttrakDiff2 inventory splits hedonic quality into two components, Identity (HQI) and Stimulation (HQS), while the Credibility scale is a separate instrument right from the start.

parallel_analysis(D_Att, 3, "P", "AttrakDiff and Credibility")
## `summarise()` has grouped output by 'Part'. You can override using the
## `.groups` argument.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
parallel_analysis(D_Att, 3, "D", "AttrakDiff and Credibility")
## `summarise()` has grouped output by 'Design'. You can override using the
## `.groups` argument.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in sqrt(1/diag(V)): NaNs produced
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA

Under the psychometric perspective, all items can be grouped under just one latent construct. In contrast, the designometric analysis yields five factors.

2.6 Hedonism, Usability and Beauty

In study DN, three separate scales were used. However, parallel analysis suggests that they capture the same latent variable under both perspectives.

parallel_analysis(D_HUB, 3, "P", "Hedonism, Usability and Beauty")
## `summarise()` has grouped output by 'Part'. You can override using the
## `.groups` argument.
## Warning in GPFoblq(A, Tmat = Tmat, normalize = normalize, eps = eps, maxit =
## maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
parallel_analysis(D_HUB, 3, "D", "Hedonism, Usability and Beauty")
## `summarise()` has grouped output by 'Design'. You can override using the
## `.groups` argument.
## Warning in GPFoblq(A, Tmat = Tmat, normalize = normalize, eps = eps, maxit =
## maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

3 Confirmatory Factor Analysis on Inventories

Several of the original studies employed more than one scale (QB, DN, SP). CFA is commonly used on multi-scale inventories to assess advanced psychometric qualities, in particular discriminant validity: if the scales measure genuinely different aspects of a person or a design, the latent factors should be clearly distinguishable, with correlations well below one.
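
As an illustration, the discriminant validity of the two AttrakDiff2 scales could be assessed with a two-factor model under the designometric perspective. The following sketch shows how such a model would be specified with the data at hand; it is an outline, not a model we report results for:

F_att <- "HQI =~ HQI1 + HQI2 + HQI3 + HQI4 + HQI5 + HQI6 + HQI7
          HQS =~ HQS1 + HQS2 + HQS3 + HQS4 + HQS5 + HQS6 + HQS7"

## the posterior of the latent correlation HQI ~~ HQS should stay well below 1
# M_att <- bcfa(model = F_att, data = rm_design(D_Att),
#               n.chains = 5, burnin = 2000, sample = 2000)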

library(blavaan)

## Tidy the standardized posterior of a blavaan model into long format
tbl_post.blavaan <- function(x, model = NA){
  x %>% 
    blavaan::standardizedposterior() %>% 
    coda::as.mcmc() %>% 
    coda::as.mcmc.list() %>% 
    tidybayes::tidy_draws() %>% 
    rename(chain = .chain, iter = .iteration) %>% 
    select(-.draw) %>% 
    gather(parameter, value, -chain, -iter) %>% 
    mutate(type = case_when(str_detect(parameter, "=~") ~ "std.coef",
                            str_detect(parameter, "~~") ~ "std.vcov")) %>% 
    separate(parameter, into = c("lhs", "rhs"), remove = F)
}

F_6 <- "nEeriness =~ nE1 + nE2 + nE3 + nE4 + nE5 + nE6 + nE7 + nE8"

M_6_psycho <- 
  bcfa(model = F_6,
  data = rm_psycho(D_Eer) ,
  n.chains = 5,
  burnin = 12000,
  sample = 2000)

save(M_6_psycho, file = "M_6.Rda")


M_6_design <- 
  bcfa(model = F_6,
       data = rm_design(D_Eer),
       n.chains = 5,
       burnin = 20000,
       sample = 2000)

save(M_6_design, M_6_psycho, file = "M_6.Rda")

P_6 <- 
  bind_rows(
    tbl_post.blavaan(M_6_design) %>% mutate(model = "designometric"),
    tbl_post.blavaan(M_6_psycho) %>% mutate(model = "psychometric")
  ) 

save(M_6_design, M_6_psycho, P_6, file = "M_6.Rda")
load("M_6.Rda")
## Center (median) and 95% credibility limits per parameter
clu <- function(x)
  x %>% 
  group_by(model, parameter, type, lhs, rhs) %>% 
  summarize(center = median(value),
            lower = quantile(value, .025),
            upper = quantile(value, .975)) %>% 
  ungroup()


P_6 %>% 
  mutate(parameter = NA) %>% 
  filter(type == "std.coef") %>% 
  clu() %>% 
  mascutils::discard_redundant() %>% 
  rename(Item = rhs)
## `summarise()` has grouped output by 'model', 'parameter', 'type', 'lhs'. You
## can override using the `.groups` argument.
model Item center lower upper
designometric nE1 0.7448549 0.6717351 0.6717351
designometric nE2 0.6591509 0.5169453 0.5169453
designometric nE3 0.6549655 0.5077850 0.5077850
designometric nE4 0.5179342 0.3501500 0.3501500
designometric nE5 0.6497413 0.5130526 0.5130526
designometric nE6 0.6109226 0.4593454 0.4593454
designometric nE7 0.7407468 0.6282122 0.6282122
designometric nE8 0.6773998 0.5447105 0.5447105
psychometric nE1 0.7004677 0.5704952 0.5704952
psychometric nE2 0.5265502 -0.6398494 -0.6398494
psychometric nE3 0.3910258 -0.5155322 -0.5155322
psychometric nE4 0.4413438 -0.6035649 -0.6035649
psychometric nE5 0.5171714 -0.6336654 -0.6336654
psychometric nE6 0.4205133 -0.5832273 -0.5832273
psychometric nE7 0.6104738 -0.6853271 -0.6853271
psychometric nE8 0.5105753 -0.6295708 -0.6295708
CLU_6 <- 
  P_6 %>% 
  filter(type == "std.coef") %>% 
  clu() %>% 
  rename(Item = rhs)
## `summarise()` has grouped output by 'model', 'parameter', 'type', 'lhs'. You
## can override using the `.groups` argument.
CLU_6
model parameter type lhs Item center lower upper
designometric nEeriness=~nE1 std.coef nEeriness nE1 0.7448549 0.6717351 0.6717351
designometric nEeriness=~nE2 std.coef nEeriness nE2 0.6591509 0.5169453 0.5169453
designometric nEeriness=~nE3 std.coef nEeriness nE3 0.6549655 0.5077850 0.5077850
designometric nEeriness=~nE4 std.coef nEeriness nE4 0.5179342 0.3501500 0.3501500
designometric nEeriness=~nE5 std.coef nEeriness nE5 0.6497413 0.5130526 0.5130526
designometric nEeriness=~nE6 std.coef nEeriness nE6 0.6109226 0.4593454 0.4593454
designometric nEeriness=~nE7 std.coef nEeriness nE7 0.7407468 0.6282122 0.6282122
designometric nEeriness=~nE8 std.coef nEeriness nE8 0.6773998 0.5447105 0.5447105
psychometric nEeriness=~nE1 std.coef nEeriness nE1 0.7004677 0.5704952 0.5704952
psychometric nEeriness=~nE2 std.coef nEeriness nE2 0.5265502 -0.6398494 -0.6398494
psychometric nEeriness=~nE3 std.coef nEeriness nE3 0.3910258 -0.5155322 -0.5155322
psychometric nEeriness=~nE4 std.coef nEeriness nE4 0.4413438 -0.6035649 -0.6035649
psychometric nEeriness=~nE5 std.coef nEeriness nE5 0.5171714 -0.6336654 -0.6336654
psychometric nEeriness=~nE6 std.coef nEeriness nE6 0.4205133 -0.5832273 -0.5832273
psychometric nEeriness=~nE7 std.coef nEeriness nE7 0.6104738 -0.6853271 -0.6853271
psychometric nEeriness=~nE8 std.coef nEeriness nE8 0.5105753 -0.6295708 -0.6295708
P_6 %>% 
  filter(type == "std.coef") %>% 
  rename(Item = rhs) %>% 
  ggplot(aes(x = Item, color = model, fill = model, y = value)) +
  geom_violin() +
  geom_point(data = CLU_6, aes(y = center)) +
  geom_line(data = CLU_6, aes(y = center, group = model))

4 Using designometric scales as psychometrics

Up to this point we have taken a purely designometric stance: such rating scales must primarily discriminate between designs. In some research situations, however, a designometric scale could well be used psychometrically. For example, a common stereotype is that male adolescents expose themselves more to imagery of robots, zombies and humanoid extraterrestrials than young women do. One could assume that the feeling of eeriness wears off, which would then produce weaker responses from male respondents when averaged over designs.

AH %>% 
  group_by(Part, Item, Gender) %>%
  summarize(nEeriness = mean(response)) %>% 
  ggplot(aes(x = Item, color = Gender, y = nEeriness)) +
  geom_boxplot()
## `summarise()` has grouped output by 'Part', 'Item'. You can override using the
## `.groups` argument.

5 Discussion

Scales in HCI research and beyond are commonly used to discriminate between poor and good designs. We have argued that scales for measuring designs must be evaluated on design-by-item response matrices. Most validation studies evaluate their scales on person-by-item response matrices, which we call the psychometric fallacy. To make the case, a simulation showed that, in a realistic scenario, psychometric reliability can be excellent while designometric reliability is poor. Fortunately, when looking at real data from commonly used rating scales, this bias is reversed: designometric reliability is consistently better. At the same time,

5.1 Implications for practical use

5.1.1 AttrakDiff2

The two scales HQI and HQS showed only moderate reliability under both perspectives.

5.2 Implications for scale development

5.3 Limitations

  • The populations in the samples were rather homogeneous (students), possibly leaving too little variance between persons.
  • Participants only saw the stimuli and never used the systems, so we can assume dominance of System 1 processing.
  • The tested conditions used finalized scales, rather than the initial item pools from which scales are constructed.

5.4 The ideal designometric scale

The comparison of item-level reliability suggests that the scales fall into two clusters: Beauty and Hedonism have overall excellent item and scale reliability. Reliability under the psychometric perspective is still good. What is striking is that item reliabilities seem to drop by a constant