Loading and Preparing the Data
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.0.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.0.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.1 ✓ purrr 0.3.4
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
data <- read_csv("/Users/Seba/Documents/DBdistances2.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## A = col_character(),
## B = col_character(),
## DIST = col_double()
## )
create_matrix_from_qualtrics <- function(list_arg) {
unique_elements <- as.list(unique(list_arg$A))
output_matrix <- matrix(nrow = length(unique_elements), ncol = length(unique_elements))
index_i = 1
index_j = 1
for(i in unique_elements) {
for(j in unique_elements) {
ratings <- list_arg %>% filter(A == i & B == j)
if (nrow(ratings) > 0) {
output_matrix[index_j, index_i] = mean(ratings$DIST)
}
index_j = index_j + 1
}
index_i = index_i + 1
index_j = 1
}
return(output_matrix)
}
data.similarity_matrix <- create_matrix_from_qualtrics(data)
library(smacof)
## Warning: package 'smacof' was built under R version 4.0.2
## Loading required package: plotrix
## Warning: package 'plotrix' was built under R version 4.0.2
## Loading required package: colorspace
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 4.0.2
##
## Attaching package: 'smacof'
## The following object is masked from 'package:base':
##
## transform
## Creating a dissimilarity matrix from the similarity matrix
data.dissimilarity_matrix <- sim2diss(data.similarity_matrix, method = "reverse", to.dist = FALSE)
Running MDS
# Running the MDS algorithm
db.mds <- smacofSym(data.dissimilarity_matrix, ndim = 4, type = "ordinal")
pt <- permtest(db.mds, nrep = 100, verbose = FALSE)
pt
##
## Call: permtest.smacof(object = db.mds, nrep = 100, verbose = FALSE)
##
## SMACOF Permutation Test
## Number of objects: 52
## Number of replications (permutations): 100
##
## Observed stress value: 0.175
## p-value: <0.001
stress_levels <- tibble(k = numeric(), stress = numeric(), pval = numeric())
stress_levels <- stress_levels %>% add_row(k = 0, stress = 1)
for (i in (1:10)) {
mds.tmp <- smacofSym(data.dissimilarity_matrix, ndim = i, type = "ordinal")
pt <- permtest(db.mds, nrep = 100, verbose = FALSE)
stress_levels <- stress_levels %>% add_row(k = i, stress = mds.tmp$stress, pval = pt$pval)
}
## Warning in smacofSym(delta = structure(c(0, 6.5, 5.66666666666667,
## 4.33333333333333, : Iteration limit reached! You may want to increase the itmax
## argument!
stress_levels
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
a <- list(zeroline = TRUE, tickmode = "linear", tick0 = 0)
t <- list(text = "Stress Plot")
plot_ly(stress_levels, x=~k, y=~stress) %>% add_lines() %>% layout(xaxis = a, title = t)
kmeans_clust <- kmeans(db.mds$confdist, centers = 4, nstart = 25)
library(plotly)
tmp <- as_tibble(unique(data$A))
colnames(tmp) <- c("scenario")
labels <- read_csv("/Users/Seba/Documents/DBentitieslabels.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `A social media service was breached and your login credentials were stolen` = col_character(),
## `social media` = col_character(),
## `login credentials` = col_character()
## )
colnames(labels) <- c("scenario", "company", "info")
plot.frame <- as_tibble(tmp) %>% left_join(labels)
## Joining, by = "scenario"
plot.frame <- cbind(plot.frame, db.mds$conf)
plot.frame$info <- str_trunc(plot.frame$info, 15, side = "right")
plot.frame$company <- str_trunc(plot.frame$company, 20, side = "right")
plot_ly(plot.frame, x = ~D1, y = ~D2) %>% add_markers(color = ~company) %>% add_text(text = ~info, textposition = 'top center')
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
#geom.text.size = 3
#ggplot(plot.frame) + geom_point(stat = "identity", position = "identity", aes(x = D1, y=D2, colour = factor(value)), size = 1, alpha = 0.7) + geom_text(aes(x = D1, y=D2, #label=identity, color=factor(value)), size = 4, nudge_y = -0.09)