Loading and Preparing the Data

library(magrittr)

## Warning: package 'magrittr' was built under R version 4.0.2

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.0.2

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.0.2

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.2

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ tibble  3.1.1     ✓ purrr   0.3.4
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## Warning: package 'tibble' was built under R version 4.0.2

## Warning: package 'tidyr' was built under R version 4.0.2

## Warning: package 'readr' was built under R version 4.0.2

## Warning: package 'forcats' was built under R version 4.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()

data <- read_csv("/Users/Seba/Documents/DBdistances2.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   A = col_character(),
##   B = col_character(),
##   DIST = col_double()
## )

create_matrix_from_qualtrics <- function(list_arg) {
  unique_elements <- as.list(unique(list_arg$A))
  
  output_matrix <- matrix(nrow = length(unique_elements), ncol = length(unique_elements))
  
  index_i = 1
  index_j = 1
  
  for(i in unique_elements) {
    for(j in unique_elements) {
      ratings <- list_arg %>% filter(A == i & B == j)
      
      if (nrow(ratings) > 0) {
        output_matrix[index_j, index_i] = mean(ratings$DIST)
      }
      
      index_j = index_j + 1
    }
    index_i = index_i + 1
    index_j = 1
  }
  
  return(output_matrix)
}

data.similarity_matrix <- create_matrix_from_qualtrics(data)

library(smacof)

## Warning: package 'smacof' was built under R version 4.0.2

## Loading required package: plotrix

## Warning: package 'plotrix' was built under R version 4.0.2

## Loading required package: colorspace

## Loading required package: e1071

## Warning: package 'e1071' was built under R version 4.0.2

## 
## Attaching package: 'smacof'

## The following object is masked from 'package:base':
## 
##     transform

## Creating a dissimilarity matrix from the similarity matrix
data.dissimilarity_matrix <- sim2diss(data.similarity_matrix, method = "reverse", to.dist = FALSE)

Running MDS

# Running the MDS algorithm
db.mds <- smacofSym(data.dissimilarity_matrix, ndim = 4, type = "ordinal")
pt <- permtest(db.mds, nrep = 100, verbose = FALSE)
pt

## 
## Call: permtest.smacof(object = db.mds, nrep = 100, verbose = FALSE)
## 
## SMACOF Permutation Test
## Number of objects: 52 
## Number of replications (permutations): 100 
## 
## Observed stress value: 0.175 
## p-value: <0.001

stress_levels <- tibble(k = numeric(), stress = numeric(), pval = numeric())
stress_levels <- stress_levels %>% add_row(k = 0, stress = 1)
for (i in (1:10)) {
  mds.tmp <- smacofSym(data.dissimilarity_matrix, ndim = i, type = "ordinal")
  pt <- permtest(db.mds, nrep = 100, verbose = FALSE)
  stress_levels <- stress_levels %>% add_row(k = i, stress = mds.tmp$stress, pval = pt$pval)
}

## Warning in smacofSym(delta = structure(c(0, 6.5, 5.66666666666667,
## 4.33333333333333, : Iteration limit reached! You may want to increase the itmax
## argument!

stress_levels

library(plotly)

## Warning: package 'plotly' was built under R version 4.0.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

a <- list(zeroline = TRUE, tickmode = "linear", tick0 = 0)
t <- list(text = "Stress Plot")
plot_ly(stress_levels, x=~k, y=~stress) %>% add_lines() %>% layout(xaxis = a, title = t)

kmeans_clust <- kmeans(db.mds$confdist, centers = 4, nstart = 25)

library(plotly)

tmp <- as_tibble(unique(data$A))
colnames(tmp) <- c("scenario")

labels <- read_csv("/Users/Seba/Documents/DBentitieslabels.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `A social media service was breached and your login credentials were stolen` = col_character(),
##   `social media` = col_character(),
##   `login credentials` = col_character()
## )

colnames(labels) <- c("scenario", "company", "info")

plot.frame <- as_tibble(tmp) %>% left_join(labels)

## Joining, by = "scenario"

plot.frame <- cbind(plot.frame, db.mds$conf)

plot.frame$info <- str_trunc(plot.frame$info, 15, side = "right")
plot.frame$company <- str_trunc(plot.frame$company, 20, side = "right")


plot_ly(plot.frame, x = ~D1, y = ~D2) %>% add_markers(color = ~company) %>% add_text(text = ~info, textposition = 'top center')

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

#geom.text.size = 3
#ggplot(plot.frame) + geom_point(stat = "identity", position = "identity", aes(x = D1, y=D2, colour = factor(value)), size = 1, alpha = 0.7) + geom_text(aes(x = D1, y=D2, #label=identity, color=factor(value)), size = 4, nudge_y = -0.09)

Data Breach Taxonomy Study

Loading and Preparing the Data

Running MDS