# Libraries
library(openxlsx)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggpubr)
## Loading required package: ggplot2
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ car::recode()   masks dplyr::recode()
## ✖ purrr::some()   masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(readxl)
library(mosaic)
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Attaching package: 'mosaic'
## 
## The following object is masked from 'package:Matrix':
## 
##     mean
## 
## The following objects are masked from 'package:rstatix':
## 
##     cor_test, prop_test, t_test
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following objects are masked from 'package:car':
## 
##     deltaMethod, logit
## 
## The following object is masked from 'package:ggplot2':
## 
##     stat
## 
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## 
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## 
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:rstatix':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## 
## The following object is masked from 'package:lattice':
## 
##     dotplot
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(brant)
library(pgirmess) # Post hoc test for the Friedman test
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(flextable) 
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:mosaic':
## 
##     surround
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## The following objects are masked from 'package:ggpubr':
## 
##     border, font, rotate
library(fancycut)
library(lsr)
library(ufs)
## 
## Attaching package: 'ufs'
## 
## The following object is masked from 'package:lsr':
## 
##     cramersV
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following object is masked from 'package:rstatix':
## 
##     make_clean_names
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:pgirmess':
## 
##     shannon
## 
## The following objects are masked from 'package:epiDisplay':
## 
##     alpha, cs, lookup
## 
## The following objects are masked from 'package:mosaic':
## 
##     logit, rescale
## 
## The following object is masked from 'package:car':
## 
##     logit
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:mosaic':
## 
##     theme_map
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
## 
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(rcompanion)
## 
## Attaching package: 'rcompanion'
## 
## The following object is masked from 'package:psych':
## 
##     phi
library(DescTools)
## 
## Attaching package: 'DescTools'
## 
## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD
## 
## The following object is masked from 'package:data.table':
## 
##     %like%
## 
## The following object is masked from 'package:mosaic':
## 
##     MAD
## 
## The following object is masked from 'package:car':
## 
##     Recode
library(ordinal)
## 
## Attaching package: 'ordinal'
## 
## The following object is masked from 'package:dplyr':
## 
##     slice
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Needs to be adapted depending on person looking at the code
setwd("/Users/annagrether/Desktop/BA R")
data <- read.csv("~/Desktop/BA R/data.csv", sep=";")
mypath <- tempfile(pattern = "tables", tmpdir="/Users/annagrether/Desktop/BA R", fileext = ".docx")

Cleaning the data

## Columns
data[ , c('external_lfdn', 'tester', 'dispcode', 'lastpage', 'quality', 'browser', 
          'referer', 'device_type', 'quota', 'quota_assignment', 'quota_rejected_id', 
          'page_history', 'hflip', 'vflip', 'output_mode', 'javascript', 'flash', 
          'session_id', 'language', 'cleaned', 'ats', 'datetime', 'date_of_last_access',
          'date_of_first_mail', 'rts6683840', 'rts6683939', 'rts6683945', 'rts6683978', 
          'rts6683997', 'rts6684002', 'rts6684005', 'rts6689118')] <- list(NULL)

## Rows/participants

Each participant is removed only once, under the first criterion in the list below that applies. If, for example, a participant was already removed because of a senseless answer to question “A3_whydifficulty”, that person does not reappear in the duration-based selection even if their duration was below 2 minutes, because the same person cannot be removed twice. In the end, 152 of the 256 observations/participants remain.

drops <- c(41, 61, 62, 63, 64, 99, 112, 113, 114, 126, 127, 244, 245, 246,                           #Observed A3_whydifficulty: senseless answers (eg. Yes) or multiple times the same answers over some rows
           129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 141, 142, 143, 144,                     #Same duration and exact same answers over some rows
           145, 147, 148, 149, 150, 151, 154, 155, 156, 157, 159, 160, 161, 163,                     #Same duration and exact same answers over some rows
           165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 207, 208, 210, 211,                     #Same duration and exact same answers over some rows
           212, 214, 215, 216, 219, 220, 221, 222, 224, 225, 226, 227, 229,                          #Same duration and exact same answers over some rows
           197, 198, 199, 205, 206, 209, 213, 217, 218, 223, 228, 230, 231, 232, 242, 243, 244, 249, #Control question (v_163): Answered Yes(1) instead of No (2)
           43, 83, 84, 85, 87, 200, 240,                                                             #Age over 100 years old. Entry in column "Age" is below 1923 or doesn't have a number as an answer
           53, 77, 128, 134, 140, 146, 153, 158, 162, 164, 170)                                      #Duration is under 2 minutes and not -1. -1 implies that there was an error in the system. 
data <- data[-drops,]
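As a quick sanity check (not part of the original script), the remaining number of observations can be verified against the note above:

nrow(data) # should be 152 of the original 256 observations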

#Renaming of columns
data <- data %>% 
  dplyr::rename("Economic" = "A1_ec", 
                "Environmental" = "A1_env", 
                "Social" = "A1_so", 
                "Economic_Difficulty" = "A2_ec_difficulty", 
                "Economic_Action" = "A2_ec_action", 
                "Economic_Priority" = "A2_ec_priority", 
                "Environmental_Difficulty" = "A2_env_difficulty", 
                "Environmental_Action" = "A2_env_action", 
                "Environmental_Priority" = "A2_env_priority",
                "Social_Difficulty" = "A2_so_difficulty", 
                "Social_Action" = "A2_so_action", 
                "Social_Priority" = "A2_so_priority",
                "Proactive_Steps" = "B1_proactivesteps", 
                "Impact_of_Reputation" = "B2_ImpactRep", 
                "Economic_Mindset" = "C1_ec.infl.net0", 
                "Costs_for_offsetting_up_0" = "C2_0cost", 
                "Costs_for_offsetting_up_19" = "C2_19cost",
                "Costs_for_offsetting_up_39" = "C2_39cost", 
                "Costs_for_offsetting_up_59" = "C2_59cost", 
                "Costs_for_offsetting_up_79" = "C2_79cost", 
                "Costs_for_offsetting_up_100" = "C2_100cost", 
                "Considering_future_costs" = "C3_futurecost",
                "Engagement_of_Project_Managers" = "C4_0ConlevelPM", 
                "Frequency_usage" = "EY1_freq.usage",
                "Experience_of_standard" = "FYuseyes1_experience",
                "goals_0" = "EY2N1_0goals", 
                "goals_9" = "EY2N1_9goals",
                "goals_19" = "EY2N1_19goals",
                "goals_29" = "EY2N1_29goals",
                "goals_39" = "EY2N1_39goals",
                "goals_49" = "EY2N1_49goals", 
                "Tradeoff" = "EY3N2_tradeoff",
                "Understanding_tools" = "GY1_levelofunderstanding", 
                "Sufficiency" = "GY2_sufficient", 
                "Knowing" = "D1KnowGPM_YN", 
                "Training" = "TrainingNo",
                "Consider_for_future" = "FN1_consider",
                "Why_not_consider" = "FN1_whyconsider",
                "Improvement_Standard" = "GY3_improvement")

# Add ID column at beginning
data <- tibble::rowid_to_column(data, "ID") 

# Defining NAs 
data[data == -99 | data == -77 | data == -66] <- NA
data$Economic[data$Economic == 0] <- NA
data$Environmental[data$Environmental == 0] <- NA
data$Social[data$Social == 0] <- NA
data$Economic_Difficulty[data$Economic_Difficulty == 0] <- NA
data$Economic_Action[data$Economic_Action == 0] <- NA
data$Economic_Priority[data$Economic_Priority == 0] <- NA
data$Environmental_Difficulty[data$Environmental_Difficulty == 0] <- NA
data$Environmental_Action[data$Environmental_Action == 0] <- NA
data$Environmental_Priority[data$Environmental_Priority == 0] <- NA
data$Social_Difficulty[data$Social_Difficulty == 0] <- NA
data$Social_Action[data$Social_Action == 0] <- NA
data$Social_Priority[data$Social_Priority == 0] <- NA
data$Proactive_Steps[data$Proactive_Steps == 0] <- NA
data$Impact_of_Reputation[data$Impact_of_Reputation == 0] <- NA
data$Economic_Mindset[data$Economic_Mindset == 0] <- NA
data$Costs_for_offsetting_up_0[data$Costs_for_offsetting_up_0 == 0] <- NA
data$Costs_for_offsetting_up_19[data$Costs_for_offsetting_up_19 == 0] <- NA
data$Costs_for_offsetting_up_39[data$Costs_for_offsetting_up_39 == 0] <- NA
data$Costs_for_offsetting_up_59[data$Costs_for_offsetting_up_59 == 0] <- NA
data$Costs_for_offsetting_up_79[data$Costs_for_offsetting_up_79 == 0] <- NA
data$Costs_for_offsetting_up_100[data$Costs_for_offsetting_up_100 == 0] <- NA
data$Engagement_of_Project_Managers[data$Engagement_of_Project_Managers == 0] <- NA
data$goals_0[data$goals_0 == 0] <- NA
data$goals_9[data$goals_9 == 0] <- NA
data$goals_19[data$goals_19 == 0] <- NA
data$goals_29[data$goals_29 == 0] <- NA
data$goals_39[data$goals_39 == 0] <- NA
data$goals_49[data$goals_49 == 0] <- NA
data$Tradeoff[data$Tradeoff == 0] <- NA
data$Experience_of_standard[data$Experience_of_standard == 0] <- NA
data$Consider_for_future[data$Consider_for_future == 0] <- NA
data$Sector[data$Sector == 0] <- NA
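The block of per-column assignments above could also be written more compactly with dplyr; a minimal sketch, assuming (as above) that 0 marks a non-answer only in these Likert-type columns (likert_zero_cols is an illustrative name and would need to list every column treated above):

likert_zero_cols <- c("Economic", "Environmental", "Social",
                      "Economic_Difficulty", "Economic_Action", "Economic_Priority",
                      "Proactive_Steps", "Impact_of_Reputation", "Economic_Mindset")
data <- data %>%
  dplyr::mutate(dplyr::across(dplyr::all_of(likert_zero_cols), ~ dplyr::na_if(., 0)))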

# Recoding data; all 0s are No's and all 1s are Yes' 
data$Training <- ifelse(data$Training == 0, 1, 0) # From 0:Training, 1: No training to 1:Training, 0:No training 
data$Training <- factor(data$Training, levels = c(0,1), labels = c("No training", "Training"))
data$Consider_for_future <- ifelse(data$Consider_for_future == 3, 0, 1) # From 1: consider, 3: not consider to 1: consider, 0: not consider
data$Consider_for_future <- factor(data$Consider_for_future, levels = c(0,1), labels = c("No", "Yes"))

# Writing some values as factors and giving labels
data$Knowing <- factor(data$Knowing,
                       labels = c("Yes", "No"), 
                       ordered = TRUE)

# Collapse 1&2 and 4&5 --> because few respondents chose 1 and 5 
data <- data %>%
  mutate(Proactive_Steps_zsmf = fancycut::fancycut(as.numeric(Proactive_Steps),
                                                   Unlikely = "[1, 2]",
                                                   Neutral = "3",
                                                   Likely = "[4, 5]",
                                                   na.bucket = 'missing'))
data$Proactive_Steps_zsmf[data$Proactive_Steps_zsmf == "missing"] <- NA
data$Proactive_Steps_zsmf <- droplevels(data$Proactive_Steps_zsmf)
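The same 1&2 / 4&5 collapsing recurs for other items later in the script, so a small helper could avoid the repetition. A sketch under the assumption that the items are coded 1–5 as above (collapse_likert is a hypothetical name, not part of the original analysis):

collapse_likert <- function(x, labels = c("Unlikely", "Neutral", "Likely")) {
  buckets <- stats::setNames(list("[1, 2]", "3", "[4, 5]"), labels)
  out <- do.call(fancycut::fancycut,
                 c(list(as.numeric(x)), buckets, list(na.bucket = "missing")))
  out[out == "missing"] <- NA
  droplevels(out)
}
# e.g. data$Proactive_Steps_zsmf <- collapse_likert(data$Proactive_Steps)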

# Median of duration 
summary(data$duration)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    -1.0   448.5   701.0   943.7  1145.2 11123.0

Hypotheses

H1: Of the three pillars of sustainability, environmental sustainability has arrived the least in project management.

## Data preparation 
# Gather columns Economic, Environmental and Social into long format and convert id and pillar into factor variables
# to be able to analyse first question in itself.
data.H1_wide_pillars <- data %>% dplyr::select(ID, Economic:Social)
data.H1_long_pillars <- data.H1_wide_pillars %>% 
  gather(key = "pillar", value = "pillar_score", Economic, Environmental, Social) %>%
  convert_as_factor(ID, pillar)

# Gather columns Economic Difficulty, Economic Action and Economic Priority into long format (-> Economic_DAP) and convert ID and Economic_DAP into factor variables; same for Environmental and Social 
data.H1_wide_EconomicDAP <- data %>% dplyr::select(ID, Economic_Difficulty:Economic_Priority)
data.H1_long_EconomicDAP <- data.H1_wide_EconomicDAP %>% 
  gather(key = "Economic_DAP", value = "rank_ec", Economic_Difficulty, Economic_Action, Economic_Priority) %>%
  convert_as_factor(ID, Economic_DAP)

data.H1_wide_EnvironmentalDAP <- data %>% dplyr::select(ID, Environmental_Difficulty:Environmental_Priority)
data.H1_long_EnvironmentalDAP <- data.H1_wide_EnvironmentalDAP %>% 
  gather(key = "Environment_DAP", value = "rank_env", Environmental_Difficulty, Environmental_Action, Environmental_Priority) %>%
  convert_as_factor(ID, Environment_DAP)

data.H1_wide_SocialDAP <- data %>% dplyr::select(ID, Social_Difficulty:Social_Priority)
data.H1_long_SocialDAP <- data.H1_wide_SocialDAP %>% 
  gather(key = "Social_DAP", value = "rank_so", Social_Difficulty, Social_Action, Social_Priority) %>%
  convert_as_factor(ID, Social_DAP)

# Bringing all three data sets DAP together
data.H1_long_pillars <- lsr::sortFrame(data.H1_long_pillars, ID)
data.H1_long_EconomicDAP <- lsr::sortFrame(data.H1_long_EconomicDAP, ID)
data.H1_long_EnvironmentalDAP <- lsr::sortFrame(data.H1_long_EnvironmentalDAP, ID)
data.H1_long_SocialDAP <- lsr::sortFrame(data.H1_long_SocialDAP, ID)

data.H1_long <- cbind(data.H1_long_pillars,data.H1_long_EconomicDAP[,-1], data.H1_long_EnvironmentalDAP[,-1], data.H1_long_SocialDAP[,-1])

# Removing unnecessary data frames 
rm(data.H1_wide_EconomicDAP, data.H1_long_EconomicDAP, data.H1_wide_EnvironmentalDAP, data.H1_long_EnvironmentalDAP, data.H1_wide_SocialDAP, data.H1_long_SocialDAP, data.H1_long_pillars, data.H1_wide_pillars)

# Making factors and labels
data.H1_long$pillar_score <- factor(data.H1_long$pillar_score, labels = c("Very little extent", "Little extent", "Neutral", "Large extent", "Very large extent"), 
                                    ordered = TRUE)

Question 1

## Descriptive Statistics 
# Summary tables with median, min, max, NAs, Q1, etc. (mean and sd are shown but not interpreted because the data are ordinal)
round(mosaic::favstats(~ Economic, data = data), 2) #missing: NAs
##  min Q1 median Q3 max mean   sd   n missing
##    1  3      4  4   5 3.53 0.89 149       3
round(mosaic::favstats(~ Environmental, data = data), 2)
##  min Q1 median Q3 max mean   sd   n missing
##    1  3      3  4   5 3.18 0.97 148       4
round(mosaic::favstats(~ Social, data = data), 2)
##  min Q1 median Q3 max mean   sd   n missing
##    1  3      3  4   5  3.3 0.94 148       4
# Number of answers per category (very little to very large extent) for each pillar: table 
table(data.H1_long$pillar, data.H1_long$pillar_score)
##                
##                 Very little extent Little extent Neutral Large extent
##   Economic                       3            14      50           65
##   Environmental                  7            25      64           39
##   Social                         4            24      58           48
##                
##                 Very large extent
##   Economic                     17
##   Environmental                13
##   Social                       14
# Collapse 1&2 and 4&5 --> because few respondents chose 1 and 5 
data.H1_long <- data.H1_long %>%
  mutate(pillar_score_zsmf = fancycut::fancycut(as.numeric(pillar_score),
                                                Little_extent = "[1, 2]",
                                                Neutral = "3",
                                                Large_extent = "[4, 5]",
                                                na.bucket = 'missing'))
data.H1_long$pillar_score_zsmf[data.H1_long$pillar_score_zsmf == "missing"] <- NA

data.H1_long$pillar_score_zsmf <- droplevels(data.H1_long$pillar_score_zsmf)

# Visualization of the number of answers: stacked barplot of the percentage of answers per "score" (extent of being addressed) within each pillar 
data.H1_long %>% filter(!is.na(pillar) & !is.na(pillar_score)) %>% 
  ggplot(aes(x = factor(pillar), fill = factor(6-as.numeric(pillar_score)))) +
  geom_bar(position = "fill") +
  labs(x = "Pillar", fill = "Extent of being addressed") +
  # geom_text(aes(label=scales::percent(as.numeric(pillar))), position=position_stack(vjust=0.5), size=2) +
  scale_fill_discrete(labels = c("very large extent", "4", "neutral", "2", "very little extent")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_classic() + #theme_(bw, minimal): background
  ggtitle("The extent to which each pillar of sustainability\
is addressed in PM of the organization") +
  theme(text = element_text(size = 14))  

## Connection with H2
# Give labels to Proactive steps in data
data$Proactive_Steps <- factor(data$Proactive_Steps, 
                               labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
                               ordered = TRUE)

# Is there a correlation between the extent to which the environmental pillar of sustainability is addressed and the likelihood that proactive steps are taken towards carbon neutrality?

There is a moderate to strong correlation between the extent to which environmental sustainability is addressed and the likelihood that a company takes steps towards carbon neutrality, which is plausible. The correlations for the other two pillars are weak.

cor.test(as.numeric(data$Proactive_Steps), data$Environmental, method = "spearman", use = "pairwise.complete.obs") 
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 277254, p-value = 3.23e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##      rho 
## 0.465446
cor.test(as.numeric(data$Proactive_Steps), data$Economic, method = "spearman", use = "pairwise.complete.obs") 
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 348581, p-value = 2.298e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3415498
cor.test(as.numeric(data$Proactive_Steps), data$Social, method = "spearman", use = "pairwise.complete.obs") 
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 332164, p-value = 8.293e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3595797
# Plot on "to what extent company takes proactive steps to reach carbon neutrality and the likelihood that it addresses Environmental sustainability" 
data %>% filter(!is.na(Proactive_Steps) & !is.na(Environmental)) %>% 
  ggplot(aes(x = factor(Proactive_Steps), fill = factor(Environmental))) +
  geom_bar(position = "fill") +
  labs(x = "Proactive Steps", fill = "Environmental") +
  scale_fill_discrete(labels = c("very unlikely", "2", "3", "4", "very likely")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  ggtitle("Proactive steps taken with likelihood of addressing\
environmental sustainability") + 
  theme(text = element_text(size = 14)) +
  theme_classic() 

## Regression analysis
# Friedman test: Is there a significant difference between the central tendencies of multiple dependent samples? 

H0: The score distributions of the three pillars are the same, i.e., the extent to which sustainability has arrived in PM does not differ between the pillars. H1: At least one pillar differs significantly from the others.

missings <- data %>% dplyr::filter(is.na(Economic) | is.na(Environmental) | is.na(Social)) # participants with missing values; they need to be excluded for KendallW
stats::friedman.test(y=as.matrix(data %>% dplyr::select(Economic:Social)), groups = data$ID)
## 
##  Friedman rank sum test
## 
## data:  as.matrix(data %>% dplyr::select(Economic:Social))
## Friedman chi-squared = 22.665, df = 2, p-value = 1.198e-05
# Effect size for the Friedman test
DescTools::KendallW(t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social)), correct = TRUE, test = TRUE)
## 
##  Kendall's coefficient of concordance Wt
## 
## data:  t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social))
## Kendall chi-squared = 22.665, df = 2, subjects = 3, raters = 148,
## p-value = 1.198e-05
## alternative hypothesis: Wt is greater 0
## sample estimates:
##         Wt 
## 0.07657073
rcompanion::kendallW(t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social)), correct = TRUE, ci=TRUE) # with CI -- the true effect size lies within this interval with 95% confidence --> not strictly necessary
##        W lower.ci upper.ci
## 1 0.0766   0.0277    0.145

The effect size is 0.0766, a small effect. The effect size quantifies how large the difference between the groups is; whether a Kendall's W of this magnitude counts as low, medium or high depends on the source.

# Posthoc to determine where the difference is
pairwise.wilcox.test(as.numeric(data.H1_long$pillar_score), data.H1_long$pillar,paired = T, p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon signed rank test with continuity correction 
## 
## data:  as.numeric(data.H1_long$pillar_score) and data.H1_long$pillar 
## 
##               Economic Environmental
## Environmental 0.00027  -            
## Social        0.00921  0.21750      
## 
## P value adjustment method: holm
# Effect size for posthoc test
coin::wilcoxsign_test(data$Economic ~ data$Environmental)@statistic@teststatistic/sqrt(148)
## [1] 0.3470781
coin::wilcoxsign_test(data$Economic ~ data$Social)@statistic@teststatistic/sqrt(148)
## [1] 0.2696815
coin::wilcoxsign_test(data$Social ~ data$Environmental)@statistic@teststatistic/sqrt(148)
## [1] 0.1001237
# Hypothesis test: Proportion test

For a pillar of sustainability to count as having arrived in PM, significantly more than 50% of participants must have chosen a score of 4 (large extent) or above.

# 1. Data preparation 
data$economic_arrived <- ifelse(data$Economic >= 4, 1, 0) #0:not arrived, 1:arrived
data$economic_arrived <- factor(data$economic_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))
data$environmental_arrived <- ifelse(data$Environmental >= 4, 1, 0)
data$environmental_arrived <- factor(data$environmental_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))
data$social_arrived <- ifelse(data$Social >= 4, 1, 0)
data$social_arrived <- factor(data$social_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))

# 2. Looking at the number of "arrived" and "not arrived" answers per pillar. 
# Table combining the relative frequencies of "arrived" and "not arrived" for all three pillars 
table1 <- data.table::as.data.table(epiDisplay::tab1(data$economic_arrived, graph = FALSE, cum.percent = TRUE))
table1 <- table1[c(1,2), c(1)]
colnames(table1) <- "Arrival in PM"
table1[1,] <- "Not arrived"
table1[2,] <- "Arrived"

table1_ec <- data.table::as.data.table(epiDisplay::tab1(data$economic_arrived, graph = FALSE, cum.percent = TRUE))
table1_ec <- table1_ec[1:2, 5]
colnames(table1_ec) <- "Economic"
table1 <- cbind(table1, table1_ec)
table1_env <- data.table::as.data.table(epiDisplay::tab1(data$environmental_arrived, graph = FALSE, cum.percent = TRUE))
table1_env <- table1_env[1:2, 5]
colnames(table1_env) <- "Environmental"
table1 <- cbind(table1, table1_env)
table1_so <- data.table::as.data.table(epiDisplay::tab1(data$social_arrived, graph = FALSE, cum.percent = TRUE))
table1_so <- table1_so[1:2, 5]
colnames(table1_so) <- "Social"
table1 <- cbind(table1, table1_so)
table1 <- adorn_totals(table1, where = "row", name = "Total [%]")
ft1 <- flextable(table1)
ft1 <- flextable::colformat_double(ft1, digits = 1)
ft1 <- bold(ft1, bold=T, part = "header")
flextable::autofit(ft1) 

| Arrival in PM | Economic | Environmental | Social |
|---------------|----------|---------------|--------|
| Not arrived   | 45.0     | 64.9          | 58.1   |
| Arrived       | 55.0     | 35.1          | 41.9   |
| Total [%]     | 100.0    | 100.0         | 100.0  |
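The same percentages can also be computed directly, without assembling the table column by column; a minimal sketch using the 'arrived' factors defined above:

sapply(data[, c("economic_arrived", "environmental_arrived", "social_arrived")],
       function(x) round(prop.table(table(x)) * 100, 1))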

prop.test(x = 83, n = 153, p = 0.5, alternative = "greater") 
## 
##  1-sample proportions test with continuity correction
## 
## data:  83 out of 153
## X-squared = 0.94118, df = 1, p-value = 0.166
## alternative hypothesis: true p is greater than 0.5
## 95 percent confidence interval:
##  0.4728247 1.0000000
## sample estimates:
##         p 
## 0.5424837

Not significant: it could not be shown that significantly more than 50% of participants answered 4 (large extent) or 5 (very large extent), so by the definition above economic sustainability has not arrived in PM. This does not necessarily mean that economic sustainability really has not arrived; more than half of the participants did choose 4 or higher, but the excess over 50% is not statistically significant.
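The counts passed to prop.test() above are entered by hand (x = 83, n = 153); a minimal sketch of deriving them from the data instead, based on the 'arrived' coding defined earlier (x_ec and n_ec are illustrative names):

x_ec <- sum(data$economic_arrived == "Arrived", na.rm = TRUE) # number who chose 4 or 5
n_ec <- sum(!is.na(data$economic_arrived))                    # number of valid answers
prop.test(x = x_ec, n = n_ec, p = 0.5, alternative = "greater")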


Questions 2, 3 and 4

## Descriptive Statistics 
# Number of answers per rank (1-3) for difficulty, action and priority within each pillar: tables 

Difficulty: 1 = easiest, 3 = most difficult; Action: 1 = least action taken, 3 = most action taken; Priority: 1 = highest, 3 = lowest.

table(data.H1_long$Economic_DAP, data.H1_long$rank_ec) 
##                      
##                        1  2  3
##   Economic_Action     19 67 54
##   Economic_Difficulty 57 69 16
##   Economic_Priority   66 54 23
table(data.H1_long$Environment_DAP, data.H1_long$rank_env) 
##                           
##                             1  2  3
##   Environmental_Action     43 75 22
##   Environmental_Difficulty 30 74 43
##   Environmental_Priority   41 70 30
table(data.H1_long$Social_DAP, data.H1_long$rank_so) 
##                    
##                      1  2  3
##   Social_Action     46 75 22
##   Social_Difficulty 33 77 34
##   Social_Priority   35 65 42
#Frequency tables in percentages
(table.Economic_DAP <- round(prop.table(table(data.H1_long$Economic_DAP, data.H1_long$rank_ec), 1), 4)*100)
##                      
##                           1     2     3
##   Economic_Action     13.57 47.86 38.57
##   Economic_Difficulty 40.14 48.59 11.27
##   Economic_Priority   46.15 37.76 16.08
(table.Environment_DAP <- round(prop.table(table(data.H1_long$Environment_DAP, data.H1_long$rank_env), 1), 4)*100)
##                           
##                                1     2     3
##   Environmental_Action     30.71 53.57 15.71
##   Environmental_Difficulty 20.41 50.34 29.25
##   Environmental_Priority   29.08 49.65 21.28
(table.Social_DAP <- round(prop.table(table(data.H1_long$Social_DAP, data.H1_long$rank_so), 1), 4)*100)
##                    
##                         1     2     3
##   Social_Action     32.17 52.45 15.38
##   Social_Difficulty 22.92 53.47 23.61
##   Social_Priority   24.65 45.77 29.58

Visualization of the number of answers: stacked barplots showing, for each pillar, the percentage of answers per rank for the questions on action, difficulty and priority. “Minimal” summarizes easiest to achieve, lowest priority, and least action taken; “maximal” summarizes hardest to achieve, highest priority, and most action taken.

g1 <- data.H1_long %>% filter(!is.na(Economic_DAP) & !is.na(rank_ec)) %>% 
  ggplot(aes(x = factor(Economic_DAP), fill = factor(4-as.numeric(rank_ec)))) +
  geom_bar(position = "fill") +
  labs(x = "Economic", fill = "Rank") +
  scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF"),
                    labels = c("maximal", "2", "minimal")) + # one fill scale with values and labels (avoids the scale-replacement warning)
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) + 
  theme_minimal() + #theme_(bw, minimal): background
  theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
  ggtitle("Ranking of each pillar in their priority, difficulty\
and the action being taken to be achieved") + 
  theme(text = element_text(size = 14)) +
  coord_flip() 
g2 <- data.H1_long %>% filter(!is.na(Environment_DAP) & !is.na(rank_env)) %>% 
  ggplot(aes(x = factor(Environment_DAP), fill = factor(4-as.numeric(rank_env)))) +
  geom_bar(position = "fill") +
  labs(x = "Environmental", fill = "Rank") +
  scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF"),
                    labels = c("maximal", "2", "minimal")) + # one fill scale with values and labels
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) +
  theme_minimal() + #theme_(bw, minimal): background
  theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
  theme(text = element_text(size = 14)) +
  coord_flip()
g3 <- data.H1_long %>% filter(!is.na(Social_DAP) & !is.na(rank_so)) %>% 
  ggplot(aes(x = factor(Social_DAP), fill = factor(4-as.numeric(rank_so)))) +
  geom_bar(position = "fill") +
  labs(x = "Social", fill = "Rank") +
  scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF"),
                    labels = c("maximal", "2", "minimal")) + # one fill scale with values and labels
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) + 
  theme_minimal() + #theme_(bw, minimal): background
  theme(text = element_text(size = 14)) +
  coord_flip()
plot_grid(g1, g2, g3, align = "v", ncol = 1)

Question 5

# Making data frame 
#df.open1 <- dplyr::select(data, Economic_Difficulty:A3_whydifficulty)
#df.open1 <- na.omit(df.open1)
#df.open1 <- as.data.table(df.open1)

# Export as an excel file
# write.xlsx(df.open1, file = "df.open1.xlsx", overwrite = TRUE)

H2: The higher the implied temperature rise (ITR) of an industry group, the more likely it is that project management within that industry group takes action to reach carbon neutrality.

## Data preparation
ITR <- read_excel("ITR by GICS industry group.xlsx")
ITR <- cbind(n = 1:nrow(ITR), ITR) 

data.H2 <- dplyr::select(data, ID, Sector, Proactive_Steps, Proactive_Steps_zsmf, Impact_of_Reputation)
data.H2 <- data.H2 %>% 
  dplyr::mutate(ITR = case_when(Sector == 1 ~ '6.0', Sector == 2 ~ '3.7', Sector == 3 ~ '3.5',
                                Sector == 4 ~ '2.9', Sector == 5 ~ '2.8', Sector == 6 ~ '2.5',
                                Sector == 7 ~ '2.2', Sector == 8 ~ '2.2', Sector == 9 ~ '2.2',
                                Sector == 10 ~ '2.0', Sector == 11 ~ '1.9', Sector == 12 ~ '1.9',
                                Sector == 13 ~ '1.9', Sector == 14 ~ '1.8', Sector == 15 ~ '1.8',
                                Sector == 16 ~ '1.8', Sector == 17 ~ '1.7', Sector == 18 ~ '1.6', 
                                Sector == 19 ~ '1.6', Sector == 20 ~ '1.6', Sector == 21 ~ '1.4', 
                                Sector == 22 ~ '1.4', Sector == 23 ~ '1.4', Sector == 24 ~ '1.4'))
data.H2$ITR <- as.numeric(data.H2$ITR)

data.H2$Proactive_Steps <- factor(data.H2$Proactive_Steps, 
                                  labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
                                  ordered = TRUE)
data.H2$Impact_of_Reputation <- factor(data.H2$Impact_of_Reputation, labels = c("not at all important", "low importance", "neutral", "very important", "extremely important"),
                                       ordered = TRUE)
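The sector-to-ITR mapping above can also be kept in a single lookup vector indexed by the sector code; a sketch with exactly the values used in the case_when above (itr_lookup is an illustrative name):

itr_lookup <- c(6.0, 3.7, 3.5, 2.9, 2.8, 2.5, 2.2, 2.2, 2.2, 2.0, 1.9, 1.9,
                1.9, 1.8, 1.8, 1.8, 1.7, 1.6, 1.6, 1.6, 1.4, 1.4, 1.4, 1.4)
data.H2$ITR <- itr_lookup[data.H2$Sector] # NA sectors stay NA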

Question 1

## Descriptive Statistics 
# Frequency table with median 
epiDisplay::tab1(data.H2$Proactive_Steps, graph = FALSE, cum.percent = TRUE) #percentages more relevant
## data.H2$Proactive_Steps : 
##               Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## very unlikely         9      5.9        5.9      6.0        6.0
## unlikely             18     11.8       17.8     12.0       18.0
## neutral              48     31.6       49.3     32.0       50.0
## likely               55     36.2       85.5     36.7       86.7
## very likely          20     13.2       98.7     13.3      100.0
## NA's                  2      1.3      100.0      0.0      100.0
##   Total             152    100.0      100.0    100.0      100.0
# Number and percentage of people per collapsed Proactive Steps category
epiDisplay::tab1(data.H2$Proactive_Steps_zsmf, graph = FALSE, cum.percent = TRUE) 
## data.H2$Proactive_Steps_zsmf : 
##          Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## Unlikely        27     17.8       17.8       18         18
## Neutral         48     31.6       49.3       32         50
## Likely          75     49.3       98.7       50        100
## NA's             2      1.3      100.0        0        100
##   Total        152    100.0      100.0      100        100
# Present epidisplay nicely 
t.H2.1 <- as.data.table(epiDisplay::tab1(data.H2$Proactive_Steps, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H2.1 <- t.H2.1[,c(2,3,4,6,7)] 
colnames(t.H2.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H2.1.ft <- flextable(t.H2.1)
t.H2.1.ft <- bold(t.H2.1.ft, bold=T, part = "header")
autofit(t.H2.1.ft)

| Category      | Frequency | %     | valid % | cum. valid % |
|---------------|-----------|-------|---------|--------------|
| very unlikely | 9         | 5.9   | 6.0     | 6.0          |
| unlikely      | 18        | 11.8  | 12.0    | 18.0         |
| neutral       | 48        | 31.6  | 32.0    | 50.0         |
| likely        | 55        | 36.2  | 36.7    | 86.7         |
| very likely   | 20        | 13.2  | 13.3    | 100.0        |
| NA's          | 2         | 1.3   | 0.0     | 100.0        |
| Total         | 152       | 100.0 | 100.0   | 100.0        |
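The tab1 → data.table → flextable steps above are repeated for several variables below; a small wrapper would reduce the duplication. A sketch that mirrors the exact steps used above (tab1_to_ft is a hypothetical helper name):

tab1_to_ft <- function(x) {
  tab <- data.table::as.data.table(
    epiDisplay::tab1(x, graph = FALSE, cum.percent = TRUE), keep.rownames = TRUE)
  tab <- tab[, c(2, 3, 4, 6, 7)]
  colnames(tab) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
  ft <- flextable::flextable(tab)
  ft <- flextable::bold(ft, bold = TRUE, part = "header")
  flextable::autofit(ft)
}
# e.g. tab1_to_ft(data.H2$Proactive_Steps) reproduces the table above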

# Number of answers in a barplot 
ggplot(data = subset(data.H2, !is.na(Proactive_Steps)), aes(x = factor(Proactive_Steps))) +
  scale_x_discrete(name = "Proactive Steps") +
  geom_bar(aes(y=after_stat(count)/sum(after_stat(count))), fill = "#00C08D") +
  scale_y_continuous(labels=scales::percent, limits = c(0,0.5)) +
  ggtitle("Number of participants with increasing likelihood of taking\
proactive steps towards carbon neutrality") + 
  labs(y = "Percent") + 
  theme_light() 

# Dataframe including Sector 14 which has 0 answers
sector_id <- list(Sector = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24))
sector_labels <- list(name = c("Energy", "Materials", "Automobiles & Components", "Utilities", "Capital Goods", "Food Beverage & Tobacco", "Semiconductors & Semiconductor Equipment",
                               "Consumber Durables & Apparel", "Consumer Discretionary Distribution & Retail", "Consumer Staples Distribution & Retail", "Banks", "Transportation",
                               "Technology Hardware & Equipment", "Equity Real Estate Investment Trusts (REITs)", "Consumer Services", "Financial Services", "Commercial & Professional Services", "Household & Personal Products",
                               "Health Care Equipment & Services", "Pharmaceuticals, Biotechnology & Life Sciences", "Media & Entertainment", "Telecommunication Services", "Software & Services", "Insurance"))

# Including Sector 14 into the barplot 
Sector_df1 <- data.frame(sector_id, sector_labels)
Sector_df2 <- data.H2 %>%                   
  group_by(Sector) %>%         
  dplyr::summarize(count = n()) 
Sector_df <- full_join(Sector_df1, Sector_df2, by = "Sector")
Sector_df[14, 3] <- 0

Sector_df <- na.omit(Sector_df)
Sector_df$name <- factor(Sector_df$name, levels = Sector_df$name)

Every sector has at least one answer except sector 14 (Equity Real Estate Investment Trusts (REITs)). Most answers come from the ‘Banks’ and ‘Financial Services’ sectors.

ggplot(data = Sector_df, aes(y = count, x = name)) +
  scale_x_discrete(name = "Industry Sector") +
  scale_y_continuous(name = "Count") +
  geom_bar(stat = "identity", fill = "#00C08D") +
  ggtitle("Number of participants per industry sector") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        plot.title = element_text(hjust=0.0)) +
  theme(text = element_text(size = 14)) 

table(data.H2$Sector)
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 15 16 17 18 19 20 21 22 23 24 
## 10 10  8 11 13 10  8  2  1  2 16  2  4  4 14 10  1  3  8  4  1  4  1
# Looking at ITRs 
round(favstats(~ ITR, data = data.H2), 2)
##  min  Q1 median  Q3 max mean   sd   n missing
##  1.4 1.8      2 2.9   6 2.52 1.15 147       5
# Boxplot: ITR per answer of Proactive steps 
data.H2 %>% filter(!is.na(Proactive_Steps)) %>% # !is.na: keep only rows without NAs
  ggplot(aes(y = Proactive_Steps, x = ITR, fill = Proactive_Steps)) + # data.H2 must not be passed to ggplot() again here
  labs(x = "ITR [°C]", y = "Proactive Steps") +
  scale_fill_manual(values = c("#E76BF3","#619CFF", "#00C08D", "#7CAE00", "#F8766D"), guide = guide_legend(reverse = TRUE)) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  ggtitle("ITR per category of Proactive Steps") +
  geom_boxplot()
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).

# Spearman's correlation between ITR and likelihood of steps being taken towards carbon neutrality 

Very weak non-significant correlations

cor.test(as.numeric(data.H2$Proactive_Steps), as.numeric(data.H2$ITR), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 492853, p-value = 0.5508
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.04976658
cor.test(as.numeric(data.H2$Proactive_Steps_zsmf), as.numeric(data.H2$ITR), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 457897, p-value = 0.159
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1171627
## Regression Analysis 
# OLR
H2.OLR <- polr(Proactive_Steps_zsmf ~ ITR, data = data.H2, Hess = TRUE)
summary(H2.OLR)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ ITR, data = data.H2, Hess = TRUE)
## 
## Coefficients:
##      Value Std. Error t value
## ITR 0.2908     0.1543   1.885
## 
## Intercepts:
##                  Value   Std. Error t value
## Unlikely|Neutral -0.8705  0.4279    -2.0346
## Neutral|Likely    0.7336  0.4179     1.7555
## 
## Residual Deviance: 292.3737 
## AIC: 298.3737 
## (6 observations deleted due to missingness)
## Residual analysis
# summary table 
summary_table_H2.OLR <- coef(summary(H2.OLR))
pval1 <- round(pnorm(abs(summary_table_H2.OLR[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H2.OLR <- cbind(summary_table_H2.OLR, "p value" = pval1) #p-value: 0.059 (just) not significant 
summary_table_H2.OLR # only the p-value of the IV (ITR) matters; p > 0.05: no significant association between ITR and Proactive Steps --> just not significant 
##                       Value Std. Error   t value p value
## ITR               0.2907705  0.1542909  1.884560   0.059
## Unlikely|Neutral -0.8705083  0.4278597 -2.034565   0.042
## Neutral|Likely    0.7335940  0.4178897  1.755473   0.079
pval1 <- pval1[1:5]
cbind(exp(cbind(OR = coef(H2.OLR), ci = confint(H2.OLR))),"p value" = pval1) # confidence interval and OR (odds ratio): when the ITR rises by 1°C, the odds of a higher Proactive Steps category increase by a factor of 1.34, i.e. the odds increase by 34% 
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H2.OLR), ci = confint(H2.OLR))), `p value`
## = pval1): number of rows of result is not a multiple of vector length (arg 2)
##              OR       ci p value
## 2.5 %  1.337458 1.000437   0.059
## 97.5 % 1.337458 1.844209   0.042
brant::brant(H2.OLR) # not significant --> good: H0 (the parallel regression assumption holds) is not rejected
## -------------------------------------------- 
## Test for X2  df  probability 
## -------------------------------------------- 
## Omnibus      0.26    1   0.61
## ITR      0.26    1   0.61
## -------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds

The association between the ITR and Proactive Steps is just short of significant. When the ITR (the explanatory variable) rises by 1°C (one unit), the odds of a one-category increase in Proactive Steps increase by a factor of 1.34, i.e. by 34% (OR = 1.34, p = 0.059).
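The cbind above recycles pval1 and mislabels its rows (hence the warning); a minimal sketch that reports the odds ratio and its profile-likelihood confidence interval for the ITR coefficient on its own, from the same fitted model:

or_itr <- exp(coef(H2.OLR))    # odds ratio for a 1 °C increase in ITR
ci_itr <- exp(confint(H2.OLR)) # 95% profile CI, already on the odds-ratio scale
round(c(OR = unname(or_itr), ci_itr), 3)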


Question 2

## Descriptive Statistics 
# Frequency table
epiDisplay::tab1(data.H2$Impact_of_Reputation, graph = FALSE, cum.percent = TRUE) 
## data.H2$Impact_of_Reputation : 
##                      Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## not at all important         3      2.0        2.0      2.0        2.0
## low importance               6      3.9        5.9      4.0        6.0
## neutral                     56     36.8       42.8     37.1       43.0
## very important              66     43.4       86.2     43.7       86.8
## extremely important         20     13.2       99.3     13.2      100.0
## NA's                         1      0.7      100.0      0.0      100.0
##   Total                    152    100.0      100.0    100.0      100.0
# Present epidisplay nicely
t.H2.2 <- as.data.table(epiDisplay::tab1(data.H2$Impact_of_Reputation, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H2.2 <- t.H2.2[,c(2,3,4,6,7)] 
colnames(t.H2.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H2.2.ft <- flextable(t.H2.2)
t.H2.2.ft <- bold(t.H2.2.ft, bold=T, part = "header")
autofit(t.H2.2.ft)

| Category             | Frequency | %     | valid % | cum. valid % |
|----------------------|-----------|-------|---------|--------------|
| not at all important | 3         | 2.0   | 2.0     | 2.0          |
| low importance       | 6         | 3.9   | 4.0     | 6.0          |
| neutral              | 56        | 36.8  | 37.1    | 43.0         |
| very important       | 66        | 43.4  | 43.7    | 86.8         |
| extremely important  | 20        | 13.2  | 13.2    | 100.0        |
| NA's                 | 1         | 0.7   | 0.0     | 100.0        |
| Total                | 152       | 100.0 | 100.0   | 100.0        |

# Overview over the number of answers per point on the likert scale "Impact of Reputation" 
ggplot(data = subset(data.H2, !is.na(Impact_of_Reputation)), aes(Impact_of_Reputation)) +
  scale_x_discrete(name = "Impact of Reputation") +
  geom_bar(fill = "#00C08D") +
  ggtitle("Number of answers with increasing importance of reputation") +
  theme(text = element_text(size = 14)) +
  theme_light()

# Spearman's correlation between reputation and likelihood of steps being taken towards carbon neutrality 

A rather moderate correlation

cor.test(as.numeric(data.H2$Impact_of_Reputation), as.numeric(data.H2$Proactive_Steps), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 321811, p-value = 1.291e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4162688

H3: The higher the likelihood that a company considers the long-term costs because of the anticipation of having to offset CO2 emissions, the more likely it is that project management within the company will take action to reach carbon neutrality.

## Data preparation
# Making data frame for H3
data.H3_wide <- dplyr::select(data, ID, Proactive_Steps, Proactive_Steps_zsmf, Economic_Mindset, 
                              Costs_for_offsetting_up_0, Costs_for_offsetting_up_19, Costs_for_offsetting_up_39, Costs_for_offsetting_up_59, Costs_for_offsetting_up_79, Costs_for_offsetting_up_100, 
                              Considering_future_costs, Engagement_of_Project_Managers)

# Defining variables as factors with labels
data.H3_wide$Economic_Mindset <- factor(data.H3_wide$Economic_Mindset, 
                                        labels = c("very little extent", "little extent", "neutral", "large extent", "very large extent"),
                                        ordered = TRUE)
data.H3_wide$Proactive_Steps <- factor(data.H3_wide$Proactive_Steps, 
                                       labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
                                       ordered = TRUE)
data.H3_wide$Considering_future_costs <- factor(data.H3_wide$Considering_future_costs,
                                                labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                ordered = TRUE)
data.H3_wide$Engagement_of_Project_Managers <- factor(data.H3_wide$Engagement_of_Project_Managers,
                                                      labels = c("very little extent", "little extent", "neutral", "large extent", "very large extent"), 
                                                      ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_0 <- factor(data.H3_wide$Costs_for_offsetting_up_0,
                                                 labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                 ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_19 <- factor(data.H3_wide$Costs_for_offsetting_up_19,
                                                  labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                  ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_39 <- factor(data.H3_wide$Costs_for_offsetting_up_39,
                                                  labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                  ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_59 <- factor(data.H3_wide$Costs_for_offsetting_up_59,
                                                  labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                  ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_79 <- factor(data.H3_wide$Costs_for_offsetting_up_79,
                                                  labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                  ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_100 <- factor(data.H3_wide$Costs_for_offsetting_up_100,
                                                   labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                                                   ordered = TRUE)
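The repeated factor() calls above could be collapsed with dplyr::across(); a sketch of an equivalent formulation, assuming the same two label sets as above (the vector names likelihood_labels and extent_labels are illustrative):

likelihood_labels <- c("very unlikely", "unlikely", "neutral", "likely", "very likely")
extent_labels     <- c("very little extent", "little extent", "neutral", "large extent", "very large extent")
data.H3_wide <- data.H3_wide %>%
  dplyr::mutate(
    dplyr::across(c(Proactive_Steps, Considering_future_costs,
                    dplyr::starts_with("Costs_for_offsetting")),
                  ~ factor(., labels = likelihood_labels, ordered = TRUE)),
    dplyr::across(c(Economic_Mindset, Engagement_of_Project_Managers),
                  ~ factor(., labels = extent_labels, ordered = TRUE)))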

Question 1

## Descriptive
# Frequency table 
epiDisplay::tab1(data.H3_wide$Economic_Mindset, graph = FALSE, cum.percent = TRUE) 
## data.H3_wide$Economic_Mindset : 
##                    Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## very little extent         5      3.3        3.3      3.3        3.3
## little extent             25     16.4       19.7     16.7       20.0
## neutral                   48     31.6       51.3     32.0       52.0
## large extent              57     37.5       88.8     38.0       90.0
## very large extent         15      9.9       98.7     10.0      100.0
## NA's                       2      1.3      100.0      0.0      100.0
##   Total                  152    100.0      100.0    100.0      100.0
# Present epidisplay nicely
t.H3.1 <- as.data.table(epiDisplay::tab1(data.H3_wide$Economic_Mindset, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H3.1 <- t.H3.1[,c(2,3,4,6,7)] 
colnames(t.H3.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H3.1.ft <- flextable(t.H3.1)
t.H3.1.ft <- bold(t.H3.1.ft, bold=T, part = "header")
autofit(t.H3.1.ft)

| Category           | Frequency | %     | valid % | cum. valid % |
|--------------------|-----------|-------|---------|--------------|
| very little extent | 5         | 3.3   | 3.3     | 3.3          |
| little extent      | 25        | 16.4  | 16.7    | 20.0         |
| neutral            | 48        | 31.6  | 32.0    | 52.0         |
| large extent       | 57        | 37.5  | 38.0    | 90.0         |
| very large extent  | 15        | 9.9   | 10.0    | 100.0        |
| NA's               | 2         | 1.3   | 0.0     | 100.0        |
| Total              | 152       | 100.0 | 100.0   | 100.0        |

# Overview over the number of answers per point on the likert scale "Economic_Mindset" 
ggplot(data = subset(data.H3_wide, !is.na(Economic_Mindset)), aes(Economic_Mindset)) +
  scale_x_discrete(name = "Influence of economic mindset") +
  geom_bar(fill = "#00C08D") +
  ggtitle("Number of answers per category of the variable 'economic mindset'") + 
  theme(text = element_text(size = 14)) +
  theme_light()

# Spearman's correlation between the extent of influence of economic sustainability mindset on decision-making processes related to carbon neutrality and likelihood of steps being taken towards carbon neutrality. 

The more the economic sustainability mindset influences decision-making processes regarding carbon neutrality, the more likely the company is to take steps towards carbon neutrality in order to prevent future costs.

cor.test(as.numeric(data.H3_wide$Economic_Mindset), as.numeric(data.H3_wide$Proactive_Steps), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 318329, p-value = 7.939e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4225846

0.422: Moderate to strong correlation (depending on source)

# Visualization of the number of answers: stacked barplot of the percentage of Proactive Steps answers per point on the "Economic mindset" scale. 
data.H3_wide %>% filter(!is.na(Economic_Mindset) & !is.na(Proactive_Steps)) %>% 
  ggplot(aes(x = factor(Economic_Mindset), fill = factor(6-as.numeric(Proactive_Steps)))) + #x: EV, fill: ZV
  geom_bar(position = "fill") +
  labs(x = "Economic Mindset", fill = "Proactive Steps") +
  scale_fill_discrete(labels = c("very likely", "4", "3", "2", "very unlikely")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() + #theme_(bw, minimal): background 
  theme(text = element_text(size = 14)) +
  ggtitle("Influence of the economic mindset on proactive steps\
towards carbon neutrality")

Question 2

## Data preparation
# Costs for offsetting into long format
data.H3_long <- data.H3_wide %>% 
  gather(key = "Offsetting", value = "score",
         Costs_for_offsetting_up_0, Costs_for_offsetting_up_19, Costs_for_offsetting_up_39,
         Costs_for_offsetting_up_59, Costs_for_offsetting_up_79, Costs_for_offsetting_up_100) %>% 
  convert_as_factor(ID, Offsetting) 
data.H3_long$score <- factor(data.H3_long$score, levels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
                             labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"), 
                             ordered = TRUE)

# Levels as factors and in the right order
data.H3_long$Offsetting <- factor(data.H3_long$Offsetting, levels=c("Costs_for_offsetting_up_0", "Costs_for_offsetting_up_19", "Costs_for_offsetting_up_39", "Costs_for_offsetting_up_59", "Costs_for_offsetting_up_79", "Costs_for_offsetting_up_100"))

## Descriptive
#Frequency table in percentages
(table.offsetting <- round(prop.table(table(data.H3_long$Offsetting, data.H3_long$score), 1), 4)*100)
##                              
##                               very unlikely unlikely neutral likely very likely
##   Costs_for_offsetting_up_0           10.07    11.41   28.19  20.81       29.53
##   Costs_for_offsetting_up_19           7.48     8.84   29.93  36.05       17.69
##   Costs_for_offsetting_up_39           7.04    11.97   38.73  32.39        9.86
##   Costs_for_offsetting_up_59          10.14    16.22   32.43  28.38       12.84
##   Costs_for_offsetting_up_79          18.00    22.00   21.33  24.67       14.00
##   Costs_for_offsetting_up_100         31.72    12.41   19.31  16.55       20.00
table.offsetting <- as.data.frame(table.offsetting)
table.offsetting <- pivot_wider(table.offsetting, names_from = Var2, values_from = Freq)
table.offsetting$Var1 <- car::recode(table.offsetting$Var1, "'Costs_for_offsetting_up_0'='Up 0%';'Costs_for_offsetting_up_19'='Up 19%'; 'Costs_for_offsetting_up_39'='Up 39%';'Costs_for_offsetting_up_59'='Up 59%';'Costs_for_offsetting_up_79'='Up 79%';'Costs_for_offsetting_up_100'='Up 100%'")

# Looking at the number of answers per point on the Likert scale (likelihood of taking action to reach carbon neutrality). 
# Table with increasing costs and likelihood
colnames(table.offsetting) <- c("Mandatory costs for Offsetting", "very unlikely", "unlikely", "neutral", "likely", "very likely")
ft2 <- flextable(table.offsetting)
ft2 <- flextable::colformat_double(ft2, digits = 1)
ft2 <- bold(ft2, bold=T, part = "header")
flextable::autofit(ft2) 

| Mandatory costs for Offsetting | very unlikely | unlikely | neutral | likely | very likely |
|--------------------------------|---------------|----------|---------|--------|-------------|
| Up 0%                          | 10.1          | 11.4     | 28.2    | 20.8   | 29.5        |
| Up 19%                         | 7.5           | 8.8      | 29.9    | 36.0   | 17.7        |
| Up 39%                         | 7.0           | 12.0     | 38.7    | 32.4   | 9.9         |
| Up 59%                         | 10.1          | 16.2     | 32.4    | 28.4   | 12.8        |
| Up 79%                         | 18.0          | 22.0     | 21.3    | 24.7   | 14.0        |
| Up 100%                        | 31.7          | 12.4     | 19.3    | 16.6   | 20.0        |

# Stacked barplot for percentage of answers of likelihood of company taking action to reach carbon neutrality per increasing costs for offsetting emissions. 
data.H3_long %>% filter(!is.na(Offsetting) & !is.na(score)) %>% 
  ggplot(aes(x = factor(Offsetting), fill = factor(score))) +
  geom_bar(position = "fill") +
  labs(x = "Costs for offsetting", fill = "Likelihood of company \
taking action to achieve\
carbon neutrality") +
  scale_fill_discrete(labels = c("very unlikely", "2", "3", "4", "very likely")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() + #theme_(bw, minimal): background
  ggtitle("Likelihood of organization taking action to achieve carbon\
neutrality as mandatory carbon offsetting costs rise") +
  theme(text = element_text(size = 14)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        plot.title = element_text(hjust=0.0)) #h.just:horizontal adjustment (0.5 in the middle)

# Spearman's correlation between increasing mandatory costs for offsetting carbon emissions and likelihood of steps being taken towards carbon neutrality. 
cor.test(as.numeric(data.H3_long$Offsetting), as.numeric(data.H3_long$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 134577317, p-value = 6.48e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1808533

A weak but significant negative correlation: the likelihood of taking action decreases as the costs increase. Possibly the question was not understood correctly by many participants.


Question 3

## Data preparation
# Combine categories 1&2 and 4&5 --> because few respondents chose 1 and 5 
data.H3_wide <- data.H3_wide %>%
  mutate(Considering_future_costs_zsmf = fancycut::fancycut(as.numeric(Considering_future_costs),
                                                            Unlikely = "[1, 2]",
                                                            Neutral = "3",
                                                            Likely = "[4, 5]", 
                                                            na.bucket = 'missing'))
data.H3_wide$Considering_future_costs_zsmf[data.H3_wide$Considering_future_costs_zsmf=="missing"] <- NA
data.H3_wide$Considering_future_costs_zsmf <- droplevels(data.H3_wide$Considering_future_costs_zsmf)  

data.H3_wide <- data.H3_wide %>%
  mutate(Economic_Mindset_zsmf = fancycut::fancycut(as.numeric(Economic_Mindset),
                                                    `Little extent` = "[1, 2]",
                                                    Neutral = "3",
                                                    `Large extent` = "[4, 5]", 
                                                    na.bucket = 'missing'))
data.H3_wide$Economic_Mindset_zsmf[data.H3_wide$Economic_Mindset_zsmf=="missing"] <- NA
data.H3_wide$Economic_Mindset_zsmf <- droplevels(data.H3_wide$Economic_Mindset_zsmf) 

## Descriptive 
# Frequency table 
epiDisplay::tab1(data.H3_wide$Considering_future_costs, graph = FALSE, cum.percent = TRUE)
## data.H3_wide$Considering_future_costs : 
##               Frequency Percent Cum. percent
## very unlikely         5     3.3          3.3
## unlikely             16    10.5         13.8
## neutral              50    32.9         46.7
## likely               45    29.6         76.3
## very likely          36    23.7        100.0
##   Total             152   100.0        100.0
# Present epidisplay nicely
t.H3.3 <- as.data.table(epiDisplay::tab1(data.H3_wide$Considering_future_costs, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H3.3 <- t.H3.3[,c(2,3,4,5)] 
colnames(t.H3.3) <- c("Category", "Frequency", "valid %", "cum. valid %") 
t.H3.3.ft <- flextable(t.H3.3)
t.H3.3.ft <- bold(t.H3.3.ft, bold=T, part = "header")
autofit(t.H3.3.ft)

Category        Frequency   valid %   cum. valid %
very unlikely           5       3.3            3.3
unlikely               16      10.5           13.8
neutral                50      32.9           46.7
likely                 45      29.6           76.3
very likely            36      23.7          100.0
Total                 152     100.0          100.0

# Plot on "likelihood of taking proactive steps with differences in the likelihood of the company considering future costs" 
data.H3_wide %>% filter(!is.na(Considering_future_costs) & !is.na(Proactive_Steps)) %>% 
  ggplot(aes(x = factor(Considering_future_costs), fill = factor(6-as.numeric(Proactive_Steps)))) + #x: explanatory variable, fill: outcome variable
  geom_bar(position = "fill") +
  labs(x = "Considering future costs regarding offsetting emissions", fill = "Proactive Steps") +
  scale_fill_discrete(labels = c("very likely", "4", "3", "2", "very unlikely")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  ggtitle("Influence of considering future offset costs on\
proactive steps") 

# Spearman's correlation between the likelihood of the company considering future offsetting costs and the likelihood of proactive steps being taken towards carbon neutrality. 
cor.test(as.numeric(data.H3_wide$Proactive_Steps), as.numeric(data.H3_wide$Considering_future_costs), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 342873, p-value = 7.847e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3904216

A moderate, significant positive correlation (rho = 0.39)

# Spearman's correlation of economic sustainability mindset and considering future costs of having to offset CO2 emissions

The more a company is already considering the future costs of having to offset its CO2 emissions, the more the economic mindset influences decision-making processes related to carbon neutrality.

cor.test(as.numeric(data.H3_wide$Economic_Mindset), as.numeric(data.H3_wide$Considering_future_costs), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 352009, p-value = 2.395e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3741785
## Regression Analysis
# OLR testing H3
H3.OLR.1 <- polr(Proactive_Steps_zsmf ~ Considering_future_costs_zsmf, data = data.H3_wide, Hess = TRUE)
summary(H3.OLR.1)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ Considering_future_costs_zsmf, 
##     data = data.H3_wide, Hess = TRUE)
## 
## Coefficients:
##                                       Value Std. Error t value
## Considering_future_costs_zsmfNeutral 0.9614     0.4819   1.995
## Considering_future_costs_zsmfLikely  1.7908     0.4726   3.789
## 
## Intercepts:
##                  Value   Std. Error t value
## Unlikely|Neutral -0.3924  0.4053    -0.9681
## Neutral|Likely    1.2685  0.4224     3.0030
## 
## Residual Deviance: 289.6303 
## AIC: 297.6303 
## (2 observations deleted due to missingness)
H3.OLR.2 <- polr(Proactive_Steps_zsmf ~ Considering_future_costs_zsmf + Economic_Mindset_zsmf, data = data.H3_wide, Hess = TRUE)
summary(H3.OLR.2)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ Considering_future_costs_zsmf + 
##     Economic_Mindset_zsmf, data = data.H3_wide, Hess = TRUE)
## 
## Coefficients:
##                                       Value Std. Error t value
## Considering_future_costs_zsmfNeutral 0.4807     0.5202  0.9241
## Considering_future_costs_zsmfLikely  1.0965     0.5080  2.1585
## Economic_Mindset_zsmfNeutral         0.9249     0.4688  1.9729
## Economic_Mindset_zsmfLarge extent    1.9515     0.4813  4.0549
## 
## Intercepts:
##                  Value  Std. Error t value
## Unlikely|Neutral 0.1472 0.4503     0.3268 
## Neutral|Likely   1.9928 0.4897     4.0691 
## 
## Residual Deviance: 269.5101 
## AIC: 281.5101 
## (3 observations deleted due to missingness)
## Residual analysis
# summary table1 
summary_table_H3.OLR.1 <- coef(summary(H3.OLR.1))
pval2 <- round(pnorm(abs(summary_table_H3.OLR.1[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H3.OLR.1 <- cbind(summary_table_H3.OLR.1, "p value" = pval2) 
summary_table_H3.OLR.1 #only p-value of IV important --> first 2 rows 
##                                           Value Std. Error    t value p value
## Considering_future_costs_zsmfNeutral  0.9613997  0.4818886  1.9950666   0.046
## Considering_future_costs_zsmfLikely   1.7907847  0.4726029  3.7891949   0.000
## Unlikely|Neutral                     -0.3923514  0.4052893 -0.9680774   0.333
## Neutral|Likely                        1.2685439  0.4224214  3.0030297   0.003
pval2 <- pval2[1:5]
cbind(exp(cbind(OR = coef(H3.OLR.1), ci = confint(H3.OLR.1))),"p value" = pval2) 
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H3.OLR.1), ci = confint(H3.OLR.1))), :
## number of rows of result is not a multiple of vector length (arg 2)
##                                            OR    2.5 %   97.5 % p value
## Considering_future_costs_zsmfNeutral 2.615355 1.024621  6.83358   0.046
## Considering_future_costs_zsmfLikely  5.994154 2.401823 15.44645   0.000

The category left out is the reference group, here Considering_future_costs = Unlikely. Considering_future_costs = Neutral differs significantly, though only marginally (p = 0.046), from Unlikely with respect to the DV (Proactive Steps). The odds ratio of 2.62 means that, within this sample, respondents in the Neutral category have 2.62 times the odds of reporting a higher Proactive Steps category than those in the Unlikely reference category; for the Likely category the odds are about 6 times higher (OR = 5.99, p < 0.001).
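
As a quick check on this reading (a sketch using the fitted model above): the odds ratios are simply the exponentiated coefficients, and cutting the p-value vector to the coefficient rows avoids the recycling warning shown in the output.

# ORs are the exponentiated coefficients of the ordinal model (~2.62 and ~5.99 here)
exp(coef(H3.OLR.1))
# Keep only the p-values that belong to the coefficients (not the intercepts),
# so the column lengths match and no recycling warning appears
pval2_coef <- pval2[1:length(coef(H3.OLR.1))]  # pval2_coef is introduced here for illustration
cbind(exp(cbind(OR = coef(H3.OLR.1), confint(H3.OLR.1))), "p value" = pval2_coef)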

brant::brant(H3.OLR.1) #omnibus: not significant --> which is good!! --> because H0: the proportional odds/parallel regression assumption is met: the slope between Likert points is roughly the same across proactive steps and considering future costs
## ---------------------------------------------------------------------------- 
## Test for                 X2  df  probability 
## ---------------------------------------------------------------------------- 
## Omnibus                      1.37    2   0.5
## Considering_future_costs_zsmfNeutral 0.01    1   0.92
## Considering_future_costs_zsmfLikely      0.74    1   0.39
## ---------------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
# summary table2 

In the second model the p-values are “worse”: for Considering future costs, the Neutral (vs. Unlikely) contrast is no longer significantly associated with Proactive Steps, while the Likely (vs. Unlikely) contrast remains significant. For Economic Mindset, the Neutral contrast is borderline (p = 0.049) and the Large extent contrast is clearly significant.

summary_table_H3.OLR.2 <- coef(summary(H3.OLR.2))
pval3 <- round(pnorm(abs(summary_table_H3.OLR.2[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H3.OLR.2 <- cbind(summary_table_H3.OLR.2, "p value" = pval3) 
summary_table_H3.OLR.2 
##                                          Value Std. Error   t value p value
## Considering_future_costs_zsmfNeutral 0.4806577  0.5201587 0.9240598   0.355
## Considering_future_costs_zsmfLikely  1.0965260  0.5080100 2.1584732   0.031
## Economic_Mindset_zsmfNeutral         0.9248988  0.4687918 1.9729415   0.049
## Economic_Mindset_zsmfLarge extent    1.9515386  0.4812796 4.0548953   0.000
## Unlikely|Neutral                     0.1471618  0.4503295 0.3267869   0.744
## Neutral|Likely                       1.9927830  0.4897380 4.0690799   0.000
pval3 <- pval3[1:5]
cbind(exp(cbind(OR = coef(H3.OLR.2), ci = confint(H3.OLR.2))),"p value" = pval3) 
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H3.OLR.2), ci = confint(H3.OLR.2))), :
## number of rows of result is not a multiple of vector length (arg 2)
##                                            OR     2.5 %    97.5 % p value
## Considering_future_costs_zsmfNeutral 1.617138 0.5818307  4.518483   0.355
## Considering_future_costs_zsmfLikely  2.993748 1.1057757  8.193019   0.031
## Economic_Mindset_zsmfNeutral         2.521613 1.0133018  6.411839   0.049
## Economic_Mindset_zsmfLarge extent    7.039510 2.7759832 18.443562   0.000
brant::brant(H3.OLR.2) #not significant --> which is good!! --> because H0: Requirement parallel regression assumption met
## ---------------------------------------------------------------------------- 
## Test for                 X2  df  probability 
## ---------------------------------------------------------------------------- 
## Omnibus                      4.26    4   0.37
## Considering_future_costs_zsmfNeutral 0.1 1   0.76
## Considering_future_costs_zsmfLikely      0.78    1   0.38
## Economic_Mindset_zsmfNeutral         1.02    1   0.31
## Economic_Mindset_zsmfLarge extent        0.16    1   0.69
## ---------------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H3.OLR.2): 1 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.

Question 4

## Descriptive 
# Frequency table 
epiDisplay::tab1(data.H3_wide$Engagement_of_Project_Managers, graph = FALSE, cum.percent = TRUE)
## data.H3_wide$Engagement_of_Project_Managers : 
##                    Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## very little extent        13      8.6        8.6      8.6        8.6
## little extent             30     19.7       28.3     19.9       28.5
## neutral                   57     37.5       65.8     37.7       66.2
## large extent              29     19.1       84.9     19.2       85.4
## very large extent         22     14.5       99.3     14.6      100.0
## NA's                       1      0.7      100.0      0.0      100.0
##   Total                  152    100.0      100.0    100.0      100.0
# Overview of the number of answers per point on the Likert scale "Engagement of Project Managers" 
ggplot(data = subset(data.H3_wide, !is.na(Engagement_of_Project_Managers)), aes(Engagement_of_Project_Managers)) +
  scale_x_discrete(name = "Engagement of Project Managers") +
  geom_bar(fill = "#00C08D") +
  labs(title = "Amount of answers per point of extent of engagement\
of project managers in the topic of carbon neutrality", y = "Percentage of total cases") +
  theme(text = element_text(size = 14)) +
  theme_minimal() 

#Correlation btw Engagement of project managers and industry sector 
cor.test(as.numeric(data$Engagement_of_Project_Managers), as.numeric(data$Sector), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 580450, p-value = 0.1521
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1191234

Analysis of who uses the GPM P5 Standard and who doesn’t

## Descriptive 
# Frequency table 
epiDisplay::tab1(data$Knowing, graph = FALSE, cum.percent = TRUE, main = "Amount of participants having \ 
or having not heard of the P5 Standard")
## data$Knowing : 
##         Frequency Percent Cum. percent
## Yes            84    55.3         55.3
## No             68    44.7        100.0
##   Total       152   100.0        100.0
# Percentage of answers per category (Yes/No) 
round(tally (~ Knowing, format = "percent", data = data, 2))
## Knowing
##   Yes    No Total 
##    55    45   100
# Barplot showing the amount of people that have or haven't heard of the P5 Standard 
ggplot(data = subset(data, !is.na(Knowing)), aes(Knowing)) +
  scale_x_discrete(name = "Heard of the P5 Standard") +
  scale_y_continuous(name = "Count") +
  ggtitle("P5 Standard awareness among participants") +
  geom_bar(fill = "#00C08D") +
  theme(text = element_text(size = 14)) +
  theme_minimal()

#Heard of it (Yes/no) per industry sector 
table(data$Knowing, data$Sector)
##      
##        1  2  3  4  5  6  7  8  9 10 11 12 13 15 16 17 18 19 20 21 22 23 24
##   Yes  7  8  6 10 12  7  8  1  0  1  0  1  1  0  7  5  1  0  2  0  1  3  0
##   No   3  2  2  1  1  3  0  1  1  1 16  1  3  4  7  5  0  3  6  4  0  1  1
# Create simple Data Frame
data_plot <- dplyr::select (data, Sector, Knowing)
data_plot <- na.omit(data_plot)

# Convert Sectors to Factor, making all Factor Levels explicit (INCLUDING NO.14)
all_possible_levels <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24)
data_plot$Sector <- factor(data_plot$Sector, levels = all_possible_levels)

# Set Labels For Sector Groups
my_labels <- c("Energy", "Materials", "Automobiles & Components", "Utilities", "Capital Goods", "Food Beverage & Tobacco", "Semiconductors & Semiconductor Equipment",
               "Consumber Durables & Apparel", "Consumer Discretionary Distribution & Retail", "Consumer Staples Distribution & Retail", "Banks", "Transportation",
               "Technology Hardware & Equipment", "Equity Real Estate Investment Trusts (REITs)", "Consumer Services", "Financial Services", "Commercial & Professional Services", "Household & Personal Products",
               "Health Care Equipment & Services", "Pharmaceuticals, Biotechnology & Life Sciences", "Media & Entertainment", "Telecommunication Services", "Software & Services", "Insurance")

# Plot Barplot
ggplot(data_plot, aes(x = Sector, fill=Knowing)) +
  geom_bar() +
  scale_x_discrete(labels = my_labels, drop = FALSE, name = "Industry Sectors") +   ## drop = FALSE ensures that sectors with count = 0 are not dropped
  scale_fill_manual(values = c("#619CFF", "#F8766D")) +
  guides(fill = guide_legend(title = "Heard of the standard")) +
  ggtitle("Awareness of P5 Standard among industry sectors") + 
  scale_y_continuous(name = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        plot.title = element_text(hjust=0.0)) +
  theme(text = element_text(size = 14)) 

#Correlation btw having heard of it and industry sector 
cor.test(as.numeric(data$Knowing), as.numeric(data$Sector), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 325235, p-value = 1.409e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3856493
#Correlation btw having heard of it and experience 
cor.test(as.numeric(data$Knowing), as.numeric(data$Experience), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 335633, p-value = 0.09825
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1439776
#Correlation btw having heard of it and age 
cor.test(as.numeric(data$Knowing), as.numeric(data$Age), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 660764, p-value = 0.0002401
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3005114

H4: The more experienced the project manager, the less likely the use of the GPM P5 Ontology.

## Data preparation
data.H4 <- dplyr::select(data, ID, Frequency_usage, Experience_of_standard, Experience, Training)

data.H4$Frequency_usage <- factor(data.H4$Frequency_usage, 
                                  labels = c("never", "rarely", "sometimes", "often", "always"),
                                  ordered = TRUE)
data.H4$Experience_of_standard <- factor(data.H4$Experience_of_standard, 
                                         labels = c("poor", "average", "good", "very good"), #1: very poor is not here because no one chose it. 
                                         ordered = TRUE)

Question 1 + Years as a Project Manager/Experience + Training

## Data Preparation
# Bring categories 1&2 and 4&5 together for OLR because not enough observations
data.H4 <- data.H4 %>%
  mutate(Frequency_usage_zsmf = fancycut::fancycut(as.numeric(Frequency_usage),
                                                   NeverRarely = "[1, 2]",
                                                   Sometimes = "3",
                                                   OftenAlways = "[4, 5]", 
                                                   na.bucket = 'missing'))
data.H4$Frequency_usage_zsmf[data.H4$Frequency_usage_zsmf=="missing"] <- NA
data.H4$Frequency_usage_zsmf <- droplevels(data.H4$Frequency_usage_zsmf)

# Frequency table
epiDisplay::tab1(data.H4$Frequency_usage, graph = FALSE, cum.percent = TRUE) 
## data.H4$Frequency_usage : 
##           Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## never             3      2.0        2.0      3.6        3.6
## rarely           10      6.6        8.6     11.9       15.5
## sometimes        45     29.6       38.2     53.6       69.0
## often            21     13.8       52.0     25.0       94.0
## always            5      3.3       55.3      6.0      100.0
## NA's             68     44.7      100.0      0.0      100.0
##   Total         152    100.0      100.0    100.0      100.0
# Present epidisplay nicely
t.H4.1 <- as.data.table(epiDisplay::tab1(data.H4$Frequency_usage, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H4.1 <- t.H4.1[,c(2,3,4,6,7)] 
colnames(t.H4.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H4.1.ft <- flextable(t.H4.1)
t.H4.1.ft <- bold(t.H4.1.ft, bold=T, part = "header")
autofit(t.H4.1.ft)

Category    Frequency       %   valid %   cum. valid %
never               3     2.0       3.6            3.6
rarely             10     6.6      11.9           15.5
sometimes          45    29.6      53.6           69.0
often              21    13.8      25.0           94.0
always              5     3.3       6.0          100.0
NA's               68    44.7       0.0          100.0
Total             152   100.0     100.0          100.0

# Boxplot
data.H4 %>% filter(!is.na(Frequency_usage)) %>%
  ggplot(aes(y = Frequency_usage, x = Experience, fill = Frequency_usage)) + 
  labs(title = "Frequency of P5 usage vs. project manager experience", x = "Experience in years", y = "Frequency of P5 usage") + 
  scale_fill_manual(values = c("#E76BF3","#619CFF", "#00C08D", "#7CAE00", "#F8766D"), guide = guide_legend(reverse = TRUE)) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  geom_boxplot() 
## Warning: Removed 16 rows containing non-finite values (`stat_boxplot()`).

# Correlation between frequency of usage and the years of experience 
cor.test(as.numeric(data.H4$Experience), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 56712, p-value = 0.504
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.08242131
# Correlation between frequency of usage and what they think of the standard/what their experience with it is
cor.test(as.numeric(data.H4$Experience_of_standard), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 45753, p-value = 1.475e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4637502

A significant, moderate correlation (rho = 0.46)

# Correlation between frequency of usage and PM training yes/no of project manager
cor.test(as.numeric(data.H4$Training), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 94228, p-value = 0.6779
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.04598726

Essentially no correlation (rho ≈ 0.05), not significant

# Training vs. no training
summary(data$Training)
## No training    Training 
##          42         110
# Analysing training data 

Column 3 (“Percentage of responses”) expresses each training as a share of all training answers given, i.e. among those who named at least one training (excluding those who named none). The “Total” row of the “… cases” column divided by 100 gives the average number of answers per respondent: about 1.05 trainings on average.
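
The 1.05 figure can be reproduced directly from the Total row of the multiResponse table below (a quick arithmetic sketch):

# 159 training answers given by 152 respondents --> about 1.05 trainings per respondent
159 / 152            # ~1.05
# equivalently: the "Percentage of (152) cases" Total divided by 100
104.6052632 / 100    # ~1.05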

(df.Training <- ufs::multiResponse(data, items = c("TrainingPMP", "TrainingCAPM", "TrainingPRINCE2", "TrainingPMIACP", "TrainingCSM", "TrainingCPM", "TrainingCPMP", "TrainingIPMA", "TrainingGPM")))
##             Option Frequency Percentage of responses Percentage of (152) cases
## 1      TrainingPMP        35              22.0125786                23.0263158
## 2     TrainingCAPM        27              16.9811321                17.7631579
## 3  TrainingPRINCE2        17              10.6918239                11.1842105
## 4   TrainingPMIACP        19              11.9496855                12.5000000
## 5      TrainingCSM        17              10.6918239                11.1842105
## 6      TrainingCPM        25              15.7232704                16.4473684
## 7     TrainingCPMP        16              10.0628931                10.5263158
## 8     TrainingIPMA         1               0.6289308                 0.6578947
## 9      TrainingGPM         2               1.2578616                 1.3157895
## 10           Total       159             100.0000000               104.6052632
# Barplot showing the number of people per training (note: multiple responses were possible, so percentages can sum to over 100%)
df.Training <- rename(df.Training, Percentage_of_total_cases = `Percentage of (152) cases`)
ggplot(df.Training[-10,], aes(y = Percentage_of_total_cases, x = Option)) +
  scale_x_discrete(name = "Different PM trainings") +
  geom_bar(stat = "identity", fill = "#00C08D") +
  labs(title = "Participant distribution across various PM trainings ", y = "Percentage of total cases") + 
  theme_minimal() +  
  theme(text = element_text(size = 14)) +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        plot.title = element_text(hjust=0.0)) #h.just:horizontal adjustment (0.5 in the middle)

## Regression Analysis 

Note: there are many missing values on these items (more than half of the sample); this must be stated when reporting. Because the Brant test in the residual analysis is significant, a cumulative link model (clm) is fitted in addition.
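
A short sketch (using the objects above) to make the extent of the missingness explicit when reporting:

# Number and share of respondents missing on the usage item
sum(is.na(data.H4$Frequency_usage_zsmf))          # absolute number of NAs
mean(is.na(data.H4$Frequency_usage_zsmf)) * 100   # percentage of the 152 cases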

H4.OLR <- polr(Frequency_usage_zsmf ~ Experience, data = data.H4, Hess = TRUE)
summary(H4.OLR)
## Call:
## polr(formula = Frequency_usage_zsmf ~ Experience, data = data.H4, 
##     Hess = TRUE)
## 
## Coefficients:
##               Value Std. Error t value
## Experience -0.09186    0.03747  -2.451
## 
## Intercepts:
##                       Value   Std. Error t value
## NeverRarely|Sometimes -2.0723  0.4223    -4.9072
## Sometimes|OftenAlways  0.4190  0.3232     1.2967
## 
## Residual Deviance: 131.8776 
## AIC: 137.8776 
## (84 observations deleted due to missingness)
## Residual Analysis
# brant is significant for experience so we need to do clm instead 
summary_table_H4.OLR <- coef(summary(H4.OLR))
pval4 <- round(pnorm(abs(summary_table_H4.OLR[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H4.OLR <- cbind(summary_table_H4.OLR, "p value" = pval4) 
summary_table_H4.OLR #only the p-value of the IV matters; p = 0.014: significant association between Experience and frequency of usage
##                            Value Std. Error   t value p value
## Experience            -0.0918551 0.03747215 -2.451290   0.014
## NeverRarely|Sometimes -2.0723095 0.42229783 -4.907223   0.000
## Sometimes|OftenAlways  0.4190245 0.32315436  1.296670   0.195
pval4 <- pval4[1:5]
cbind(exp(cbind(OR = coef(H4.OLR), ci = confint(H4.OLR))),"p value" = pval4) #confidence interval and OR (odds ratio): for each additional year of experience, the odds of a higher frequency-of-usage category are multiplied by the OR (here below 1, i.e. the odds decrease)
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H4.OLR), ci = confint(H4.OLR))), `p value`
## = pval4): number of rows of result is not a multiple of vector length (arg 2)
##               OR        ci p value
## 2.5 %  0.9122373 0.8409246   0.014
## 97.5 % 0.9122373 0.9785995   0.000
brant::brant(H4.OLR) # significant --> not good!! --> because H0: requirements (parallel regression assumption) met
## -------------------------------------------- 
## Test for X2  df  probability 
## -------------------------------------------- 
## Omnibus      23.92   1   0
## Experience   23.92   1   0
## -------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
# Clm: cumulative link model
H4.clm <- clm(Frequency_usage_zsmf ~ Experience, data = data.H4)
summary(H4.clm) # Once 0.14 and once -0.03 --> not the same slope for the two steps 
## formula: Frequency_usage_zsmf ~ Experience
## data:    data.H4
## 
##  link  threshold nobs logLik AIC    niter max.grad cond.H 
##  logit flexible  68   -65.94 137.88 5(0)  4.40e-11 3.0e+02
## 
## Coefficients:
##            Estimate Std. Error z value Pr(>|z|)  
## Experience -0.09186    0.03747  -2.451   0.0142 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##                       Estimate Std. Error z value
## NeverRarely|Sometimes  -2.0723     0.4223  -4.907
## Sometimes|OftenAlways   0.4190     0.3232   1.297
## (84 observations deleted due to missingness)
(summary_table_H4.clm <- coef(summary(H4.clm)))
##                         Estimate Std. Error   z value     Pr(>|z|)
## NeverRarely|Sometimes -2.0723095 0.42229678 -4.907235 9.236934e-07
## Sometimes|OftenAlways  0.4190245 0.32315295  1.296675 1.947429e-01
## Experience            -0.0918551 0.03747152 -2.451331 1.423291e-02
# OR
exp(coef(H4.clm))
## NeverRarely|Sometimes Sometimes|OftenAlways            Experience 
##             0.1258947             1.5204776             0.9122373

If experience increases by 1 year, the odds of being in a higher usage category are multiplied by about 0.91 (the OR for Experience), i.e. they decrease by roughly 9% per year.

0.9122373^10 
## [1] 0.3990968

If someone has 10 more years of experience, the odds that the standard is used (more frequently) decrease by about 60% (OR^10 = 0.91^10 ≈ 0.40).

exp(confint(H4.clm))
##                2.5 %    97.5 %
## Experience 0.8409283 0.9785993
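
The same 10-year effect, together with a confidence interval, can be read off the model directly (a sketch using the H4.clm object above):

# OR for a 10-year difference in experience: exponentiate 10 * coefficient
exp(10 * coef(H4.clm)["Experience"])        # ~0.40, i.e. roughly a 60% drop in the odds
exp(10 * confint(H4.clm)["Experience", ])   # 95% CI on the 10-year odds ratio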

Question 1.1: If never, why?

# The three that chose that they never use it, answered: "My company doesn't use it" 

Question 2

## Descriptive 
# Frequency table 
epiDisplay::tab1(data.H4$Experience_of_standard, graph = FALSE, cum.percent = TRUE) 
## data.H4$Experience_of_standard : 
##           Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## poor              4      2.6        2.6      5.0        5.0
## average          27     17.8       20.4     33.8       38.8
## good             40     26.3       46.7     50.0       88.8
## very good         9      5.9       52.6     11.2      100.0
## NA's             72     47.4      100.0      0.0      100.0
##   Total         152    100.0      100.0    100.0      100.0
#Present epidisplay nicely 
t.H4.2 <- as.data.table(epiDisplay::tab1(data.H4$Experience_of_standard, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H4.2 <- t.H4.2[,c(2,3,4,6,7)] 
colnames(t.H4.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
new_row <- data.table("Category" = "very poor", "Frequency" = 0, "%" = 0, "valid %" = 0, "cum. valid %" = 0)
t.H4.2 <- rbindlist(list(new_row, t.H4.2))
t.H4.2.ft <- flextable(t.H4.2)
t.H4.2.ft <- bold(t.H4.2.ft, bold=T, part = "header")
autofit(t.H4.2.ft)

Category    Frequency       %   valid %   cum. valid %
very poor           0     0.0       0.0            0.0
poor                4     2.6       5.0            5.0
average            27    17.8      33.8           38.8
good               40    26.3      50.0           88.8
very good           9     5.9      11.2          100.0
NA's               72    47.4       0.0          100.0
Total             152   100.0     100.0          100.0

# Experience of standard: combine 1&2 and 4&5 --> because few people chose 1 and 5 (no one chose "very poor", so that level is left out)
data.H4 <- data.H4 %>%
  mutate(Experience_of_standard_zsmf = fancycut::fancycut(as.numeric(Experience_of_standard),
                                                          Poor = "1",
                                                          Average = "2",
                                                          Good = "[3, 4]", 
                                                          na.bucket = 'missing'))
data.H4$Experience_of_standard_zsmf[data.H4$Experience_of_standard_zsmf == "missing"] <- NA
data.H4$Experience_of_standard_zsmf <- droplevels(data.H4$Experience_of_standard_zsmf)

# Check that the recoding is correct
table(data.H4$Experience_of_standard, data.H4$Experience_of_standard_zsmf)
##            
##             Poor Average Good
##   poor         4       0    0
##   average      0      27    0
##   good         0       0   40
##   very good    0       0    9
# Barplot showing the number of people per point of "Experience of standard" 
ggplot(data = subset(data.H4, !is.na(Experience_of_standard)), aes(Experience_of_standard)) +
  scale_x_discrete(name = "Experience of standard") +
  geom_bar(fill = "#00C08D") +
  ggtitle("Number of answers per experience") + 
  theme(text = element_text(size = 14)) +
  theme_light()

# Correlation between frequency of usage and what they think of the standard/what their experience with it is
cor.test(as.numeric(data.H4$Experience_of_standard), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 45753, p-value = 1.475e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4637502

A significant, moderate correlation (rho = 0.46)


H5: The more goals of the GPM P5 Ontology a project tries to address, the less realistic/possible it seems to finish the project successfully

## Data preparation
data.H5_wide <- dplyr::select(data, ID, Knowing, goals_0, goals_9, goals_19, goals_29, goals_39, goals_49, Tradeoff)

# Data as factors and with labels
data.H5_wide$goals_0 <- factor(data.H5_wide$goals_0, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                               ordered = TRUE)
data.H5_wide$goals_9 <- factor(data.H5_wide$goals_9, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                               ordered = TRUE)
data.H5_wide$goals_19 <- factor(data.H5_wide$goals_19, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                                ordered = TRUE)
data.H5_wide$goals_29 <- factor(data.H5_wide$goals_29, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                                ordered = TRUE)
data.H5_wide$goals_39 <- factor(data.H5_wide$goals_39, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                                ordered = TRUE)
data.H5_wide$goals_49 <- factor(data.H5_wide$goals_49, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"), 
                                ordered = TRUE)
data.H5_wide$Tradeoff <- factor(data.H5_wide$Tradeoff, labels = c("Yes", "No"), 
                                ordered = TRUE)

# Combine categories 1&2 and 4&5 --> because few respondents chose 1 and 5 
data.H5_wide <- data.H5_wide %>%
  mutate(goals0_zsmf = fancycut::fancycut(as.numeric(goals_0),
                                          Unlikely = "[1, 2]",
                                          Neutral = "3",
                                          Likely = "[4, 5]",     
                                          na.bucket = 'missing'))
data.H5_wide$goals0_zsmf[data.H5_wide$goals0_zsmf=="missing"] <- NA
data.H5_wide$goals0_zsmf <- droplevels(data.H5_wide$goals0_zsmf)

data.H5_wide <- data.H5_wide %>%
  mutate(goals9_zsmf = fancycut::fancycut(as.numeric(goals_9),
                                          Unlikely = "[1, 2]",
                                          Neutral = "3",
                                          Likely = "[4, 5]", 
                                          na.bucket = 'missing'))
data.H5_wide$goals9_zsmf[data.H5_wide$goals9_zsmf=="missing"] <- NA
data.H5_wide$goals9_zsmf <- droplevels(data.H5_wide$goals9_zsmf)

data.H5_wide <- data.H5_wide %>%
  mutate(goals19_zsmf = fancycut::fancycut(as.numeric(goals_19),
                                           Unlikely = "[1, 2]",
                                           Neutral = "3",
                                           Likely = "[4, 5]", 
                                           na.bucket = 'missing'))
data.H5_wide$goals19_zsmf[data.H5_wide$goals19_zsmf=="missing"] <- NA
data.H5_wide$goals19_zsmf <- droplevels(data.H5_wide$goals19_zsmf)

data.H5_wide <- data.H5_wide %>%
  mutate(goals29_zsmf = fancycut::fancycut(as.numeric(goals_29),
                                           Unlikely = "[1, 2]",
                                           Neutral = "3",
                                           Likely = "[4, 5]",
                                           na.bucket = 'missing'))
data.H5_wide$goals29_zsmf[data.H5_wide$goals29_zsmf=="missing"] <- NA
data.H5_wide$goals29_zsmf <- droplevels(data.H5_wide$goals29_zsmf) 

data.H5_wide <- data.H5_wide %>%
  mutate(goals39_zsmf = fancycut::fancycut(as.numeric(goals_39),
                                           Unlikely = "[1, 2]",
                                           Neutral = "3",
                                           Likely = "[4, 5]",   
                                           na.bucket = 'missing'))
data.H5_wide$goals39_zsmf[data.H5_wide$goals39_zsmf=="missing"] <- NA
data.H5_wide$goals39_zsmf <- droplevels(data.H5_wide$goals39_zsmf)

data.H5_wide <- data.H5_wide %>%
  mutate(goals49_zsmf = fancycut::fancycut(as.numeric(goals_49),
                                           Unlikely = "[1, 2]",
                                           Neutral = "3",
                                           Likely = "[4, 5]",  
                                           na.bucket = 'missing'))
data.H5_wide$goals49_zsmf[data.H5_wide$goals49_zsmf=="missing"] <- NA
data.H5_wide$goals49_zsmf <- droplevels(data.H5_wide$goals49_zsmf)

Question 1

## Data preparation
# Data to long format
data.H5_long <- data.H5_wide %>% 
  gather(key = "Goals", value = "score",
         goals_0, goals_9, goals_19, goals_29, goals_39, goals_49) %>% 
  convert_as_factor(ID, Goals) 
# data.H5_long <- data.H5_long %>% dplyr::select(ID, Goals, score)
data.H5_long2 <- data.H5_wide %>% 
  gather(key = "Goals_zsmf", value = "score_zsmf",
         goals0_zsmf, goals9_zsmf, goals19_zsmf, goals29_zsmf, goals39_zsmf, goals49_zsmf) %>% 
  convert_as_factor(ID, Goals_zsmf) 
data.H5_long2 <- data.H5_long2 %>% dplyr::select(ID, Goals_zsmf, score_zsmf)

# Bringing the two data sets together
data.H5_long <- lsr::sortFrame(data.H5_long, ID)
data.H5_long2 <- lsr::sortFrame(data.H5_long2, ID)

data.H5_long <- cbind(data.H5_long, data.H5_long2[,-1])
rm(data.H5_long2)

# order  
data.H5_long$Goals <- factor(data.H5_long$Goals, labels = c("goals_0", "goals_9", "goals_19", "goals_29", "goals_39", "goals_49"), 
                             ordered = TRUE)
data.H5_long$score <- factor(data.H5_long$score, labels = c("not possible at all", "unlikely", "neutral", "likely", "very possible"), 
                             ordered = TRUE)
data.H5_long$Goals_zsmf <- factor(data.H5_long$Goals_zsmf, labels = c("goals0_zsmf", "goals9_zsmf", "goals19_zsmf", "goals29_zsmf", "goals39_zsmf", "goals49_zsmf"), 
                                  ordered = TRUE)
data.H5_long$score_zsmf <- factor(data.H5_long$score_zsmf, labels = c("unlikely", "neutral", "likely"), 
                                  ordered = TRUE)

## Descriptive 
# Frequency table with percentages 
(table.Goals <- round(prop.table(table(data.H5_long$Goals, data.H5_long$score), 1), 4)*100)
##           
##            not possible at all unlikely neutral likely very possible
##   goals_0                17.52    34.31    3.65   9.49         35.04
##   goals_9                41.18    38.24    0.74   6.62         13.24
##   goals_19               25.18    46.04    3.60  16.55          8.63
##   goals_29               29.20    31.39   13.14  17.52          8.76
##   goals_39               21.32    25.74   19.85  15.44         17.65
##   goals_49               30.88    34.56    0.74   9.56         24.26
# Stacked barplot for percentage of answers of how possible it seems to finish a project successfully if any y goals are trying to be addressed.
data.H5_long %>% filter(!is.na(Goals) & !is.na(score)) %>% 
  ggplot(aes(x = factor(Goals), fill = factor(6-as.numeric(score)))) +
  geom_bar(position = "fill") +
  labs(x = "Amount of goals trying to be reached", fill = "Likelihood to finish\
a project successfully") +
  scale_x_discrete(labels = c("0", "9", "19", "29", "39", "49")) +
  scale_fill_discrete(labels = c("very possible", "4", "3", "2", "not possible at all")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  ggtitle("Number of ontology goals and project success likelihood") 

# Check that the data are formatted correctly
#table(data.H5_long$Goals, data.H5_long$Goals_zsmf)
#table(data.H5_long$Goals, data.H5_long$score)
#table(data.H5_long$Goals_zsmf, data.H5_long$score_zsmf)

# To test hypothesis
# Correlation between amount of goals trying to be reached and how possible it seems to finish a project successfully
cor.test(as.numeric(data.H5_long$Goals), as.numeric(data.H5_long$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 92685242, p-value = 0.888
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##          rho 
## -0.004923521

Essentially no correlation (rho ≈ 0) --> so hypothesis 5 is rejected

## Regression analysis
# Friedman test: Is there a significant difference between the central tendencies of multiple dependent (repeated) measurements? 

H0: The distributions of the ratings are the same across all goal conditions --> no relationship between the number of goals and the likelihood of finishing a project successfully. H1: At least one condition differs significantly.

friedman.test(data.H5_long$score_zsmf, data.H5_long$Goals_zsmf, data.H5_long$ID) #1st var: DV, 2nd var: IV, 3rd var: person 
## 
##  Friedman rank sum test
## 
## data:  data.H5_long$score_zsmf, data.H5_long$Goals_zsmf and data.H5_long$ID
## Friedman chi-squared = 41.832, df = 5, p-value = 6.369e-08
# Posthoc to determine where the difference is
friedmanmc(as.numeric(data.H5_long$score_zsmf), data.H5_long$Goals_zsmf, data.H5_long$ID)
## Multiple comparisons between groups after Friedman test 
## alpha: 0.05 
## Comparisons
##                           obs.dif critical.dif stat.signif      p.value
## goals0_zsmf-goals9_zsmf      14.0     95.74331       FALSE 1.000000e+00
## goals0_zsmf-goals19_zsmf     96.0     95.74331        TRUE 6.824499e-02
## goals0_zsmf-goals29_zsmf    115.0     95.74331        TRUE 8.874652e-03
## goals0_zsmf-goals39_zsmf    136.5     95.74331        TRUE 5.997036e-04
## goals0_zsmf-goals49_zsmf     62.5     95.74331       FALSE 1.000000e+00
## goals9_zsmf-goals19_zsmf    110.0     95.74331        TRUE 1.565598e-02
## goals9_zsmf-goals29_zsmf    129.0     95.74331        TRUE 1.608918e-03
## goals9_zsmf-goals39_zsmf    150.5     95.74331        TRUE 8.299892e-05
## goals9_zsmf-goals49_zsmf     76.5     95.74331       FALSE 3.992923e-01
## goals19_zsmf-goals29_zsmf    19.0     95.74331       FALSE 1.000000e+00
## goals19_zsmf-goals39_zsmf    40.5     95.74331       FALSE 1.000000e+00
## goals19_zsmf-goals49_zsmf    33.5     95.74331       FALSE 1.000000e+00
## goals29_zsmf-goals39_zsmf    21.5     95.74331       FALSE 1.000000e+00
## goals29_zsmf-goals49_zsmf    52.5     95.74331       FALSE 1.000000e+00
## goals39_zsmf-goals49_zsmf    74.0     95.74331       FALSE 4.891260e-01
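
As a supplementary effect-size measure (not part of the original analysis), Kendall's W can be derived from the Friedman chi-square; a sketch assuming the objects above:

# Kendall's W = chi-squared / (N * (k - 1)); k = 6 goal conditions,
# N = number of respondents with complete answers across all six conditions
k <- 6
N <- data.H5_long %>%
  dplyr::filter(!is.na(score_zsmf)) %>%
  dplyr::count(ID) %>%
  dplyr::filter(n == k) %>%
  nrow()
41.832 / (N * (k - 1))   # chi-squared value taken from the Friedman output above
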
## Now with only the people that have heard of the GPM P5 Standard 
data.H5.knowing <- subset(data.H5_long, Knowing == "Yes")

# Frequency table with percentages 
(table.Goals.knowing <- round(prop.table(table(data.H5.knowing$Goals, data.H5.knowing$score), 1), 4)*100)
##           
##            not possible at all unlikely neutral likely very possible
##   goals_0                30.14    46.58    2.74   8.22         12.33
##   goals_9                43.06    44.44    0.00   2.78          9.72
##   goals_19               24.32    56.76    0.00   9.46          9.46
##   goals_29               38.36    36.99    2.74  10.96         10.96
##   goals_39               27.78    37.50    5.56   4.17         25.00
##   goals_49               31.51    47.95    0.00   8.22         12.33
# Stacked barplot for percentage of answers of how possible it seems to finish a project successfully if any y goals are trying to be addressed. For only the people that have heard of the GPM P5 Standard
data.H5.knowing %>% filter(!is.na(Goals) & !is.na(score)) %>% 
  ggplot(aes(x = factor(Goals), fill = factor(6-as.numeric(score)))) +
  geom_bar(position = "fill") +
  labs(x = "Amount of goals trying to be achieved", fill = "Likelihood to finish\
a project successfully") +
  scale_x_discrete(labels = c("0", "9", "19", "29", "39", "49")) +
  scale_fill_discrete(labels = c("very possible", "4", "3", "2", "not possible at all")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  ggtitle("Likelihood of finishing a project successfully if y goals are tying to be achieved") + 
  theme(text = element_text(size = 14)) +
  theme_minimal()

# To test hypothesis
# Correlation between amount of goals trying to be reached and how possible it seems to finish a project successfully
cor.test(as.numeric(data.H5.knowing$Goals), as.numeric(data.H5.knowing$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 13145787, p-value = 0.2525
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.05486073

Question 2

# Frequency table with median 
epiDisplay::tab1(data.H5_wide$Tradeoff, graph = FALSE, cum.percent = TRUE) 
## data.H5_wide$Tradeoff : 
##         Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## Yes           136     89.5       89.5     90.7       90.7
## No             14      9.2       98.7      9.3      100.0
## NA's            2      1.3      100.0      0.0      100.0
##   Total       152    100.0      100.0    100.0      100.0
# Barplot to look at the number of answers
ggplot(data = subset(data.H5_wide, !is.na(Tradeoff)), aes(Tradeoff)) +
  scale_x_discrete(name = "Trade-off") +
  scale_y_continuous(name = "Count") +
  ggtitle("Number of participants on whether there is a trade-off or not\
when considering an increasing number of ontology goals") + 
  geom_bar(fill = "#00C08D") +
  theme_minimal() +
  theme(text = element_text(size = 14)) 

# For only the people who have heard of the standard
# Frequency table with median 
epiDisplay::tab1(data.H5.knowing$Tradeoff, graph = FALSE, cum.percent = TRUE) 
## data.H5.knowing$Tradeoff : 
##         Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## Yes           480     95.2       95.2     96.4       96.4
## No             18      3.6       98.8      3.6      100.0
## NA's            6      1.2      100.0      0.0      100.0
##   Total       504    100.0      100.0    100.0      100.0
# Barplot to look at the amount of answers
ggplot(data = subset(data.H5.knowing, !is.na(Tradeoff)), aes(Tradeoff)) +
  scale_x_discrete(name = "Trade-off") +
  scale_y_continuous(name = "Count") +
  ggtitle("Number of participants on whether there is a trade-off or not\
when considering an increasing number of ontology goals") + 
  geom_bar(fill = "#00C08D") +
  theme_minimal() +
  theme(text = element_text(size = 14))


H6: The less clear it is which tools can be used to evaluate the project outcome in relation to the goals of the ontology, the less the GPM P5 Standard gets used.

## Data preparation
data.H6 <- dplyr::select(data, ID, Frequency_usage, Understanding_tools, Sufficiency)

# Data as factors and with labels
data.H6$Frequency_usage <- factor(data.H6$Frequency_usage, labels = c("Never", "Rarely", "Sometimes", "Often", "Always"), 
                                  ordered = TRUE)
data.H6$Understanding_tools <- factor(data.H6$Understanding_tools, labels = c("none", "limited", "moderate", "good", "very good"), 
                                      ordered = TRUE)
data.H6$Sufficiency <- factor(data.H6$Sufficiency,
                              labels = c("strongly disagree", "disagree", "neutral", "agree", "strongly agree"), 
                              ordered = TRUE)


# Combine categories 1&2 and 4&5 --> because few respondents chose 1 and 5 
data.H6 <- data.H6 %>%
  mutate(Sufficiency_zsmf = fancycut::fancycut(as.numeric(Sufficiency),
                                               Disagree = "[1, 2]",
                                               Neutral = "3",
                                               Agree = "[4, 5]",
                                               na.bucket = 'missing'))
data.H6$Sufficiency_zsmf[data.H6$Sufficiency_zsmf=="missing"] <- NA
data.H6$Sufficiency_zsmf <- droplevels(data.H6$Sufficiency_zsmf) 
#data.H6$Sufficiency_zsmf <- factor(data.H6$Sufficiency_zsmf, levels = c(1,2,3), labels = c("Disagree", "Neutral", "Agree"))

data.H6 <- data.H6 %>%
  mutate(Understanding_tools_zsmf = fancycut::fancycut(as.numeric(Understanding_tools),
                                                       Limited = "[1, 2]",
                                                       Neutral = "3",
                                                       Good = "[4, 5]",
                                                       na.bucket = 'missing'))
data.H6$Understanding_tools_zsmf[data.H6$Understanding_tools_zsmf=="missing"] <- NA
data.H6$Understanding_tools_zsmf <- droplevels(data.H6$Understanding_tools_zsmf)

Question 1

## Descriptive
# Frequency table (+ percentages with and without NA --> always report the valid percentages, i.e. those excluding NA (NA-))
epiDisplay::tab1(data.H6$Understanding_tools, graph = FALSE, cum.percent = TRUE) #cum.percent: valid percentages
## data.H6$Understanding_tools : 
##           Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## none              2      1.3        1.3      2.5        2.5
## limited           6      3.9        5.3      7.4        9.9
## moderate         26     17.1       22.4     32.1       42.0
## good             41     27.0       49.3     50.6       92.6
## very good         6      3.9       53.3      7.4      100.0
## NA's             71     46.7      100.0      0.0      100.0
##   Total         152    100.0      100.0    100.0      100.0
#Present epidisplay nicely 
t.H6.1 <- as.data.table(epiDisplay::tab1(data.H6$Understanding_tools, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H6.1 <- t.H6.1[,c(2,3,4,6,7)] 
colnames(t.H6.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H6.1.ft <- flextable(t.H6.1)
t.H6.1.ft <- bold(t.H6.1.ft, bold=T, part = "header")
autofit(t.H6.1.ft)

Category    Frequency       %   valid %   cum. valid %
none                2     1.3       2.5            2.5
limited             6     3.9       7.4            9.9
moderate           26    17.1      32.1           42.0
good               41    27.0      50.6           92.6
very good           6     3.9       7.4          100.0
NA's               71    46.7       0.0          100.0
Total             152   100.0     100.0          100.0

# Visualization of amount of answers: Stacked barplot 
data.H6 %>% filter(!is.na(Understanding_tools) & !is.na(Frequency_usage)) %>% 
  ggplot(aes(x = factor(Understanding_tools), fill = factor(5-as.numeric(Frequency_usage)))) + #x: explanatory variable, fill: outcome variable
  geom_bar(position = "fill") +
  labs(x = "Understanding of tools", fill = "Frequency of P5 use") +
  scale_fill_manual(labels = c("always", "often", "sometimes", "rarely"), values = c("#F8766D", "#7CAE00", "#00C08D", "#619CFF", "#E76BF3")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() + #theme_(bw, minimal): background 
  theme(text = element_text(size = 14)) +
  ggtitle("Frequency of P5 use vs project outcome evaluation knowledge") 

# Correlation
cor.test(as.numeric(data.H6$Understanding_tools), as.numeric(data.H6$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 55942, p-value = 0.0007169
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3683122

A positive, significant correlation (rho = 0.37)

cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 46012, p-value = 5.647e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4804404
cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Understanding_tools_zsmf), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 55772, p-value = 0.0006688
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3702346
## Regression analysis
# OLR
H6.OLR.1 <- polr(as.factor(Frequency_usage) ~ Understanding_tools_zsmf, data = data.H6, Hess = TRUE)
summary(H6.OLR.1)
## Call:
## polr(formula = as.factor(Frequency_usage) ~ Understanding_tools_zsmf, 
##     data = data.H6, Hess = TRUE)
## 
## Coefficients:
##                                 Value Std. Error t value
## Understanding_tools_zsmfNeutral 2.030     0.8285   2.450
## Understanding_tools_zsmfGood    3.036     0.8375   3.625
## 
## Intercepts:
##                  Value   Std. Error t value
## Never|Rarely     -9.9482 36.2770    -0.2742
## Rarely|Sometimes  0.0727  0.6846     0.1061
## Sometimes|Often   3.2757  0.8080     4.0544
## Often|Always      5.3758  0.9149     5.8759
## 
## Residual Deviance: 163.9337 
## AIC: 175.9337 
## (71 observations deleted due to missingness)
H6.OLR.2 <- polr(as.factor(Frequency_usage) ~ Understanding_tools_zsmf + Sufficiency_zsmf, data = data.H6, Hess = TRUE)
summary(H6.OLR.2)
## Call:
## polr(formula = as.factor(Frequency_usage) ~ Understanding_tools_zsmf + 
##     Sufficiency_zsmf, data = data.H6, Hess = TRUE)
## 
## Coefficients:
##                                  Value Std. Error t value
## Understanding_tools_zsmfNeutral 1.8890     0.8767   2.155
## Understanding_tools_zsmfGood    2.3650     0.8993   2.630
## Sufficiency_zsmfNeutral         0.9074     0.8944   1.014
## Sufficiency_zsmfAgree           2.4741     0.9257   2.673
## 
## Intercepts:
##                  Value   Std. Error t value
## Never|Rarely     -8.4737 26.8024    -0.3162
## Rarely|Sometimes  0.9063  0.9961     0.9098
## Sometimes|Often   4.4625  1.1642     3.8332
## Often|Always      6.7472  1.2584     5.3617
## 
## Residual Deviance: 151.9577 
## AIC: 167.9577 
## (71 observations deleted due to missingness)
## Residual analysis
# summary table 1
summary_table_H6.OLR.1 <- coef(summary(H6.OLR.1))
pval5 <- round(pnorm(abs(summary_table_H6.OLR.1[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H6.OLR.1 <- cbind(summary_table_H6.OLR.1, "p value" = pval5) 
summary_table_H6.OLR.1 #p-value: 0.014 & 0.000
##                                       Value Std. Error    t value p value
## Understanding_tools_zsmfNeutral  2.02990813  0.8284699  2.4501892   0.014
## Understanding_tools_zsmfGood     3.03605272  0.8375268  3.6250217   0.000
## Never|Rarely                    -9.94817014 36.2770071 -0.2742280   0.784
## Rarely|Sometimes                 0.07266253  0.6845970  0.1061391   0.915
## Sometimes|Often                  3.27573544  0.8079548  4.0543549   0.000
## Often|Always                     5.37576160  0.9148770  5.8759393   0.000
pval5 <- pval5[1:5]
cbind(exp(cbind(OR = coef(H6.OLR.1), ci = confint(H6.OLR.1))),"p value" = pval5) 
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H6.OLR.1), ci = confint(H6.OLR.1))), :
## number of rows of result is not a multiple of vector length (arg 2)
##                                        OR    2.5 %   97.5 % p value
## Understanding_tools_zsmfNeutral  7.613387 1.547544  41.8371   0.014
## Understanding_tools_zsmfGood    20.822887 4.214773 117.4308   0.000
brant::brant(H6.OLR.1) #not significant --> which is good!! --> because H0: Requirements of parallel regression assumption met
## -------------------------------------------------------------------- 
## Test for             X2  df  probability 
## -------------------------------------------------------------------- 
## Omnibus                  4.87    6   0.56
## Understanding_tools_zsmfNeutral  0   3   1
## Understanding_tools_zsmfGood     0   3   1
## -------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H6.OLR.1): 3 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.

The category left out is the reference group, here Understanding_tools = Limited. Understanding_tools = Neutral differs significantly from Limited with respect to the DV (Frequency of usage): the odds ratio of 7.61 means that the odds of being in a higher usage category are 7.61 times higher for Neutral than for Limited. Understanding_tools = Good also differs significantly from Limited (OR = 20.82).
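
To make these odds ratios more tangible, the fitted model can be converted into predicted category probabilities (a sketch; the newdat data frame is introduced here only for illustration):

# Predicted probability of each usage category for the three levels of tool understanding
newdat <- data.frame(Understanding_tools_zsmf = factor(c("Limited", "Neutral", "Good"),
                                                       levels = levels(data.H6$Understanding_tools_zsmf)))
cbind(newdat, predict(H6.OLR.1, newdata = newdat, type = "probs"))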

# summary table 2
summary_table_H6.OLR.2 <- coef(summary(H6.OLR.2))
pval6 <- round(pnorm(abs(summary_table_H6.OLR.2[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H6.OLR.2 <- cbind(summary_table_H6.OLR.2, "p value" = pval6) 
summary_table_H6.OLR.2 #p-values: 0.031 & 0.009, 0.310, 0.008
##                                      Value Std. Error    t value p value
## Understanding_tools_zsmfNeutral  1.8889716  0.8767258  2.1545752   0.031
## Understanding_tools_zsmfGood     2.3650482  0.8993488  2.6297340   0.009
## Sufficiency_zsmfNeutral          0.9073582  0.8944249  1.0144599   0.310
## Sufficiency_zsmfAgree            2.4741435  0.9257266  2.6726502   0.008
## Never|Rarely                    -8.4737094 26.8023841 -0.3161551   0.752
## Rarely|Sometimes                 0.9062851  0.9961277  0.9098082   0.363
## Sometimes|Often                  4.4625147  1.1641877  3.8331573   0.000
## Often|Always                     6.7471878  1.2584034  5.3617052   0.000
pval6 <- pval6[1:4] # keep only the four coefficient p-values (the intercept rows are dropped)
cbind(exp(cbind(OR = coef(H6.OLR.2), ci = confint(H6.OLR.2))), "p value" = pval6) 
## Waiting for profiling to be done...
##                                        OR     2.5 %   97.5 % p value
## Understanding_tools_zsmfNeutral  6.612565 1.2090080 39.69147   0.031
## Understanding_tools_zsmfGood    10.644551 1.8799511 66.81548   0.009
## Sufficiency_zsmfNeutral          2.477768 0.4333137 14.83813   0.310
## Sufficiency_zsmfAgree           11.871534 2.0330994 76.99071   0.008
brant::brant(H6.OLR.2) # again not significant: no evidence against the parallel regression assumption
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## -------------------------------------------------------------------- 
## Test for             X2  df  probability 
## -------------------------------------------------------------------- 
## Omnibus                  4.93    12  0.96
## Understanding_tools_zsmfNeutral  0   3   1
## Understanding_tools_zsmfGood     0   3   1
## Sufficiency_zsmfNeutral      0   3   1
## Sufficiency_zsmfAgree            0   3   1
## -------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H6.OLR.2): 18 combinations in table(dv,ivs) do not
## occur. Because of that, the test results might be invalid.

Question 2

## Descriptive
# Frequency table 
epiDisplay::tab1(data.H6$Sufficiency, graph = FALSE, cum.percent = TRUE)
## data.H6$Sufficiency : 
##                   Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## strongly disagree         1      0.7        0.7      1.2        1.2
## disagree                  6      3.9        4.6      7.4        8.6
## neutral                  39     25.7       30.3     48.1       56.8
## agree                    24     15.8       46.1     29.6       86.4
## strongly agree           11      7.2       53.3     13.6      100.0
## NA's                     71     46.7      100.0      0.0      100.0
##   Total                 152    100.0      100.0    100.0      100.0
# Present epiDisplay nicely
t.H6.2 <- as.data.table(epiDisplay::tab1(data.H6$Sufficiency, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H6.2 <- t.H6.2[,c(2,3,4,6,7)] 
colnames(t.H6.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %") 
t.H6.2.ft <- flextable(t.H6.2)
t.H6.2.ft <- bold(t.H6.2.ft, bold=T, part = "header")
autofit(t.H6.2.ft)

Category             Frequency       %   valid %   cum. valid %
strongly disagree            1     0.7       1.2            1.2
disagree                     6     3.9       7.4            8.6
neutral                     39    25.7      48.1           56.8
agree                       24    15.8      29.6           86.4
strongly agree              11     7.2      13.6          100.0
NA's                        71    46.7       0.0          100.0
Total                      152   100.0     100.0          100.0

# Overview of the number of answers per point on the Likert scale "Sufficiency" 
ggplot(data = subset(data.H6, !is.na(Sufficiency)), aes(Sufficiency)) +
  scale_x_discrete(name = "Sufficiency of provided tools") +
  scale_y_continuous(name = "Count") +
  geom_bar(fill = "#00C08D") +
  theme_minimal() +
  ggtitle("Number of answers on how the sufficiency of tools provided\nthrough and with the standard is perceived.") 

# Visualization of number of answers: Stacked barplot 
data.H6 %>% filter(!is.na(Sufficiency) & !is.na(Frequency_usage)) %>% 
  ggplot(aes(x = factor(Sufficiency), fill = factor(5 - as.numeric(Frequency_usage)))) + # x: explanatory variable, fill: outcome variable (reversed so "always" is plotted first)
  geom_bar(position = "fill") +
  labs(x = "Sufficiency", fill = "Frequency of P5 use") +
  scale_fill_discrete(labels = c("always", "often", "sometimes", "rarely")) + 
  scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
  theme_minimal() + # theme_bw()/theme_minimal() control the plot background 
  theme(text = element_text(size = 14)) +
  ggtitle("Frequency of P5 use vs sufficiency of P5 Standard, ontology and templated") 

# Correlation between the perceived sufficiency of tools provided through and with the standard and the frequency of its usage 
cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Frequency_usage), method = "spearman") # cor.test drops incomplete pairs itself; it has no 'use' argument
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  x and y
## S = 46012, p-value = 5.647e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4804404

rho = 0.48: a moderate to strong positive correlation between perceived sufficiency and frequency of usage.
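
Because of the ties, no exact p-value is available; a percentile bootstrap gives a rough interval for rho (a sketch only, with an arbitrary seed and 2000 resamples):

set.seed(42)                                       # arbitrary seed for reproducibility
cc <- na.omit(data.frame(suff = as.numeric(data.H6$Sufficiency),
                         freq = as.numeric(data.H6$Frequency_usage)))
boot_rho <- replicate(2000, {
  i <- sample.int(nrow(cc), replace = TRUE)        # resample respondents with replacement
  stats::cor(cc$suff[i], cc$freq[i], method = "spearman")
})
quantile(boot_rho, c(0.025, 0.975))                # percentile bootstrap CI for rho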


Yes, have heard of it, but never use it

There are only three participants in this category. For the multiple-choice question “Why don’t you use the P5 Standard?”, all three chose “My company doesn’t use it” rather than “I’ve tried it, it’s not good”, “I use another sustainability concept”, or “other”.
For the question “On a scale of 1 to 5, rate your level of understanding of the tools available for assessing the degree of sustainability of project results”, the answers 2, 3, and 4 were chosen (limited, moderate, and good understanding), i.e. a very spread-out distribution. They were also asked to give suggestions for improvements.


Haven’t heard of it

These participants were also asked the same questions for Hypothesis 5 as those who have heard of the standard. In addition, they were asked: “Now that you’ve heard about it and have seen the P5 Ontology, would you consider using it?”

# Making data frame 
# df.open2 <- dplyr::select(data, Why_not_consider)
# df.open2 <- na.omit(df.open2)
# df.open2 <- as.data.table(df.open2)

# Export as an excel file
# write.xlsx(df.open2, file = "df.open2.xlsx", overwrite = TRUE)

## Descriptive
# Frequency table 
epiDisplay::tab1(data$Consider_for_future, graph = FALSE, cum.percent = TRUE)
## data$Consider_for_future : 
##         Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## No             15      9.9        9.9     22.7       22.7
## Yes            51     33.6       43.4     77.3      100.0
## NA's           86     56.6      100.0      0.0      100.0
##   Total       152    100.0      100.0    100.0      100.0
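
To attach an uncertainty interval to the 77% "Yes" share, a simple binomial test on the counts from the table above can be used (a quick sketch; the counts 51 and 15 are taken from the output):

stats::binom.test(x = 51, n = 51 + 15) # proportion of "Yes" among valid answers, with a 95% CI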
# Barplot to look at the number of answers for considering / not considering using the standard
ggplot(data = subset(data, !is.na(Consider_for_future)), aes(Consider_for_future)) +
  scale_x_discrete(name = "Considering the standard for future use?") +
  scale_y_continuous(name = "Count") +
  ggtitle("Number of participants by whether they would consider the standard for future use") + 
  geom_bar(fill = "#00C08D") +
  theme_minimal() +
  theme(text = element_text(size = 14))

They were also asked to rate their understanding of the tools, just as the group that has heard of the standard but never uses it was.

# Understanding of tools: combine the answers of people who haven't heard of the standard with those who have heard of it but never use it
# The two answer columns are mutually exclusive, so replace NA with 0, add them, and turn the remaining 0s back into NA
data$GYnever1_und.tools[is.na(data$GYnever1_und.tools)] <- 0
data$GN1_und.tools[is.na(data$GN1_und.tools)] <- 0
data$GN.understanding <- data$GYnever1_und.tools + data$GN1_und.tools
data$GN.understanding[data$GN.understanding == 0] <- NA

data$GN.understanding <- factor(data$GN.understanding, levels = c(1, 2, 3, 4, 5), labels = c("none", "limited", "moderate", "good", "very good"))
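
As an aside, the same merge could be written with dplyr::coalesce(), which returns the first non-missing value per row; the sketch below assumes it is applied to the original columns, i.e. before the NA-to-0 replacement above:

GN.understanding.alt <- dplyr::coalesce(data$GYnever1_und.tools, data$GN1_und.tools)
GN.understanding.alt <- factor(GN.understanding.alt, levels = 1:5,
                               labels = c("none", "limited", "moderate", "good", "very good"))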

# Frequency table 
epiDisplay::tab1(data$GN.understanding, graph = FALSE, cum.percent = TRUE)
## data$GN.understanding : 
##           Frequency   %(NA+) cum.%(NA+)   %(NA-) cum.%(NA-)
## none              8      5.3        5.3     11.3       11.3
## limited          27     17.8       23.0     38.0       49.3
## moderate         20     13.2       36.2     28.2       77.5
## good             15      9.9       46.1     21.1       98.6
## very good         1      0.7       46.7      1.4      100.0
## NA's             81     53.3      100.0      0.0      100.0
##   Total         152    100.0      100.0    100.0      100.0
# Barplot to look at the number of answers on the understanding of the tools available to evaluate the outcome of a project
ggplot(data = subset(data, !is.na(GN.understanding)), aes(GN.understanding)) +
  scale_x_discrete(name = "Understanding of tools") +
  scale_y_continuous(name = "Count") +
  ggtitle("Number of participants on their understanding of tools to evaluate the outcome of a project") + 
  geom_bar(fill = "#00C08D") +
  theme_minimal() +
  theme(text = element_text(size = 14))


Regression outputs summarized

# Save all tables on a word document
#save_as_docx(ft1, t.H2.1.ft, ft2, t.H2.2.ft, t.H3.1.ft, t.H3.3.ft, t.H4.1.ft, t.H4.2.ft, t.H6.1.ft, t.H6.2.ft, path = mypath) 

# Regression output with stargazer (gives OR)  
stargazer(H2.OLR, H3.OLR.1, H3.OLR.2, type="text", out="stargazer.doc", apply.coef = exp, apply.se = exp, p.auto = F,
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(.1, .05, .01, .001), 
          notes.append = F, notes = c("+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001"))
## 
## ============================================================================
##                                                Dependent variable:          
##                                      ---------------------------------------
##                                               Proactive_Steps_zsmf          
##                                           (1)          (2)          (3)     
## ----------------------------------------------------------------------------
## ITR                                     1.337+                              
##                                         (1.167)                             
##                                                                             
## Considering_future_costs_zsmfNeutral                  2.615*       1.617    
##                                                      (1.619)      (1.682)   
##                                                                             
## Considering_future_costs_zsmfLikely                  5.994***      2.994*   
##                                                      (1.604)      (1.662)   
##                                                                             
## Economic_Mindset_zsmfNeutral                                       2.522*   
##                                                                   (1.598)   
##                                                                             
## Economic_Mindset_zsmfLarge extent                                 7.040***  
##                                                                   (1.618)   
##                                                                             
## ----------------------------------------------------------------------------
## Observations                              146          150          149     
## ============================================================================
## Note:                              + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
stargazer(H6.OLR.1, H6.OLR.2, type="text", out="stargazer.doc", apply.coef = exp, apply.se = exp, p.auto = F,
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(.1, .05, .01, .001), 
          notes.append = F, notes = c("+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001"))
## 
## ======================================================================
##                                          Dependent variable:          
##                                 --------------------------------------
##                                            Frequency_usage            
##                                         (1)                (2)        
## ----------------------------------------------------------------------
## Understanding_tools_zsmfNeutral       7.613*              6.613*      
##                                       (2.290)            (2.403)      
##                                                                       
## Understanding_tools_zsmfGood         20.823***           10.645**     
##                                       (2.311)            (2.458)      
##                                                                       
## Sufficiency_zsmfNeutral                                   2.478       
##                                                          (2.446)      
##                                                                       
## Sufficiency_zsmfAgree                                    11.872**     
##                                                          (2.524)      
##                                                                       
## ----------------------------------------------------------------------
## Observations                            81                  81        
## ======================================================================
## Note:                        + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
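
Note that apply.se = exp exponentiates the standard errors, so the values in parentheses are not standard errors of the odds ratios. If exact intervals are wanted, odds ratios with profile-likelihood confidence intervals can be tabulated directly from the fitted models (a minimal sketch reusing the H6 models above):

or_table <- function(model) round(cbind(OR = exp(coef(model)), exp(confint(model))), 2) # ORs with profile CIs
lapply(list(H6.OLR.1 = H6.OLR.1, H6.OLR.2 = H6.OLR.2), or_table)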