#Libraries
library(openxlsx)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr)
## Loading required package: ggplot2
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(readxl)
library(mosaic)
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Attaching package: 'mosaic'
##
## The following object is masked from 'package:Matrix':
##
## mean
##
## The following objects are masked from 'package:rstatix':
##
## cor_test, prop_test, t_test
##
## The following object is masked from 'package:purrr':
##
## cross
##
## The following objects are masked from 'package:car':
##
## deltaMethod, logit
##
## The following object is masked from 'package:ggplot2':
##
## stat
##
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
##
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
##
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:rstatix':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
##
## The following object is masked from 'package:lattice':
##
## dotplot
##
## The following object is masked from 'package:ggplot2':
##
## alpha
library(brant)
library(pgirmess) #Posthoc of Friedmann
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following object is masked from 'package:purrr':
##
## transpose
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:mosaic':
##
## surround
##
## The following object is masked from 'package:purrr':
##
## compose
##
## The following objects are masked from 'package:ggpubr':
##
## border, font, rotate
library(fancycut)
library(lsr)
library(ufs)
##
## Attaching package: 'ufs'
##
## The following object is masked from 'package:lsr':
##
## cramersV
library(janitor)
##
## Attaching package: 'janitor'
##
## The following object is masked from 'package:rstatix':
##
## make_clean_names
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:pgirmess':
##
## shannon
##
## The following objects are masked from 'package:epiDisplay':
##
## alpha, cs, lookup
##
## The following objects are masked from 'package:mosaic':
##
## logit, rescale
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:mosaic':
##
## theme_map
##
## The following object is masked from 'package:lubridate':
##
## stamp
##
## The following object is masked from 'package:ggpubr':
##
## get_legend
library(rcompanion)
##
## Attaching package: 'rcompanion'
##
## The following object is masked from 'package:psych':
##
## phi
library(DescTools)
##
## Attaching package: 'DescTools'
##
## The following objects are masked from 'package:psych':
##
## AUC, ICC, SD
##
## The following object is masked from 'package:data.table':
##
## %like%
##
## The following object is masked from 'package:mosaic':
##
## MAD
##
## The following object is masked from 'package:car':
##
## Recode
library(ordinal)
##
## Attaching package: 'ordinal'
##
## The following object is masked from 'package:dplyr':
##
## slice
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Needs to be adapted depending on person looking at the code
setwd("/Users/annagrether/Desktop/BA R")
data <- read.csv("~/Desktop/BA R/data.csv", sep=";")
mypath <- tempfile(pattern = "tables", tmpdir="/Users/annagrether/Desktop/BA R", fileext = ".docx")
Cleaning the data
## Columns
data[ , c('external_lfdn', 'tester', 'dispcode', 'lastpage', 'quality', 'browser',
'referer', 'device_type', 'quota', 'quota_assignment', 'quota_rejected_id',
'page_history', 'hflip', 'vflip', 'output_mode', 'javascript', 'flash',
'session_id', 'language', 'cleaned', 'ats', 'datetime', 'date_of_last_access',
'date_of_first_mail', 'rts6683840', 'rts6683939', 'rts6683945', 'rts6683978',
'rts6683997', 'rts6684002', 'rts6684005', 'rts6689118')] <- list(NULL)
## Rows/participants
Only participants that haven’t already been removed in the previous rows get removed in the rows. So if a participant has been removed in the first row because a senseless answer was given in question “A3_whydifficulty”, the same person will not appear in the row with duration selecting even if the duration was bellow 2 minutes. This is because the same person can’t be removed twice. In the end 152 from 256 observations/participants are left.
drops <- c(41, 61, 62, 63, 64, 99, 112, 113, 114, 126, 127, 244, 245, 246, #Observed A3_whydifficulty: senseless answers (eg. Yes) or multiple times the same answers over some rows
129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 141, 142, 143, 144, #Same duration and exact same answers over some rows
145, 147, 148, 149, 150, 151, 154, 155, 156, 157, 159, 160, 161, 163, #Same duration and exact same answers over some rows
165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 207, 208, 210, 211, #Same duration and exact same answers over some rows
212, 214, 215, 216, 219, 220, 221, 222, 224, 225, 226, 227, 229, #Same duration and exact same answers over some rows
197, 198, 199, 205, 206, 209, 213, 217, 218, 223, 228, 230, 231, 232, 242, 243, 244, 249, #Control question (v_163): Answered Yes(1) instead of No (2)
43, 83, 84, 85, 87, 200, 240, #Age over 100 years old. Entry in column "Age" is bellow 1923 or doesn't have a number as an answer
53, 77, 128, 134, 140, 146, 153, 158, 162, 164, 170) #Duration is under 2 minutes and not -1. -1 implies that there was an error in the system.
data <- data[-drops,]
#Renaming of columns
data <- data %>%
dplyr::rename("Economic" = "A1_ec",
"Environmental" = "A1_env",
"Social" = "A1_so",
"Economic_Difficulty" = "A2_ec_difficulty",
"Economic_Action" = "A2_ec_action",
"Economic_Priority" = "A2_ec_priority",
"Environmental_Difficulty" = "A2_env_difficulty",
"Environmental_Action" = "A2_env_action",
"Environmental_Priority" = "A2_env_priority",
"Social_Difficulty" = "A2_so_difficulty",
"Social_Action" = "A2_so_action",
"Social_Priority" = "A2_so_priority",
"Proactive_Steps" = "B1_proactivesteps",
"Impact_of_Reputation" = "B2_ImpactRep",
"Economic_Mindset" = "C1_ec.infl.net0",
"Costs_for_offsetting_up_0" = "C2_0cost",
"Costs_for_offsetting_up_19" = "C2_19cost",
"Costs_for_offsetting_up_39" = "C2_39cost",
"Costs_for_offsetting_up_59" = "C2_59cost",
"Costs_for_offsetting_up_79" = "C2_79cost",
"Costs_for_offsetting_up_100" = "C2_100cost",
"Considering_future_costs" = "C3_futurecost",
"Engagement_of_Project_Managers" = "C4_0ConlevelPM",
"Frequency_usage" = "EY1_freq.usage",
"Experience_of_standard" = "FYuseyes1_experience",
"goals_0" = "EY2N1_0goals",
"goals_9" = "EY2N1_9goals",
"goals_19" = "EY2N1_19goals",
"goals_29" = "EY2N1_29goals",
"goals_39" = "EY2N1_39goals",
"goals_49" = "EY2N1_49goals",
"Tradeoff" = "EY3N2_tradeoff",
"Understanding_tools" = "GY1_levelofunderstanding",
"Sufficiency" = "GY2_sufficient",
"Knowing" = "D1KnowGPM_YN",
"Training" = "TrainingNo",
"Consider_for_future" = "FN1_consider",
"Why_not_consider" = "FN1_whyconsider",
"Improvement_Standard" = "GY3_improvement")
# Add ID column at beginning
data <- tibble::rowid_to_column(data, "ID")
# Defining NAs
data[data == -99 | data == -77 | data == -66] <- NA
data$Economic[data$Economic == 0] <- NA
data$Environmental[data$Environmental == 0] <- NA
data$Social[data$Social == 0] <- NA
data$Economic_Difficulty[data$Economic_Difficulty == 0] <- NA
data$Economic_Action[data$Economic_Action == 0] <- NA
data$Economic_Priority[data$Economic_Priority == 0] <- NA
data$Environmental_Difficulty[data$Environmental_Difficulty == 0] <- NA
data$Environmental_Action[data$Environmental_Action == 0] <- NA
data$Environmental_Priority[data$Environmental_Priority == 0] <- NA
data$Social_Difficulty[data$Social_Difficulty == 0] <- NA
data$Social_Action[data$Social_Action == 0] <- NA
data$Social_Priority[data$Social_Priority == 0] <- NA
data$Proactive_Steps[data$Proactive_Steps == 0] <- NA
data$Impact_of_Reputation[data$Impact_of_Reputation == 0] <- NA
data$Economic_Mindset[data$Economic_Mindset == 0] <- NA
data$Costs_for_offsetting_up_0[data$Costs_for_offsetting_up_0 == 0] <- NA
data$Costs_for_offsetting_up_19[data$Costs_for_offsetting_up_19 == 0] <- NA
data$Costs_for_offsetting_up_39[data$Costs_for_offsetting_up_39 == 0] <- NA
data$Costs_for_offsetting_up_59[data$Costs_for_offsetting_up_59 == 0] <- NA
data$Costs_for_offsetting_up_79[data$Costs_for_offsetting_up_79 == 0] <- NA
data$Costs_for_offsetting_up_100[data$Costs_for_offsetting_up_100 == 0] <- NA
data$Engagement_of_Project_Managers[data$Engagement_of_Project_Managers == 0] <- NA
data$goals_0[data$goals_0 == 0] <- NA
data$goals_9[data$goals_9 == 0] <- NA
data$goals_19[data$goals_19 == 0] <- NA
data$goals_29[data$goals_29 == 0] <- NA
data$goals_39[data$goals_39 == 0] <- NA
data$goals_49[data$goals_49 == 0] <- NA
data$Tradeoff[data$Tradeoff == 0] <- NA
data$Experience_of_standard[data$Experience_of_standard == 0] <- NA
data$Consider_for_future[data$Consider_for_future == 0] <- NA
data$Sector[data$Sector == 0] <- NA
# Recoding data; all 0s are No's and all 1s are Yes'
data$Training <- ifelse(data$Training == 0, 1, 0) # From 0:Training, 1: No training to 1:Training, 0:No training
data$Training <- factor(data$Training, levels = c(0,1), labels = c("No training", "Training"))
data$Consider_for_future <- ifelse(data$Consider_for_future == 3, 0, 1) #From 1: Yes consider and 3:Not consider to 1: Consider, 2: not consider
data$Consider_for_future <- factor(data$Consider_for_future, levels = c(0,1), labels = c("No", "Yes"))
# Writing some values as factors and giving labels
data$Knowing <- factor(data$Knowing,
labels = c("Yes", "No"),
ordered = TRUE)
# 1&2 together und 4&5 together --> because little people at 1 and 5
data <- data %>%
mutate(Proactive_Steps_zsmf = fancycut::fancycut(as.numeric(Proactive_Steps),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data$Proactive_Steps_zsmf[data$Proactive_Steps_zsmf == "missing"] <- NA
data$Proactive_Steps_zsmf <- droplevels(data$Proactive_Steps_zsmf)
# Median of duration
summary(data$duration)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.0 448.5 701.0 943.7 1145.2 11123.0
Hypotheses ** H1: Of the three pillars of sustainability, environmental sustainability has arrived the least in project management.
## Data preparation
# Gather columns Economic, Environmental and Social into long format and convert id and pillar into factor variables
# to be able to analyse first question in itself.
data.H1_wide_pillars <- data %>% dplyr::select(ID, Economic:Social)
data.H1_long_pillars <- data.H1_wide_pillars %>%
gather(key = "pillar", value = "pillar_score", Economic, Environmental, Social) %>%
convert_as_factor(ID, pillar)
# Gather columns Economic Difficulty, Economic Action and Economic Priority into long format (-> Economic_DAP) and convert ID and Economic into factor variables + same for Envrionment and Social
data.H1_wide_EconomicDAP <- data %>% dplyr::select(ID, Economic_Difficulty:Economic_Priority)
data.H1_long_EconomicDAP <- data.H1_wide_EconomicDAP %>%
gather(key = "Economic_DAP", value = "rank_ec", Economic_Difficulty, Economic_Action, Economic_Priority) %>%
convert_as_factor(ID, Economic_DAP)
data.H1_wide_EnvironmentalDAP <- data %>% dplyr::select(ID, Environmental_Difficulty:Environmental_Priority)
data.H1_long_EnvironmentalDAP <- data.H1_wide_EnvironmentalDAP %>%
gather(key = "Environment_DAP", value = "rank_env", Environmental_Difficulty, Environmental_Action, Environmental_Priority) %>%
convert_as_factor(ID, Environment_DAP)
data.H1_wide_SocialDAP <- data %>% dplyr::select(ID, Social_Difficulty:Social_Priority)
data.H1_long_SocialDAP <- data.H1_wide_SocialDAP %>%
gather(key = "Social_DAP", value = "rank_so", Social_Difficulty, Social_Action, Social_Priority) %>%
convert_as_factor(ID, Social_DAP)
# Bringing all three data sets DAP together
data.H1_long_pillars <- lsr::sortFrame(data.H1_long_pillars, ID)
data.H1_long_EconomicDAP <- lsr::sortFrame(data.H1_long_EconomicDAP, ID)
data.H1_long_EnvironmentalDAP <- lsr::sortFrame(data.H1_long_EnvironmentalDAP, ID)
data.H1_long_SocialDAP <- lsr::sortFrame(data.H1_long_SocialDAP, ID)
data.H1_long <- cbind(data.H1_long_pillars,data.H1_long_EconomicDAP[,-1], data.H1_long_EnvironmentalDAP[,-1], data.H1_long_SocialDAP[,-1])
# Removing unnecessary data frames
rm(data.H1_wide_EconomicDAP, data.H1_long_EconomicDAP, data.H1_wide_EnvironmentalDAP, data.H1_long_EnvironmentalDAP, data.H1_wide_SocialDAP, data.H1_long_SocialDAP, data.H1_long_pillars, data.H1_wide_pillars)
# Making factors and labels
data.H1_long$pillar_score <- factor(data.H1_long$pillar_score, labels = c("Very little extent", "Little extent", "Neutral", "Large extent", "Very large extent"),
ordered = TRUE)
Question1
## Descriptive Statistics
# Summary tables with median, min, max, NAs, Q1 etc. (not mean and sd bc ordinal data)
round(mosaic::favstats(~ Economic, data = data), 2) #missing: NAs
## min Q1 median Q3 max mean sd n missing
## 1 3 4 4 5 3.53 0.89 149 3
round(mosaic::favstats(~ Environmental, data = data), 2)
## min Q1 median Q3 max mean sd n missing
## 1 3 3 4 5 3.18 0.97 148 4
round(mosaic::favstats(~ Social, data = data), 2)
## min Q1 median Q3 max mean sd n missing
## 1 3 3 4 5 3.3 0.94 148 4
# Amount of people that answered what (very large to very little extent) per pillar: Table
table(data.H1_long$pillar, data.H1_long$pillar_score)
##
## Very little extent Little extent Neutral Large extent
## Economic 3 14 50 65
## Environmental 7 25 64 39
## Social 4 24 58 48
##
## Very large extent
## Economic 17
## Environmental 13
## Social 14
# 1&2 together und 4&5 together --> because little people at 1 and 5
data.H1_long <- data.H1_long %>%
mutate(pillar_score_zsmf = fancycut::fancycut(as.numeric(pillar_score),
Little_extent = "[1, 2]",
Neutral = "3",
Large_extent = "[4, 5]",
na.bucket = 'missing'))
data.H1_long$pillar_score_zsmf[data.H1_long$pillar_score_zsmf == "missing"] <- NA
data.H1_long$pillar_score_zsmf <- droplevels(data.H1_long$pillar_score_zsmf)
# Visualization of amount of answers: Stacked barplot for percentage of answers in "pillars" per "score" (extent of being addressed)
data.H1_long %>% filter(!is.na(pillar) & !is.na(pillar_score)) %>%
ggplot(aes(x = factor(pillar), fill = factor(6-as.numeric(pillar_score)))) +
geom_bar(position = "fill") +
labs(x = "Pillar", fill = "Extent of being addressed") +
# geom_text(aes(label=scales::percent(as.numeric(pillar))), position=position_stack(vjust=0.5), size=2) +
scale_fill_discrete(labels = c("very large extent", "4", "neutral", "2", "very little extent")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_classic() + #theme_(bw, minimal): background
ggtitle("The extent to which each pillar of sustainability\
is addressed in PM of the organization") +
theme(text = element_text(size = 14))
## Connection with H2
# Give labels to Proactive steps in data
data$Proactive_Steps <- factor(data$Proactive_Steps,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
# Is there correlation between the first statement of how likely it is that the environmental pillar of sustainability is addressed and to what extent Proactive steps are taken towards carbon neutrality?
A moderate to strong correlation btw the extent to which environemntal sustainability is addresses and likelihood a company takes steps towards carbon neutrality. –> makes sense. Weak correlations for the other 2.
cor.test(as.numeric(data$Proactive_Steps), data$Environmental, method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 277254, p-value = 3.23e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.465446
cor.test(as.numeric(data$Proactive_Steps), data$Economic, method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 348581, p-value = 2.298e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3415498
cor.test(as.numeric(data$Proactive_Steps), data$Social, method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 332164, p-value = 8.293e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3595797
# Plot on "to what extent company takes proactive steps to reach carbon neutrality and the likelihood that it addresses Environmental sustainability"
data %>% filter(!is.na(Proactive_Steps) & !is.na(Environmental)) %>%
ggplot(aes(x = factor(Proactive_Steps), fill = factor(Environmental))) +
geom_bar(position = "fill") +
labs(x = "Proactive Steps", fill = "Environmental") +
scale_fill_discrete(labels = c("very unlikely", "2", "3", "4", "very likely")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
ggtitle("Proactive steps taken with likelihood of addressing\
environmental sustainability") +
theme(text = element_text(size = 14)) +
theme_classic()
## Regression analysis
# Friedmann test: Is there a significant difference between the tendencies of multiple dependent variables?
H0: No association between the two variables/variables are the same –> No relationship between the pillar of sustainability and extent to which it has arrived in PM. H1: There is a significant relationship.
missings <- data %>% dplyr::filter(is.na(Economic) | is.na(Environmental) |is.na(Social)) # missing people out; necessary for kendallW
stats::friedman.test(y=as.matrix(data %>% dplyr::select(Economic:Social)), groups = data$ID)
##
## Friedman rank sum test
##
## data: as.matrix(data %>% dplyr::select(Economic:Social))
## Friedman chi-squared = 22.665, df = 2, p-value = 1.198e-05
# Effect size + Friedmann
DescTools::KendallW(t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social)), correct = TRUE, test = TRUE)
##
## Kendall's coefficient of concordance Wt
##
## data: t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social))
## Kendall chi-squared = 22.665, df = 2, subjects = 3, raters = 148,
## p-value = 1.198e-05
## alternative hypothesis: Wt is greater 0
## sample estimates:
## Wt
## 0.07657073
rcompanion::kendallW(t(data %>% dplyr::filter(!ID %in% missings$ID) %>% dplyr::select(Economic:Social)), correct = TRUE, ci=TRUE) #mit CI -- the effect size lies in the CI with 95% confidence --> not necessary
## W lower.ci upper.ci
## 1 0.0766 0.0277 0.145
Effect size is 0.0766: small effect Effect size, tells me how much the difference btw the groups is –> depending on the source/test (kendallW) it is low, medium or high
# Posthoc to determine where the difference is
pairwise.wilcox.test(as.numeric(data.H1_long$pillar_score), data.H1_long$pillar,paired = T, p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon signed rank test with continuity correction
##
## data: as.numeric(data.H1_long$pillar_score) and data.H1_long$pillar
##
## Economic Environmental
## Environmental 0.00027 -
## Social 0.00921 0.21750
##
## P value adjustment method: holm
# Effect size for posthoc test
coin::wilcoxsign_test(data$Economic ~ data$Environmental)@statistic@teststatistic/sqrt(148)
## [1] 0.3470781
coin::wilcoxsign_test(data$Economic ~ data$Social)@statistic@teststatistic/sqrt(148)
## [1] 0.2696815
coin::wilcoxsign_test(data$Social ~ data$Environmental)@statistic@teststatistic/sqrt(148)
## [1] 0.1001237
# Hypothesis test: Proportion test
Such that the pillar of sustainability has arrived in PM, at least 51% of participants must have chosen a score of 4 (large extent) or above.
# 1. Data preparation
data$economic_arrived <- ifelse(data$Economic >= 4, 1, 0) #0:not arrived, 1:arrived
data$economic_arrived <- factor(data$economic_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))
data$environmental_arrived <- ifelse(data$Environmental >= 4, 1, 0)
data$environmental_arrived <- factor(data$environmental_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))
data$social_arrived <- ifelse(data$Social >= 4, 1, 0)
data$social_arrived <- factor(data$social_arrived, levels = c(0,1), labels = c("Not arrived", "Arrived"))
# 2. Looking at amount of answers of sustainability "arrived" and "not arrived" in the pillars.
# Table putting all 3 relative amounts of arrived, not arrived of the 3 pillars together
table1 <- data.table::as.data.table(epiDisplay::tab1(data$economic_arrived, graph = FALSE, cum.percent = TRUE))
table1 <- table1[c(1,2), c(1)]
colnames(table1) <- "Arrival in PM"
table1[1,] <- "Not arrived"
table1[2,] <- "Arrived"
table1_ec <- data.table::as.data.table(epiDisplay::tab1(data$economic_arrived, graph = FALSE, cum.percent = TRUE))
table1_ec <- table1_ec[1:2, 5]
colnames(table1_ec) <- "Economic"
table1 <- cbind(table1, table1_ec)
table1_env <- data.table::as.data.table(epiDisplay::tab1(data$environmental_arrived, graph = FALSE, cum.percent = TRUE))
table1_env <- table1_env[1:2, 5]
colnames(table1_env) <- "Environmental"
table1 <- cbind(table1, table1_env)
table1_so <- data.table::as.data.table(epiDisplay::tab1(data$social_arrived, graph = FALSE, cum.percent = TRUE))
table1_so <- table1_so[1:2, 5]
colnames(table1_so) <- "Social"
table1 <- cbind(table1, table1_so)
table1 <- adorn_totals(table1, where = "row", name = "Total [%]")
ft1 <- flextable(table1)
ft1 <- flextable::colformat_double(ft1, digits = 1)
ft1 <- bold(ft1, bold=T, part = "header")
flextable::autofit(ft1)
Arrival in PM | Economic | Environmental | Social |
|---|---|---|---|
Not arrived | 45.0 | 64.9 | 58.1 |
Arrived | 55.0 | 35.1 | 41.9 |
Total [%] | 100.0 | 100.0 | 100.0 |
prop.test(x = 83, n = 153, p = 0.5, alternative = "greater")
##
## 1-sample proportions test with continuity correction
##
## data: 83 out of 153
## X-squared = 0.94118, df = 1, p-value = 0.166
## alternative hypothesis: true p is greater than 0.5
## 95 percent confidence interval:
## 0.4728247 1.0000000
## sample estimates:
## p
## 0.5424837
Not significant; Not more than 50% of the people have answered more than 4 significantly –> Economic sustainability hasn’t arrived in PM with the definition given Just because it’s not significant, it doesn’t necessarily mean that Economic sustainability really hasn’t arrived in PM. It just couldn’t be shown that significantly more than half of the participants chose 4 (large extent) or 5 (very large extent). Still more than half chose 4 or higher. It just isn’t significant.
**
Questions 2,3,4
##Descriptive Statistics
#Amount of people that answered what (very large to very little extent) per pillar: Table
Difficulty: 1easiet,3most difficult, Action: 1least done, 3most done, Priority: 1highest, 3lowest
table(data.H1_long$Economic_DAP, data.H1_long$rank_ec)
##
## 1 2 3
## Economic_Action 19 67 54
## Economic_Difficulty 57 69 16
## Economic_Priority 66 54 23
table(data.H1_long$Environment_DAP, data.H1_long$rank_env)
##
## 1 2 3
## Environmental_Action 43 75 22
## Environmental_Difficulty 30 74 43
## Environmental_Priority 41 70 30
table(data.H1_long$Social_DAP, data.H1_long$rank_so)
##
## 1 2 3
## Social_Action 46 75 22
## Social_Difficulty 33 77 34
## Social_Priority 35 65 42
#Frequency tables in percentages
(table.Economic_DAP <- round(prop.table(table(data.H1_long$Economic_DAP, data.H1_long$rank_ec), 1), 4)*100)
##
## 1 2 3
## Economic_Action 13.57 47.86 38.57
## Economic_Difficulty 40.14 48.59 11.27
## Economic_Priority 46.15 37.76 16.08
(table.Environment_DAP <- round(prop.table(table(data.H1_long$Environment_DAP, data.H1_long$rank_env), 1), 4)*100)
##
## 1 2 3
## Environmental_Action 30.71 53.57 15.71
## Environmental_Difficulty 20.41 50.34 29.25
## Environmental_Priority 29.08 49.65 21.28
(table.Social_DAP <- round(prop.table(table(data.H1_long$Social_DAP, data.H1_long$rank_so), 1), 4)*100)
##
## 1 2 3
## Social_Action 32.17 52.45 15.38
## Social_Difficulty 22.92 53.47 23.61
## Social_Priority 24.65 45.77 29.58
Visualization of amount of answers: Stacked barplot for percentage of answers in “pillars” per “Question of action, difficulty and priority” and their rank “Minimal” summarizes easiest to achieve, lowest priority, and least action taken “Maximal” summarizes hardest to achieve, highest priority, and most action taken.
g1 <- data.H1_long %>% filter(!is.na(Economic_DAP) & !is.na(rank_ec)) %>%
ggplot(aes(x = factor(Economic_DAP), fill = factor(4-as.numeric(rank_ec)))) +
geom_bar(position = "fill") +
labs(x = "Economic", fill = "Rank") +
scale_fill_discrete(labels = c("maximal", "2", "minimal")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) +
scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF")) +
theme_minimal() + #theme_(bw, minimal): background
theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
ggtitle("Ranking of each pillar in their priority, difficulty\
and the action being taken to be achieved") +
theme(text = element_text(size = 14)) +
coord_flip()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
g2 <- data.H1_long %>% filter(!is.na(Environment_DAP) & !is.na(rank_env)) %>%
ggplot(aes(x = factor(Environment_DAP), fill = factor(4-as.numeric(rank_env)))) +
geom_bar(position = "fill") +
labs(x = "Environmental", fill = "Rank") +
scale_fill_discrete(labels = c("maximal", "2", "minimal")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) +
scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF")) +
theme_minimal() + #theme_(bw, minimal): background
theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
theme(text = element_text(size = 14)) +
coord_flip()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
g3 <- data.H1_long %>% filter(!is.na(Social_DAP) & !is.na(rank_so)) %>%
ggplot(aes(x = factor(Social_DAP), fill = factor(4-as.numeric(rank_so)))) +
geom_bar(position = "fill") +
labs(x = "Social", fill = "Rank") +
scale_fill_discrete(labels = c("maximal", "2", "minimal")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
scale_x_discrete(labels = c("Priority", "Difficulty", "Action")) +
scale_fill_manual(values = c("#F8766D","#00C08D", "#619CFF")) +
theme_minimal() + #theme_(bw, minimal): background
theme(text = element_text(size = 14)) +
coord_flip()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
plot_grid(g1, g2, g3, align = "v", ncol = 1)
Question 5
# Making data frame
#df.open1 <- dplyr::select(data, Economic_Difficulty:A3_whydifficulty)
#df.open1 <- na.omit(df.open1)
#df.open1 <- as.data.table(df.open1)
# Export as an excel file
# write.xlsx(df.open1, file = "df.open1.xlsx", overwrite = TRUE)
H2: The higher the implied temperature rise (ITR) of an industry group, the more likely it is that project management within that industry group takes action to reach carbon neutrality.
## Data preparation
ITR <- read_excel("ITR by GICS industry group.xlsx")
ITR <- cbind(n = 1:nrow(ITR), ITR)
data.H2 <- dplyr::select(data, ID, Sector, Proactive_Steps, Proactive_Steps_zsmf, Impact_of_Reputation)
data.H2 <- data.H2 %>%
dplyr::mutate(ITR = case_when(Sector == 1 ~ '6.0', Sector == 2 ~ '3.7', Sector == 3 ~ '3.5',
Sector == 4 ~ '2.9', Sector == 5 ~ '2.8', Sector == 6 ~ '2.5',
Sector == 7 ~ '2.2', Sector == 8 ~ '2.2', Sector == 9 ~ '2.2',
Sector == 10 ~ '2.0', Sector == 11 ~ '1.9', Sector == 12 ~ '1.9',
Sector == 13 ~ '1.9', Sector == 14 ~ '1.8', Sector == 15 ~ '1.8',
Sector == 16 ~ '1.8', Sector == 17 ~ '1.7', Sector == 18 ~ '1.6',
Sector == 19 ~ '1.6', Sector == 20 ~ '1.6', Sector == 21 ~ '1.4',
Sector == 22 ~ '1.4', Sector == 23 ~ '1.4', Sector == 24 ~ '1.4'))
data.H2$ITR <- as.numeric(data.H2$ITR)
data.H2$Proactive_Steps <- factor(data.H2$Proactive_Steps,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H2$Impact_of_Reputation <- factor(data.H2$Impact_of_Reputation, labels = c("not at all important", "low importance", "neutral", "very important", "extremely important"),
ordered = TRUE)
Question1
## Descriptive Statistics
# Frequency table with median
epiDisplay::tab1(data.H2$Proactive_Steps, graph = FALSE, cum.percent = TRUE) #percentages more relevant
## data.H2$Proactive_Steps :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## very unlikely 9 5.9 5.9 6.0 6.0
## unlikely 18 11.8 17.8 12.0 18.0
## neutral 48 31.6 49.3 32.0 50.0
## likely 55 36.2 85.5 36.7 86.7
## very likely 20 13.2 98.7 13.3 100.0
## NA's 2 1.3 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Amount&Percentage of people in Proactive Steps
epiDisplay::tab1(data.H2$Proactive_Steps_zsmf, graph = FALSE, cum.percent = TRUE)
## data.H2$Proactive_Steps_zsmf :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## Unlikely 27 17.8 17.8 18 18
## Neutral 48 31.6 49.3 32 50
## Likely 75 49.3 98.7 50 100
## NA's 2 1.3 100.0 0 100
## Total 152 100.0 100.0 100 100
# Present epidisplay nicely
t.H2.1 <- as.data.table(epiDisplay::tab1(data.H2$Proactive_Steps, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H2.1 <- t.H2.1[,c(2,3,4,6,7)]
colnames(t.H2.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H2.1.ft <- flextable(t.H2.1)
t.H2.1.ft <- bold(t.H2.1.ft, bold=T, part = "header")
autofit(t.H2.1.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
very unlikely | 9 | 5.9 | 6.0 | 6.0 |
unlikely | 18 | 11.8 | 12.0 | 18.0 |
neutral | 48 | 31.6 | 32.0 | 50.0 |
likely | 55 | 36.2 | 36.7 | 86.7 |
very likely | 20 | 13.2 | 13.3 | 100.0 |
NA's | 2 | 1.3 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Number of answers in a barplot
ggplot(data = subset(data.H2, !is.na(Proactive_Steps)), aes(x = factor(Proactive_Steps))) +
scale_x_discrete(name = "Proactive Steps") +
geom_bar(aes(y=after_stat(count)/sum(after_stat(count))), fill = "#00C08D") +
scale_y_continuous(labels=scales::percent, limits = c(0,0.5)) +
ggtitle("Number of participants with increasing likelihood of taking\
proactive steps towards carbon neutrality") +
labs(y = "Percent") +
theme_light()
# Dataframe including Sector 14 which has 0 answers
sector_id <- list(Sector = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24))
sector_labels <- list(name = c("Energy", "Materials", "Automobiles & Components", "Utilities", "Capital Goods", "Food Beverage & Tobacco", "Semiconductors & Semiconductor Equipment",
"Consumber Durables & Apparel", "Consumer Discretionary Distribution & Retail", "Consumer Staples Distribution & Retail", "Banks", "Transportation",
"Technology Hardware & Equipment", "Equity Real Estate Investment Trusts (REITs)", "Consumer Services", "Financial Services", "Commercial & Professional Services", "Household & Personal Products",
"Health Care Equipment & Services", "Pharmaceuticals, Biotechnology & Life Sciences", "Media & Entertainment", "Telecommunication Services", "Software & Services", "Insurance"))
# Including Sector 14 into the barplot
Sector_df1 <- data.frame(sector_id, sector_labels)
Sector_df2 <- data.H2 %>%
group_by(Sector) %>%
dplyr::summarize(count = n())
Sector_df <- full_join(Sector_df1, Sector_df2, by = "Sector")
Sector_df[14, 3] <- 0
Sector_df <- na.omit(Sector_df)
Sector_df$name <- factor(Sector_df$name, levels = Sector_df$name)
At least 1 answer per sector except in sector 14 (Equity Real Estate Investment Trusts (REITs)). Most answers in the ‘Banks’ and ‘Financial services’ sector.
ggplot(data = Sector_df, aes(y = count, x = name)) +
scale_x_discrete(name = "Industry Sector") +
scale_y_continuous(name = "Count") +
geom_bar(stat = "identity", fill = "#00C08D") +
ggtitle("Number of participants per industry sector") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust=1),
plot.title = element_text(hjust=0.0)) +
theme(text = element_text(size = 14))
table(data.H2$Sector)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24
## 10 10 8 11 13 10 8 2 1 2 16 2 4 4 14 10 1 3 8 4 1 4 1
# Looking at ITRs
round(favstats(~ ITR, data = data.H2), 2)
## min Q1 median Q3 max mean sd n missing
## 1.4 1.8 2 2.9 6 2.52 1.15 147 5
# Boxplot: ITR per answer of Proactive steps
data.H2 %>% filter(!is.na(Proactive_Steps)) %>% #!is.na: nicht NAs kommen weiter
ggplot(aes(y = Proactive_Steps, x = ITR, fill = Proactive_Steps)) + #data.H2 darf ich hier nicht auch noch stehen haben
labs(x = "ITR [°C]", y = "Proactive Steps") +
scale_fill_manual(values = c("#E76BF3","#619CFF", "#00C08D", "#7CAE00", "#F8766D"), guide = guide_legend(reverse = TRUE)) +
theme_minimal() +
theme(text = element_text(size = 14)) +
ggtitle("ITR per category of Proactive Steps") +
geom_boxplot()
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).
# Spearman's correlation between ITR and likelihood of steps being taken towards carbon neutrality
Very weak non-significant correlations
cor.test(as.numeric(data.H2$Proactive_Steps), as.numeric(data.H2$ITR), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 492853, p-value = 0.5508
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.04976658
cor.test(as.numeric(data.H2$Proactive_Steps_zsmf), as.numeric(data.H2$ITR), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 457897, p-value = 0.159
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1171627
## Regression Analysis
# OLR
H2.OLR <- polr(Proactive_Steps_zsmf ~ ITR, data = data.H2, Hess = TRUE)
summary(H2.OLR)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ ITR, data = data.H2, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## ITR 0.2908 0.1543 1.885
##
## Intercepts:
## Value Std. Error t value
## Unlikely|Neutral -0.8705 0.4279 -2.0346
## Neutral|Likely 0.7336 0.4179 1.7555
##
## Residual Deviance: 292.3737
## AIC: 298.3737
## (6 observations deleted due to missingness)
## Residual analysis
# summary table
summary_table_H2.OLR <- coef(summary(H2.OLR))
pval1 <- round(pnorm(abs(summary_table_H2.OLR[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H2.OLR <- cbind(summary_table_H2.OLR, "p value" = pval1) #p-value: 0.059 (just) not significant
summary_table_H2.OLR #only p-value of IV (ITR) important; p > 0.05: no significant correlation btw. ITR and Proactive steps. --> just not significant
## Value Std. Error t value p value
## ITR 0.2907705 0.1542909 1.884560 0.059
## Unlikely|Neutral -0.8705083 0.4278597 -2.034565 0.042
## Neutral|Likely 0.7335940 0.4178897 1.755473 0.079
pval1 <- pval1[1:5]
cbind(exp(cbind(OR = coef(H2.OLR), ci = confint(H2.OLR))),"p value" = pval1) #confidence interval, OR (odds ratio): When ITR gets higher by 1°C, the odds that we get a higher value (more likely) for Proactive Steps by 1.34 times. --> Odds increase by 34%
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H2.OLR), ci = confint(H2.OLR))), `p value`
## = pval1): number of rows of result is not a multiple of vector length (arg 2)
## OR ci p value
## 2.5 % 1.337458 1.000437 0.059
## 97.5 % 1.337458 1.844209 0.042
brant::brant(H2.OLR) #not significant --> which is good!! --> because H0: Requirements of parallel regression assumption met
## --------------------------------------------
## Test for X2 df probability
## --------------------------------------------
## Omnibus 0.26 1 0.61
## ITR 0.26 1 0.61
## --------------------------------------------
##
## H0: Parallel Regression Assumption holds
The correlation betwen the ITR and Proactive steps is just not significant. When the ITR(EV) rises by 1°C (1 unit) then the chance for a 1 unit increase in proactive steps increases by (1.34 times) 34% (OR = 1.34, p = 0.059).
**
Question 2
## Descriptive Statistics
# Frequency table
epiDisplay::tab1(data.H2$Impact_of_Reputation, graph = FALSE, cum.percent = TRUE)
## data.H2$Impact_of_Reputation :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## not at all important 3 2.0 2.0 2.0 2.0
## low importance 6 3.9 5.9 4.0 6.0
## neutral 56 36.8 42.8 37.1 43.0
## very important 66 43.4 86.2 43.7 86.8
## extremely important 20 13.2 99.3 13.2 100.0
## NA's 1 0.7 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Present epidisplay nicely
t.H2.2 <- as.data.table(epiDisplay::tab1(data.H2$Impact_of_Reputation, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H2.2 <- t.H2.2[,c(2,3,4,6,7)]
colnames(t.H2.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H2.2.ft <- flextable(t.H2.2)
t.H2.2.ft <- bold(t.H2.2.ft, bold=T, part = "header")
autofit(t.H2.2.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
not at all important | 3 | 2.0 | 2.0 | 2.0 |
low importance | 6 | 3.9 | 4.0 | 6.0 |
neutral | 56 | 36.8 | 37.1 | 43.0 |
very important | 66 | 43.4 | 43.7 | 86.8 |
extremely important | 20 | 13.2 | 13.2 | 100.0 |
NA's | 1 | 0.7 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Overview over the number of answers per point on the likert scale "Impact of Reputation"
ggplot(data = subset(data.H2, !is.na(Impact_of_Reputation)), aes(Impact_of_Reputation)) +
scale_x_discrete(name = "Impact of Reputation") +
geom_bar(fill = "#00C08D") +
ggtitle("Number of answers with increasing importance of reputation") +
theme(text = element_text(size = 14)) +
theme_light()
# Spearman's correlation between reputation and likelihood of steps being taken towards carbon neutrality
A rather moderate correlation
cor.test(as.numeric(data.H2$Impact_of_Reputation), as.numeric(data.H2$Proactive_Steps), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 321811, p-value = 1.291e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4162688
H3: The higher the likelihood that a company considers the long-term costs because of the anticipation of having to offset CO2 emissions, the more likely it is that project management within the company will take action to reach carbon neutrality.
## Data preparation
# Making data frame for H3
data.H3_wide <- dplyr::select(data, ID, Proactive_Steps, Proactive_Steps_zsmf, Economic_Mindset,
Costs_for_offsetting_up_0, Costs_for_offsetting_up_19, Costs_for_offsetting_up_39, Costs_for_offsetting_up_59, Costs_for_offsetting_up_79, Costs_for_offsetting_up_100,
Considering_future_costs, Engagement_of_Project_Managers)
# Defining variables as factors with labels
data.H3_wide$Economic_Mindset <- factor(data.H3_wide$Economic_Mindset,
labels = c("very little extent", "little extent", "neutral", "large extent", "very large extent"),
ordered = TRUE)
data.H3_wide$Proactive_Steps <- factor(data.H3_wide$Proactive_Steps,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Considering_future_costs <- factor(data.H3_wide$Considering_future_costs,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Engagement_of_Project_Managers <- factor(data.H3_wide$Engagement_of_Project_Managers,
labels = c("very little extent", "little extent", "neutral", "large extent", "very large extent"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_0 <- factor(data.H3_wide$Costs_for_offsetting_up_0,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_19 <- factor(data.H3_wide$Costs_for_offsetting_up_19,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_39 <- factor(data.H3_wide$Costs_for_offsetting_up_39,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_59 <- factor(data.H3_wide$Costs_for_offsetting_up_59,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_79 <- factor(data.H3_wide$Costs_for_offsetting_up_79,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
data.H3_wide$Costs_for_offsetting_up_100 <- factor(data.H3_wide$Costs_for_offsetting_up_100,
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
Question 1
##Descriptive
# Frequency table
epiDisplay::tab1(data.H3_wide$Economic_Mindset, graph = FALSE, cum.percent = TRUE)
## data.H3_wide$Economic_Mindset :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## very little extent 5 3.3 3.3 3.3 3.3
## little extent 25 16.4 19.7 16.7 20.0
## neutral 48 31.6 51.3 32.0 52.0
## large extent 57 37.5 88.8 38.0 90.0
## very large extent 15 9.9 98.7 10.0 100.0
## NA's 2 1.3 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Present epidisplay nicely
t.H3.1 <- as.data.table(epiDisplay::tab1(data.H3_wide$Economic_Mindset, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H3.1 <- t.H3.1[,c(2,3,4,6,7)]
colnames(t.H3.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H3.1.ft <- flextable(t.H3.1)
t.H3.1.ft <- bold(t.H3.1.ft, bold=T, part = "header")
autofit(t.H3.1.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
very little extent | 5 | 3.3 | 3.3 | 3.3 |
little extent | 25 | 16.4 | 16.7 | 20.0 |
neutral | 48 | 31.6 | 32.0 | 52.0 |
large extent | 57 | 37.5 | 38.0 | 90.0 |
very large extent | 15 | 9.9 | 10.0 | 100.0 |
NA's | 2 | 1.3 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Overview over the number of answers per point on the likert scale "Economic_Mindset"
ggplot(data = subset(data.H3_wide, !is.na(Economic_Mindset)), aes(Economic_Mindset)) +
scale_x_discrete(name = "Influence of economic mindset") +
geom_bar(fill = "#00C08D") +
ggtitle("Number of answers per category of the variable 'economic mindset'") +
theme(text = element_text(size = 14)) +
theme_light()
# Spearman's correlation between the extent of influence of economic sustainability mindset on decision-making processes related to carbon neutrality and likelihood of steps being taken towards carbon neutrality.
The more the economic sust. mindset is influencing decision making processes regarding carbon neutrality, the more likely they will take steps towards carbon neutrality to prevent future costs.
cor.test(as.numeric(data.H3_wide$Economic_Mindset), as.numeric(data.H3_wide$Proactive_Steps), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 318329, p-value = 7.939e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4225846
0.422: Moderate to strong correlation (depending on source)
# Visualization of numbwer of answers: Stacked barplot for percentage of answers of likelihood of Proactive steps per points on scale of "Economic mindset".
data.H3_wide %>% filter(!is.na(Economic_Mindset) & !is.na(Proactive_Steps)) %>%
ggplot(aes(x = factor(Economic_Mindset), fill = factor(6-as.numeric(Proactive_Steps)))) + #x: EV, fill: ZV
geom_bar(position = "fill") +
labs(x = "Economic Mindset", fill = "Proactive Steps") +
scale_fill_discrete(labels = c("very likely", "4", "3", "2", "very unlikely")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() + #theme_(bw, minimal): background
theme(text = element_text(size = 14)) +
ggtitle("Influence of the economic mindset on proactive steps\
towards carbon neutrality")
Question 2
## Data preparation
# Costs for offsetting into long format
data.H3_long <- data.H3_wide %>%
gather(key = "Offsetting", value = "score",
Costs_for_offsetting_up_0, Costs_for_offsetting_up_19, Costs_for_offsetting_up_39,
Costs_for_offsetting_up_59, Costs_for_offsetting_up_79, Costs_for_offsetting_up_100) %>%
convert_as_factor(ID, Offsetting)
data.H3_long$score <- factor(data.H3_long$score, levels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
labels = c("very unlikely", "unlikely", "neutral", "likely", "very likely"),
ordered = TRUE)
# Levels as factors and in the right order
data.H3_long$Offsetting <- factor(data.H3_long$Offsetting, levels=c("Costs_for_offsetting_up_0", "Costs_for_offsetting_up_19", "Costs_for_offsetting_up_39", "Costs_for_offsetting_up_59", "Costs_for_offsetting_up_79", "Costs_for_offsetting_up_100"))
## Descriptive
#Frequency table in percentages
(table.offsetting <- round(prop.table(table(data.H3_long$Offsetting, data.H3_long$score), 1), 4)*100)
##
## very unlikely unlikely neutral likely very likely
## Costs_for_offsetting_up_0 10.07 11.41 28.19 20.81 29.53
## Costs_for_offsetting_up_19 7.48 8.84 29.93 36.05 17.69
## Costs_for_offsetting_up_39 7.04 11.97 38.73 32.39 9.86
## Costs_for_offsetting_up_59 10.14 16.22 32.43 28.38 12.84
## Costs_for_offsetting_up_79 18.00 22.00 21.33 24.67 14.00
## Costs_for_offsetting_up_100 31.72 12.41 19.31 16.55 20.00
table.offsetting <- as.data.frame(table.offsetting)
table.offsetting <- pivot_wider(table.offsetting, names_from = Var2, values_from = Freq)
table.offsetting$Var1 <- car::recode(table.offsetting$Var1, "'Costs_for_offsetting_up_0'='Up 0%';'Costs_for_offsetting_up_19'='Up 19%'; 'Costs_for_offsetting_up_39'='Up 39%';'Costs_for_offsetting_up_59'='Up 59%';'Costs_for_offsetting_up_79'='Up 79%';'Costs_for_offsetting_up_100'='Up 100%'")
# Looking at amount of answers of the points on the likert scale (likelihood of taking action to reach carbon neutrality).
# Table with increasing costs and likelihood
colnames(table.offsetting) <- c("Mandatory costs for Offsetting", "very unlikely", "unlikely", "neutral", "likely", "very likely")
ft2 <- flextable(table.offsetting)
ft2 <- flextable::colformat_double(ft2, digits = 1)
ft2 <- bold(ft2, bold=T, part = "header")
flextable::autofit(ft2)
Mandatory costs for Offsetting | very unlikely | unlikely | neutral | likely | very likely |
|---|---|---|---|---|---|
Up 0% | 10.1 | 11.4 | 28.2 | 20.8 | 29.5 |
Up 19% | 7.5 | 8.8 | 29.9 | 36.0 | 17.7 |
Up 39% | 7.0 | 12.0 | 38.7 | 32.4 | 9.9 |
Up 59% | 10.1 | 16.2 | 32.4 | 28.4 | 12.8 |
Up 79% | 18.0 | 22.0 | 21.3 | 24.7 | 14.0 |
Up 100% | 31.7 | 12.4 | 19.3 | 16.6 | 20.0 |
# Stacked barplot for percentage of answers of likelihood of company taking action to reach carbon neutrality per increasing costs for offsetting emissions.
data.H3_long %>% filter(!is.na(Offsetting) & !is.na(score)) %>%
ggplot(aes(x = factor(Offsetting), fill = factor(score))) +
geom_bar(position = "fill") +
labs(x = "Costs for offsetting", fill = "Likelihood of company \
taking action to achieve\
carbon neutrality") +
scale_fill_discrete(labels = c("very unlikely", "2", "3", "4", "very likely")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() + #theme_(bw, minimal): background
ggtitle("Likelihood of organization taking action to achieve carbon\
neutrality as mandatory carbon offsetting costs rise") +
theme(text = element_text(size = 14)) +
theme(axis.text.x = element_text(angle = 45, hjust=1),
plot.title = element_text(hjust=0.0)) #h.just:horizontal adjustment (0.5 in the middle)
# Spearman's correlation between increasing mandatory costs for offsetting carbon emissions and likelihood of steps being taken towards carbon neutrality.
cor.test(as.numeric(data.H3_long$Offsetting), as.numeric(data.H3_long$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 134577317, p-value = 6.48e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1808533
A weak but significant negative correlation: The likelihood of taking action decreases with increasing costs. Maybe the question wasn’t correctly understood by many?
**
Question 3
## Data preparation
# 1&2 together und 4&5 together --> because little people at 1 and 5
data.H3_wide <- data.H3_wide %>%
mutate(Considering_future_costs_zsmf = fancycut::fancycut(as.numeric(Considering_future_costs),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H3_wide$Considering_future_costs_zsmf[data.H3_wide$Considering_future_costs_zsmf=="missing"] <- NA
data.H3_wide$Considering_future_costs_zsmf <- droplevels(data.H3_wide$Considering_future_costs_zsmf)
data.H3_wide <- data.H3_wide %>%
mutate(Economic_Mindset_zsmf = fancycut::fancycut(as.numeric(Economic_Mindset),
`Little extent` = "[1, 2]",
Neutral = "3",
`Large extent` = "[4, 5]",
na.bucket = 'missing'))
data.H3_wide$Economic_Mindset_zsmf[data.H3_wide$Economic_Mindset_zsmf=="missing"] <- NA
data.H3_wide$Economic_Mindset_zsmf <- droplevels(data.H3_wide$Economic_Mindset_zsmf)
## Descriptive
# Frequency table
epiDisplay::tab1(data.H3_wide$Considering_future_costs, graph = FALSE, cum.percent = TRUE)
## data.H3_wide$Considering_future_costs :
## Frequency Percent Cum. percent
## very unlikely 5 3.3 3.3
## unlikely 16 10.5 13.8
## neutral 50 32.9 46.7
## likely 45 29.6 76.3
## very likely 36 23.7 100.0
## Total 152 100.0 100.0
# Present epidisplay nicely
t.H3.3 <- as.data.table(epiDisplay::tab1(data.H3_wide$Considering_future_costs, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H3.3 <- t.H3.3[,c(2,3,4,5)]
colnames(t.H3.3) <- c("Category", "Frequency", "valid %", "cum. valid %")
t.H3.3.ft <- flextable(t.H3.3)
t.H3.3.ft <- bold(t.H3.3.ft, bold=T, part = "header")
autofit(t.H3.3.ft)
Category | Frequency | valid % | cum. valid % |
|---|---|---|---|
very unlikely | 5 | 3.3 | 3.3 |
unlikely | 16 | 10.5 | 13.8 |
neutral | 50 | 32.9 | 46.7 |
likely | 45 | 29.6 | 76.3 |
very likely | 36 | 23.7 | 100.0 |
Total | 152 | 100.0 | 100.0 |
# Plot on "likelihood of taking proactive steps with differences in the likelihood of the company considering future costs"
data.H3_wide %>% filter(!is.na(Considering_future_costs) & !is.na(Proactive_Steps)) %>%
ggplot(aes(x = factor(Considering_future_costs), fill = factor(6-as.numeric(Proactive_Steps)))) + #x: EV, fill: ZV
geom_bar(position = "fill") +
labs(x = "Considering future costs regarding offsetting emissions", fill = "Proactive Steps") +
scale_fill_discrete(labels = c("very likely", "4", "3", "2", "very unlikely")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() +
theme(text = element_text(size = 14)) +
ggtitle("Influence of considering future offset costs on\
proactive steps")
# Spearman's correlation between increasing mandatory costs for offsetting carbon emissions and likelihood of steps being taken towards carbon neutrality.
cor.test(as.numeric(data.H3_wide$Proactive_Steps), as.numeric(data.H3_wide$Considering_future_costs), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 342873, p-value = 7.847e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3904216
A weak but significant correlation
# Spearman's correlation of economic sustainability mindset and considering future costs of having to offset CO2 emissions
The more a company is already thinking about future costs of having to offset CO2 emissions, the more the economic mindset is influencing decision-making processes related to carbon neutrality.
cor.test(as.numeric(data.H3_wide$Economic_Mindset), as.numeric(data.H3_wide$Considering_future_costs), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 352009, p-value = 2.395e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3741785
## Regression Analysis
# OLR testing H3
H3.OLR.1 <- polr(Proactive_Steps_zsmf ~ Considering_future_costs_zsmf, data = data.H3_wide, Hess = TRUE)
summary(H3.OLR.1)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ Considering_future_costs_zsmf,
## data = data.H3_wide, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Considering_future_costs_zsmfNeutral 0.9614 0.4819 1.995
## Considering_future_costs_zsmfLikely 1.7908 0.4726 3.789
##
## Intercepts:
## Value Std. Error t value
## Unlikely|Neutral -0.3924 0.4053 -0.9681
## Neutral|Likely 1.2685 0.4224 3.0030
##
## Residual Deviance: 289.6303
## AIC: 297.6303
## (2 observations deleted due to missingness)
H3.OLR.2 <- polr(Proactive_Steps_zsmf ~ Considering_future_costs_zsmf + Economic_Mindset_zsmf, data = data.H3_wide, Hess = TRUE)
summary(H3.OLR.2)
## Call:
## polr(formula = Proactive_Steps_zsmf ~ Considering_future_costs_zsmf +
## Economic_Mindset_zsmf, data = data.H3_wide, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Considering_future_costs_zsmfNeutral 0.4807 0.5202 0.9241
## Considering_future_costs_zsmfLikely 1.0965 0.5080 2.1585
## Economic_Mindset_zsmfNeutral 0.9249 0.4688 1.9729
## Economic_Mindset_zsmfLarge extent 1.9515 0.4813 4.0549
##
## Intercepts:
## Value Std. Error t value
## Unlikely|Neutral 0.1472 0.4503 0.3268
## Neutral|Likely 1.9928 0.4897 4.0691
##
## Residual Deviance: 269.5101
## AIC: 281.5101
## (3 observations deleted due to missingness)
## Residual analysis
# summary table1
summary_table_H3.OLR.1 <- coef(summary(H3.OLR.1))
pval2 <- round(pnorm(abs(summary_table_H3.OLR.1[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H3.OLR.1 <- cbind(summary_table_H3.OLR.1, "p value" = pval2)
summary_table_H3.OLR.1 #only p-value of IV important --> first 2 rows
## Value Std. Error t value p value
## Considering_future_costs_zsmfNeutral 0.9613997 0.4818886 1.9950666 0.046
## Considering_future_costs_zsmfLikely 1.7907847 0.4726029 3.7891949 0.000
## Unlikely|Neutral -0.3923514 0.4052893 -0.9680774 0.333
## Neutral|Likely 1.2685439 0.4224214 3.0030297 0.003
pval2 <- pval2[1:5]
cbind(exp(cbind(OR = coef(H3.OLR.1), ci = confint(H3.OLR.1))),"p value" = pval2)
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H3.OLR.1), ci = confint(H3.OLR.1))), :
## number of rows of result is not a multiple of vector length (arg 2)
## OR 2.5 % 97.5 % p value
## Considering_future_costs_zsmfNeutral 2.615355 1.024621 6.83358 0.046
## Considering_future_costs_zsmfLikely 5.994154 2.401823 15.44645 0.000
Category left out is the reference group: Here Consideringfuturecosts unlikely. ConsideringfuturecostsNeutral differs non-significantly from consideringfuturecostsunlikely regarding my DV (Proactive Steps). But we can see that the odds ratio is 2.76 which means within my sample the odds of having neutral are 2.76 times higher for an increase in (1 unit of) Proactive steps compared to unlikely.
brant::brant(H3.OLR.1) #omnibus: not significant --> which is good!! --> because H0: Requirement of proportional odds/parallel regression assumption met: Steigung zwischen likert points ist ungefähr die selbe zwischen proactive steps und considering future costs
## ----------------------------------------------------------------------------
## Test for X2 df probability
## ----------------------------------------------------------------------------
## Omnibus 1.37 2 0.5
## Considering_future_costs_zsmfNeutral 0.01 1 0.92
## Considering_future_costs_zsmfLikely 0.74 1 0.39
## ----------------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
# summary table2
“worse” p-values: no significant correlation btw proactive steps and considering future costs/or/economic mindset for category unlikely to neutral. significant correlation btw proactive steps and considering future costs/or/economic mindset for category unlikely to likely.
summary_table_H3.OLR.2 <- coef(summary(H3.OLR.2))
pval3 <- round(pnorm(abs(summary_table_H3.OLR.2[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H3.OLR.2 <- cbind(summary_table_H3.OLR.2, "p value" = pval3)
summary_table_H3.OLR.2
## Value Std. Error t value p value
## Considering_future_costs_zsmfNeutral 0.4806577 0.5201587 0.9240598 0.355
## Considering_future_costs_zsmfLikely 1.0965260 0.5080100 2.1584732 0.031
## Economic_Mindset_zsmfNeutral 0.9248988 0.4687918 1.9729415 0.049
## Economic_Mindset_zsmfLarge extent 1.9515386 0.4812796 4.0548953 0.000
## Unlikely|Neutral 0.1471618 0.4503295 0.3267869 0.744
## Neutral|Likely 1.9927830 0.4897380 4.0690799 0.000
pval3 <- pval3[1:5]
cbind(exp(cbind(OR = coef(H3.OLR.2), ci = confint(H3.OLR.2))),"p value" = pval3)
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H3.OLR.2), ci = confint(H3.OLR.2))), :
## number of rows of result is not a multiple of vector length (arg 2)
## OR 2.5 % 97.5 % p value
## Considering_future_costs_zsmfNeutral 1.617138 0.5818307 4.518483 0.355
## Considering_future_costs_zsmfLikely 2.993748 1.1057757 8.193019 0.031
## Economic_Mindset_zsmfNeutral 2.521613 1.0133018 6.411839 0.049
## Economic_Mindset_zsmfLarge extent 7.039510 2.7759832 18.443562 0.000
brant::brant(H3.OLR.2) #not significant --> which is good!! --> because H0: Requirement parallel regression assumption met
## ----------------------------------------------------------------------------
## Test for X2 df probability
## ----------------------------------------------------------------------------
## Omnibus 4.26 4 0.37
## Considering_future_costs_zsmfNeutral 0.1 1 0.76
## Considering_future_costs_zsmfLikely 0.78 1 0.38
## Economic_Mindset_zsmfNeutral 1.02 1 0.31
## Economic_Mindset_zsmfLarge extent 0.16 1 0.69
## ----------------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H3.OLR.2): 1 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.
Question 4
## Descriptive
# Frequency table
epiDisplay::tab1(data.H3_wide$Engagement_of_Project_Managers, graph = FALSE, cum.percent = TRUE)
## data.H3_wide$Engagement_of_Project_Managers :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## very little extent 13 8.6 8.6 8.6 8.6
## little extent 30 19.7 28.3 19.9 28.5
## neutral 57 37.5 65.8 37.7 66.2
## large extent 29 19.1 84.9 19.2 85.4
## very large extent 22 14.5 99.3 14.6 100.0
## NA's 1 0.7 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Overview over the amount of answers per point on the likert scale "Engagement of Project Managers"
ggplot(data = subset(data.H3_wide, !is.na(Engagement_of_Project_Managers)), aes(Engagement_of_Project_Managers)) +
scale_x_discrete(name = "Engagement of Project Managers") +
geom_bar(fill = "#00C08D") +
labs(title = "Amount of answers per point of extent of engagement\
of project managers in the topic of carbon neutrality", y = "Percentage of total cases") +
theme(text = element_text(size = 14)) +
theme_minimal()
#Correlation btw Engagement of project managers and industry sector
cor.test(as.numeric(data$Engagement_of_Project_Managers), as.numeric(data$Sector), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 580450, p-value = 0.1521
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1191234
Analysis of who uses the GPM P5 Standard and who doesn’t
## Descriptive
# Frequency table
epiDisplay::tab1(data$Knowing, graph = FALSE, cum.percent = TRUE, main = "Amount of participants having \
or having not heard of the P5 Standard")
## data$Knowing :
## Frequency Percent Cum. percent
## Yes 84 55.3 55.3
## No 68 44.7 100.0
## Total 152 100.0 100.0
# Percentage of answers per point on the likert scale
round(tally (~ Knowing, format = "percent", data = data, 2))
## Knowing
## Yes No Total
## 55 45 100
# Barplot showing the amount of people that have or haven't heard of the P5 Standard
ggplot(data = subset(data, !is.na(Knowing)), aes(Knowing)) +
scale_x_discrete(name = "Heard of the P5 Standard") +
scale_y_continuous(name = "Count") +
ggtitle("P5 Standard awareness among participants") +
geom_bar(fill = "#00C08D") +
theme(text = element_text(size = 14)) +
theme_minimal()
#Heard of it (Yes/no) per industry sector
table(data$Knowing, data$Sector)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24
## Yes 7 8 6 10 12 7 8 1 0 1 0 1 1 0 7 5 1 0 2 0 1 3 0
## No 3 2 2 1 1 3 0 1 1 1 16 1 3 4 7 5 0 3 6 4 0 1 1
# Create simple Data Frame
data_plot <- dplyr::select (data, Sector, Knowing)
data_plot <- na.omit(data_plot)
# Convert Sectors to Factor, making all Factor Levels explicit (INCLUDING NO.14)
all_possible_levels <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24)
data_plot$Sector <- factor(data_plot$Sector, levels = all_possible_levels)
# Set Labels For Sector Groups
my_labels <- c("Energy", "Materials", "Automobiles & Components", "Utilities", "Capital Goods", "Food Beverage & Tobacco", "Semiconductors & Semiconductor Equipment",
"Consumber Durables & Apparel", "Consumer Discretionary Distribution & Retail", "Consumer Staples Distribution & Retail", "Banks", "Transportation",
"Technology Hardware & Equipment", "Equity Real Estate Investment Trusts (REITs)", "Consumer Services", "Financial Services", "Commercial & Professional Services", "Household & Personal Products",
"Health Care Equipment & Services", "Pharmaceuticals, Biotechnology & Life Sciences", "Media & Entertainment", "Telecommunication Services", "Software & Services", "Insurance")
# Plot Barplot
ggplot(data_plot, aes(x = Sector, fill=Knowing)) +
geom_bar() +
scale_x_discrete(labels = my_labels, drop = FALSE, name = "Industry Sectors") + ## Drop = False ensures, that Sector with count = 0 is not dropped
scale_fill_manual(values = c("#619CFF", "#F8766D")) +
guides(fill = guide_legend(title = "Heard of the standard")) +
ggtitle("Awareness of P5 Standard among industry sectors") +
scale_y_continuous(name = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust=1),
plot.title = element_text(hjust=0.0)) +
theme(text = element_text(size = 14))
#Correlation btw having heard of it and industry sector
cor.test(as.numeric(data$Knowing), as.numeric(data$Sector), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 325235, p-value = 1.409e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3856493
#Correlation btw having heard of it and experience
cor.test(as.numeric(data$Knowing), as.numeric(data$Experience), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 335633, p-value = 0.09825
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1439776
#Correlation btw having heard of it and age
cor.test(as.numeric(data$Knowing), as.numeric(data$Age), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 660764, p-value = 0.0002401
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3005114
H4: The more experienced the project manager, the less likely the use of the GPM P5 Ontology.
## Data preparation
data.H4 <- dplyr::select(data, ID, Frequency_usage, Experience_of_standard, Experience, Training)
data.H4$Frequency_usage <- factor(data.H4$Frequency_usage,
labels = c("never", "rarely", "sometimes", "often", "always"),
ordered = TRUE)
data.H4$Experience_of_standard <- factor(data.H4$Experience_of_standard,
labels = c("poor", "average", "good", "very good"), #1: very poor is not here because no one chose it.
ordered = TRUE)
Question 1 + Years as a Project Manager/Experience + Training
## Data Preparation
# Bring categories 1&2 and 4&5 together for OLR because not enough observations
data.H4 <- data.H4 %>%
mutate(Frequency_usage_zsmf = fancycut::fancycut(as.numeric(Frequency_usage),
NeverRarely = "[1, 2]",
Sometimes = "3",
OftenAlways = "[4, 5]",
na.bucket = 'missing'))
data.H4$Frequency_usage_zsmf[data.H4$Frequency_usage_zsmf=="missing"] <- NA
data.H4$Frequency_usage_zsmf <- droplevels(data.H4$Frequency_usage_zsmf)
# Frequency table
epiDisplay::tab1(data.H4$Frequency_usage, graph = FALSE, cum.percent = TRUE)
## data.H4$Frequency_usage :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## never 3 2.0 2.0 3.6 3.6
## rarely 10 6.6 8.6 11.9 15.5
## sometimes 45 29.6 38.2 53.6 69.0
## often 21 13.8 52.0 25.0 94.0
## always 5 3.3 55.3 6.0 100.0
## NA's 68 44.7 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Present epidisplay nicely
t.H4.1 <- as.data.table(epiDisplay::tab1(data.H4$Frequency_usage, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H4.1 <- t.H4.1[,c(2,3,4,6,7)]
colnames(t.H4.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H4.1.ft <- flextable(t.H4.1)
t.H4.1.ft <- bold(t.H4.1.ft, bold=T, part = "header")
autofit(t.H4.1.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
never | 3 | 2.0 | 3.6 | 3.6 |
rarely | 10 | 6.6 | 11.9 | 15.5 |
sometimes | 45 | 29.6 | 53.6 | 69.0 |
often | 21 | 13.8 | 25.0 | 94.0 |
always | 5 | 3.3 | 6.0 | 100.0 |
NA's | 68 | 44.7 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Boxplot
data.H4 %>% filter(!is.na(Frequency_usage)) %>%
ggplot(aes(y = Frequency_usage, x = Experience, fill = Frequency_usage)) +
labs(title = "Frequency of P5 usage vs. project manager experience", x = "Experience in years", y = "Frequency of P5 usage") +
scale_fill_manual(values = c("#E76BF3","#619CFF", "#00C08D", "#7CAE00", "#F8766D"), guide = guide_legend(reverse = TRUE)) +
theme_minimal() +
theme(text = element_text(size = 14)) +
geom_boxplot()
## Warning: Removed 16 rows containing non-finite values (`stat_boxplot()`).
# Correlation between frequency of usage and the years of experience
cor.test(as.numeric(data.H4$Experience), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 56712, p-value = 0.504
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.08242131
# Correlation between frquency of usage and what they think of the standard/what their experience with it is
cor.test(as.numeric(data.H4$Experience_of_standard), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 45753, p-value = 1.475e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4637502
Significant moderate to strong correlation
# Correlation between frequency of usage and PM training yes/no of project manager
cor.test(as.numeric(data.H4$Training), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 94228, p-value = 0.6779
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.04598726
No correlation + not significant
# Training vs. no training
summary(data$Training)
## No training Training
## 42 110
# Analysing training data
Cloumn 3: Total percentages per Training of who has a training (exluding those who don’t) Row “Total” & column “…cases”: Total/100 –> How many answers were given on average –> How many trainings they have on average 1.05
(df.Training <- ufs::multiResponse(data, items = c("TrainingPMP", "TrainingCAPM", "TrainingPRINCE2", "TrainingPMIACP", "TrainingCSM", "TrainingCPM", "TrainingCPMP", "TrainingIPMA", "TrainingGPM")))
## Option Frequency Percentage of responses Percentage of (152) cases
## 1 TrainingPMP 35 22.0125786 23.0263158
## 2 TrainingCAPM 27 16.9811321 17.7631579
## 3 TrainingPRINCE2 17 10.6918239 11.1842105
## 4 TrainingPMIACP 19 11.9496855 12.5000000
## 5 TrainingCSM 17 10.6918239 11.1842105
## 6 TrainingCPM 25 15.7232704 16.4473684
## 7 TrainingCPMP 16 10.0628931 10.5263158
## 8 TrainingIPMA 1 0.6289308 0.6578947
## 9 TrainingGPM 2 1.2578616 1.3157895
## 10 Total 159 100.0000000 104.6052632
# Barplot showing the amount of people per training (saying that multiple responses possible so over 100%)
df.Training <- rename(df.Training, Percentage_of_total_cases = `Percentage of (152) cases`)
ggplot(df.Training[-10,], aes(y = Percentage_of_total_cases, x = Option)) +
scale_x_discrete(name = "Different PM trainings") +
geom_bar(stat = "identity", fill = "#00C08D") +
labs(title = "Participant distribution across various PM trainings ", y = "Percentage of total cases") +
theme_minimal() +
theme(text = element_text(size = 14)) +
theme(axis.text.x = element_text(angle = 45, hjust=1),
plot.title = element_text(hjust=0.0)) #h.just:horizontal adjustment (0.5 in the middle)
## Regression Analysis
I have many missing values (more than half): say that!! Residual analysis is significant –> clm
H4.OLR <- polr(Frequency_usage_zsmf ~ Experience, data = data.H4, Hess = TRUE)
summary(H4.OLR)
## Call:
## polr(formula = Frequency_usage_zsmf ~ Experience, data = data.H4,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Experience -0.09186 0.03747 -2.451
##
## Intercepts:
## Value Std. Error t value
## NeverRarely|Sometimes -2.0723 0.4223 -4.9072
## Sometimes|OftenAlways 0.4190 0.3232 1.2967
##
## Residual Deviance: 131.8776
## AIC: 137.8776
## (84 observations deleted due to missingness)
## Residual Analysis
# brant is significant for experience so we need to do clm instead
summary_table_H4.OLR <- coef(summary(H4.OLR))
pval4 <- round(pnorm(abs(summary_table_H4.OLR[, "t value"]), lower.tail = FALSE)* 2,3)
summary_table_H4.OLR <- cbind(summary_table_H4.OLR, "p value" = pval4)
summary_table_H4.OLR #only p-values of IV important; 0.009 & 0.712: significant corr. btw Experience and frequency usage but not btw training
## Value Std. Error t value p value
## Experience -0.0918551 0.03747215 -2.451290 0.014
## NeverRarely|Sometimes -2.0723095 0.42229783 -4.907223 0.000
## Sometimes|OftenAlways 0.4190245 0.32315436 1.296670 0.195
pval4 <- pval4[1:5]
cbind(exp(cbind(OR = coef(H4.OLR), ci = confint(H4.OLR))),"p value" = pval4) #confidence interval, OR (odds ratio): When likelihood of company considering long term costs for offsetting emissions gets higher by 1 (point on likert scale), the odds that we get a higher value (more likely) for Proactive Steps by x times. --> Odds increase by x%
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H4.OLR), ci = confint(H4.OLR))), `p value`
## = pval4): number of rows of result is not a multiple of vector length (arg 2)
## OR ci p value
## 2.5 % 0.9122373 0.8409246 0.014
## 97.5 % 0.9122373 0.9785995 0.000
brant::brant(H4.OLR) # significant --> not good!! --> because H0: Requirments met
## --------------------------------------------
## Test for X2 df probability
## --------------------------------------------
## Omnibus 23.92 1 0
## Experience 23.92 1 0
## --------------------------------------------
##
## H0: Parallel Regression Assumption holds
# Clm: cumulative link model
H4.clm <- clm(Frequency_usage_zsmf ~ Experience, data = data.H4)
summary(H4.clm) # Once 0.14 and once -0.03 --> not the same slope for the two steps
## formula: Frequency_usage_zsmf ~ Experience
## data: data.H4
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 68 -65.94 137.88 5(0) 4.40e-11 3.0e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Experience -0.09186 0.03747 -2.451 0.0142 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## NeverRarely|Sometimes -2.0723 0.4223 -4.907
## Sometimes|OftenAlways 0.4190 0.3232 1.297
## (84 observations deleted due to missingness)
(summary_table_H4.clm <- coef(summary(H4.clm)))
## Estimate Std. Error z value Pr(>|z|)
## NeverRarely|Sometimes -2.0723095 0.42229678 -4.907235 9.236934e-07
## Sometimes|OftenAlways 0.4190245 0.32315295 1.296675 1.947429e-01
## Experience -0.0918551 0.03747152 -2.451331 1.423291e-02
# OR
exp(coef(H4.clm))
## NeverRarely|Sometimes Sometimes|OftenAlways Experience
## 0.1258947 1.5204776 0.9122373
If experience increases by 1 year, chances to go from never to sometimes increase 1.15 fold/15%
0.9122373^10
## [1] 0.3990968
If someone has 10 years more experience, the chances the standard is used decreases by about 60% ((4.02-1)*100 = 302%) (OR^10)
exp(confint(H4.clm))
## 2.5 % 97.5 %
## Experience 0.8409283 0.9785993
Question 1.1 if never, why?
# The three that chose that they never use it, answered: "My company doesn't use it"
Question 2
## Descriptive
# Frequency table
epiDisplay::tab1(data.H4$Experience_of_standard, graph = FALSE, cum.percent = TRUE)
## data.H4$Experience_of_standard :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## poor 4 2.6 2.6 5.0 5.0
## average 27 17.8 20.4 33.8 38.8
## good 40 26.3 46.7 50.0 88.8
## very good 9 5.9 52.6 11.2 100.0
## NA's 72 47.4 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
#Present epidisplay nicely
t.H4.2 <- as.data.table(epiDisplay::tab1(data.H4$Experience_of_standard, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H4.2 <- t.H4.2[,c(2,3,4,6,7)]
colnames(t.H4.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
new_row <- data.table("Category" = "very poor", "Frequency" = 0, "%" = 0, "valid %" = 0, "cum. valid %" = 0)
t.H4.2 <- rbindlist(list(new_row, t.H4.2))
t.H4.2.ft <- flextable(t.H4.2)
t.H4.2.ft <- bold(t.H4.2.ft, bold=T, part = "header")
autofit(t.H4.2.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
very poor | 0 | 0.0 | 0.0 | 0.0 |
poor | 4 | 2.6 | 5.0 | 5.0 |
average | 27 | 17.8 | 33.8 | 38.8 |
good | 40 | 26.3 | 50.0 | 88.8 |
very good | 9 | 5.9 | 11.2 | 100.0 |
NA's | 72 | 47.4 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Experience of standard: 1&2 zsm und 4&5 zsm --> because little people at 1 and 5 (no people at very poor - so left that out)
data.H4 <- data.H4 %>%
mutate(Experience_of_standard_zsmf = fancycut::fancycut(as.numeric(Experience_of_standard),
Poor = "1",
Average = "2",
Good = "[3, 4]",
na.bucket = 'missing'))
data.H4$Experience_of_standard_zsmf[data.H4$Experience_of_standard_zsmf == "missing"] <- NA
data.H4$Experience_of_standard_zsmf <- droplevels(data.H4$Experience_of_standard_zsmf)
# To check if it's true
table(data.H4$Experience_of_standard, data.H4$Experience_of_standard_zsmf)
##
## Poor Average Good
## poor 4 0 0
## average 0 27 0
## good 0 0 40
## very good 0 0 9
# Barplot showing the number of people per point of "Experience of standard"
ggplot(data = subset(data.H4, !is.na(Experience_of_standard)), aes(Experience_of_standard)) +
scale_x_discrete(name = "Experience of standard") +
geom_bar(fill = "#00C08D") +
ggtitle("Number of answers per experience") +
theme(text = element_text(size = 14)) +
theme_light()
# Correlation between frquency of usage and what they think of the standard/what their experience with it is
cor.test(as.numeric(data.H4$Experience_of_standard), as.numeric(data.H4$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 45753, p-value = 1.475e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4637502
Significant moderate to strong correlation
H5: The more goals of the GPM P5 Ontology are trying to be addressed within a project, the less realistic/possible it seems to finish the project successfully
## Data preparation
data.H5_wide <- dplyr::select(data, ID, Knowing, goals_0, goals_9, goals_19, goals_29, goals_39, goals_49, Tradeoff)
# Data as factors and with labels
data.H5_wide$goals_0 <- factor(data.H5_wide$goals_0, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$goals_9 <- factor(data.H5_wide$goals_9, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$goals_19 <- factor(data.H5_wide$goals_19, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$goals_29 <- factor(data.H5_wide$goals_29, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$goals_39 <- factor(data.H5_wide$goals_39, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$goals_49 <- factor(data.H5_wide$goals_49, labels = c("Not possible at all", "Unlikely", "Neutral", "Likely", "Very possible"),
ordered = TRUE)
data.H5_wide$Tradeoff <- factor(data.H5_wide$Tradeoff, labels = c("Yes", "No"),
ordered = TRUE)
# 1&2 together und 4&5 together --> because little people at 1 and 5
data.H5_wide <- data.H5_wide %>%
mutate(goals0_zsmf = fancycut::fancycut(as.numeric(goals_0),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals0_zsmf[data.H5_wide$goals0_zsmf=="missing"] <- NA
data.H5_wide$goals0_zsmf <- droplevels(data.H5_wide$goals0_zsmf)
data.H5_wide <- data.H5_wide %>%
mutate(goals9_zsmf = fancycut::fancycut(as.numeric(goals_9),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals9_zsmf[data.H5_wide$goals9_zsmf=="missing"] <- NA
data.H5_wide$goals9_zsmf <- droplevels(data.H5_wide$goals9_zsmf)
data.H5_wide <- data.H5_wide %>%
mutate(goals19_zsmf = fancycut::fancycut(as.numeric(goals_19),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals19_zsmf[data.H5_wide$goals19_zsmf=="missing"] <- NA
data.H5_wide$goals19_zsmf <- droplevels(data.H5_wide$goals19_zsmf)
data.H5_wide <- data.H5_wide %>%
mutate(goals29_zsmf = fancycut::fancycut(as.numeric(goals_29),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals29_zsmf[data.H5_wide$goals29_zsmf=="missing"] <- NA
data.H5_wide$goals29_zsmf <- droplevels(data.H5_wide$goals29_zsmf)
data.H5_wide <- data.H5_wide %>%
mutate(goals39_zsmf = fancycut::fancycut(as.numeric(goals_39),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals39_zsmf[data.H5_wide$goals39_zsmf=="missing"] <- NA
data.H5_wide$goals39_zsmf <- droplevels(data.H5_wide$goals39_zsmf)
data.H5_wide <- data.H5_wide %>%
mutate(goals49_zsmf = fancycut::fancycut(as.numeric(goals_49),
Unlikely = "[1, 2]",
Neutral = "3",
Likely = "[4, 5]",
na.bucket = 'missing'))
data.H5_wide$goals49_zsmf[data.H5_wide$goals49_zsmf=="missing"] <- NA
data.H5_wide$goals49_zsmf <- droplevels(data.H5_wide$goals49_zsmf)
Question 1
## Data preparation
# Data to long format
data.H5_long <- data.H5_wide %>%
gather(key = "Goals", value = "score",
goals_0, goals_9, goals_19, goals_29, goals_39, goals_49) %>%
convert_as_factor(ID, Goals)
# data.H5_long <- data.H5_long %>% dplyr::select(ID, Goals, score)
data.H5_long2 <- data.H5_wide %>%
gather(key = "Goals_zsmf", value = "score_zsmf",
goals0_zsmf, goals9_zsmf, goals19_zsmf, goals29_zsmf, goals39_zsmf, goals49_zsmf) %>%
convert_as_factor(ID, Goals_zsmf)
data.H5_long2 <- data.H5_long2 %>% dplyr::select(ID, Goals_zsmf, score_zsmf)
# Bringing all two data sets together
data.H5_long <- lsr::sortFrame(data.H5_long, ID)
data.H5_long2 <- lsr::sortFrame(data.H5_long2, ID)
data.H5_long <- cbind(data.H5_long, data.H5_long2[,-1])
rm(data.H5_long2)
# order
data.H5_long$Goals <- factor(data.H5_long$Goals, labels = c("goals_0", "goals_9", "goals_19", "goals_29", "goals_39", "goals_49"),
ordered = TRUE)
data.H5_long$score <- factor(data.H5_long$score, labels = c("not possible at all", "unlikely", "neutral", "likely", "very possible"),
ordered = TRUE)
data.H5_long$Goals_zsmf <- factor(data.H5_long$Goals_zsmf, labels = c("goals0_zsmf", "goals9_zsmf", "goals19_zsmf", "goals29_zsmf", "goals39_zsmf", "goals49_zsmf"),
ordered = TRUE)
data.H5_long$score_zsmf <- factor(data.H5_long$score_zsmf, labels = c("unlikely", "neutral", "likely"),
ordered = TRUE)
## Descriptive
# Frequency table with percentages
(table.Goals <- round(prop.table(table(data.H5_long$Goals, data.H5_long$score), 1), 4)*100)
##
## not possible at all unlikely neutral likely very possible
## goals_0 17.52 34.31 3.65 9.49 35.04
## goals_9 41.18 38.24 0.74 6.62 13.24
## goals_19 25.18 46.04 3.60 16.55 8.63
## goals_29 29.20 31.39 13.14 17.52 8.76
## goals_39 21.32 25.74 19.85 15.44 17.65
## goals_49 30.88 34.56 0.74 9.56 24.26
# Stacked barplot for percentage of answers of how possible it seems to finish a project successfully if any y goals are trying to be addressed.
data.H5_long %>% filter(!is.na(Goals) & !is.na(score)) %>%
ggplot(aes(x = factor(Goals), fill = factor(6-as.numeric(score)))) +
geom_bar(position = "fill") +
labs(x = "Amount of goals trying to be reached", fill = "Likelihood to finish\
a project successfully") +
scale_x_discrete(labels = c("0", "9", "19", "29", "39", "49")) +
scale_fill_discrete(labels = c("very possible", "4", "3", "2", "not possible at all")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() +
theme(text = element_text(size = 14)) +
ggtitle("Number of ontology goals and project success likelihood")
# To check if correct data formatting
#table(data.H5_long$Goals, data.H5_long$Goals_zsmf)
#table(data.H5_long$Goals, data.H5_long$score)
#table(data.H5_long$Goals_zsmf, data.H5_long$score_zsmf)
# To test hypothesis
# Correlation between amount of goals trying to be reached and how possible it seems to finish a project successfully
cor.test(as.numeric(data.H5_long$Goals), as.numeric(data.H5_long$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 92685242, p-value = 0.888
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.004923521
No correlation –> so hypothesis 5 is rejected
## Regression analysis
# Friedmann test: Is there a significant difference between the tendencies of multiple dependent variables?
H0: No association between the two variables/variables are the same –> No relationship between the amount of goals and the likelihood to finish a project successfully. H1: There is a significant relationship.
friedman.test(data.H5_long$score_zsmf, data.H5_long$Goals_zsmf, data.H5_long$ID) #1st var: DV, 2nd var: IV, 3rd var: person
##
## Friedman rank sum test
##
## data: data.H5_long$score_zsmf, data.H5_long$Goals_zsmf and data.H5_long$ID
## Friedman chi-squared = 41.832, df = 5, p-value = 6.369e-08
# Posthoc to determine where the difference is
friedmanmc(as.numeric(data.H5_long$score_zsmf), data.H5_long$Goals_zsmf, data.H5_long$ID)
## Multiple comparisons between groups after Friedman test
## alpha: 0.05
## Comparisons
## obs.dif critical.dif stat.signif p.value
## goals0_zsmf-goals9_zsmf 14.0 95.74331 FALSE 1.000000e+00
## goals0_zsmf-goals19_zsmf 96.0 95.74331 TRUE 6.824499e-02
## goals0_zsmf-goals29_zsmf 115.0 95.74331 TRUE 8.874652e-03
## goals0_zsmf-goals39_zsmf 136.5 95.74331 TRUE 5.997036e-04
## goals0_zsmf-goals49_zsmf 62.5 95.74331 FALSE 1.000000e+00
## goals9_zsmf-goals19_zsmf 110.0 95.74331 TRUE 1.565598e-02
## goals9_zsmf-goals29_zsmf 129.0 95.74331 TRUE 1.608918e-03
## goals9_zsmf-goals39_zsmf 150.5 95.74331 TRUE 8.299892e-05
## goals9_zsmf-goals49_zsmf 76.5 95.74331 FALSE 3.992923e-01
## goals19_zsmf-goals29_zsmf 19.0 95.74331 FALSE 1.000000e+00
## goals19_zsmf-goals39_zsmf 40.5 95.74331 FALSE 1.000000e+00
## goals19_zsmf-goals49_zsmf 33.5 95.74331 FALSE 1.000000e+00
## goals29_zsmf-goals39_zsmf 21.5 95.74331 FALSE 1.000000e+00
## goals29_zsmf-goals49_zsmf 52.5 95.74331 FALSE 1.000000e+00
## goals39_zsmf-goals49_zsmf 74.0 95.74331 FALSE 4.891260e-01
## Now with only the people that have heard of the GPM P5 Standard
data.H5.knowing <- subset(data.H5_long, Knowing == "Yes")
# Frequency table with percentages
(table.Goals.knowing <- round(prop.table(table(data.H5.knowing$Goals, data.H5.knowing$score), 1), 4)*100)
##
## not possible at all unlikely neutral likely very possible
## goals_0 30.14 46.58 2.74 8.22 12.33
## goals_9 43.06 44.44 0.00 2.78 9.72
## goals_19 24.32 56.76 0.00 9.46 9.46
## goals_29 38.36 36.99 2.74 10.96 10.96
## goals_39 27.78 37.50 5.56 4.17 25.00
## goals_49 31.51 47.95 0.00 8.22 12.33
# Stacked barplot for percentage of answers of how possible it seems to finish a project successfully if any y goals are trying to be addressed. For only the people that have heard of the GPM P5 Standard
data.H5.knowing %>% filter(!is.na(Goals) & !is.na(score)) %>%
ggplot(aes(x = factor(Goals), fill = factor(6-as.numeric(score)))) +
geom_bar(position = "fill") +
labs(x = "Amount of goals trying to be achieved", fill = "Likelihood to finish\
a project successfully") +
scale_x_discrete(labels = c("0", "9", "19", "29", "39", "49")) +
scale_fill_discrete(labels = c("very possible", "4", "3", "2", "not possible at all")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
ggtitle("Likelihood of finishing a project successfully if y goals are tying to be achieved") +
theme(text = element_text(size = 14)) +
theme_minimal()
# To test hypothesis
# Correlation between amount of goals trying to be reached and how possible it seems to finish a project successfully
cor.test(as.numeric(data.H5.knowing$Goals), as.numeric(data.H5.knowing$score), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 13145787, p-value = 0.2525
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.05486073
Question 2
# Frequency table with median
epiDisplay::tab1(data.H5_wide$Tradeoff, graph = FALSE, cum.percent = TRUE)
## data.H5_wide$Tradeoff :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## Yes 136 89.5 89.5 90.7 90.7
## No 14 9.2 98.7 9.3 100.0
## NA's 2 1.3 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Barplot to look at the number of answers
ggplot(data = subset(data.H5_wide, !is.na(Tradeoff)), aes(Tradeoff)) +
scale_x_discrete(name = "Trade-off") +
scale_y_continuous(name = "Count") +
ggtitle("Number of participants on whether there is a trade-off or not\
when considering an increasing number of ontology goals") +
geom_bar(fill = "#00C08D") +
theme_minimal() +
theme(text = element_text(size = 14))
# For only the people who have heard of the standard
# Frequency table with median
epiDisplay::tab1(data.H5.knowing$Tradeoff, graph = FALSE, cum.percent = TRUE)
## data.H5.knowing$Tradeoff :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## Yes 480 95.2 95.2 96.4 96.4
## No 18 3.6 98.8 3.6 100.0
## NA's 6 1.2 100.0 0.0 100.0
## Total 504 100.0 100.0 100.0 100.0
# Barplot to look at the amount of answers
ggplot(data = subset(data.H5.knowing, !is.na(Tradeoff)), aes(Tradeoff)) +
scale_x_discrete(name = "Trade-off") +
scale_y_continuous(name = "Count") +
ggtitle("Number of participants on whether there is a trade-off or not\
when considering an increasing number of ontology goals") +
geom_bar(fill = "#00C08D") +
theme_minimal() +
theme(text = element_text(size = 14))
H6: The more unclear which tools can be used to evaluate the outcome of the project in relation to the goals of the ontology, the less the GPM P5 standard gets used.
## Data preparation
data.H6 <- dplyr::select(data, ID, Frequency_usage, Understanding_tools, Sufficiency)
# Data as factors and with labels
data.H6$Frequency_usage <- factor(data.H6$Frequency_usage, labels = c("Never", "Rarely", "Sometimes", "Often", "Always"),
ordered = TRUE)
data.H6$Understanding_tools <- factor(data.H6$Understanding_tools, labels = c("none", "limited", "moderate", "good", "very good"),
ordered = TRUE)
data.H6$Sufficiency <- factor(data.H6$Sufficiency,
labels = c("strongly disagree", "disagree", "neutral", "agree", "strongly agree"),
ordered = TRUE)
# 1&2 zsm und 4&5 zsm --> because little people at 1 and 5
data.H6 <- data.H6 %>%
mutate(Sufficiency_zsmf = fancycut::fancycut(as.numeric(Sufficiency),
Disagree = "[1, 2]",
Neutral = "3",
Agree = "[4, 5]",
na.bucket = 'missing'))
data.H6$Sufficiency_zsmf[data.H6$Sufficiency_zsmf=="missing"] <- NA
data.H6$Sufficiency_zsmf <- droplevels(data.H6$Sufficiency_zsmf)
#data.H6$Sufficiency_zsmf <- factor(data.H6$Sufficiency_zsmf, levels = c(1,2,3), labels = c("Disagree", "Neutral", "Agree"))
data.H6 <- data.H6 %>%
mutate(Understanding_tools_zsmf = fancycut::fancycut(as.numeric(Understanding_tools),
Limited = "[1, 2]",
Neutral = "3",
Good = "[4, 5]",
na.bucket = 'missing'))
data.H6$Understanding_tools_zsmf[data.H6$Understanding_tools_zsmf=="missing"] <- NA
data.H6$Understanding_tools_zsmf <- droplevels(data.H6$Understanding_tools_zsmf)
Question 1
## Descriptive
# Frequency table (+ percentages with and without NA --> always name the percentages without NA (NA-))
epiDisplay::tab1(data.H6$Understanding_tools, graph = FALSE, cum.percent = TRUE) #cum.percent: valid percentages
## data.H6$Understanding_tools :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## none 2 1.3 1.3 2.5 2.5
## limited 6 3.9 5.3 7.4 9.9
## moderate 26 17.1 22.4 32.1 42.0
## good 41 27.0 49.3 50.6 92.6
## very good 6 3.9 53.3 7.4 100.0
## NA's 71 46.7 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
#Present epidisplay nicely
t.H6.1 <- as.data.table(epiDisplay::tab1(data.H6$Understanding_tools, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H6.1 <- t.H6.1[,c(2,3,4,6,7)]
colnames(t.H6.1) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H6.1.ft <- flextable(t.H6.1)
t.H6.1.ft <- bold(t.H6.1.ft, bold=T, part = "header")
autofit(t.H6.1.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
none | 2 | 1.3 | 2.5 | 2.5 |
limited | 6 | 3.9 | 7.4 | 9.9 |
moderate | 26 | 17.1 | 32.1 | 42.0 |
good | 41 | 27.0 | 50.6 | 92.6 |
very good | 6 | 3.9 | 7.4 | 100.0 |
NA's | 71 | 46.7 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Visualization of amount of answers: Stacked barplot
data.H6 %>% filter(!is.na(Understanding_tools) & !is.na(Frequency_usage)) %>%
ggplot(aes(x = factor(Understanding_tools), fill = factor(5-as.numeric(Frequency_usage)))) + #x: EV, fill: ZV
geom_bar(position = "fill") +
labs(x = "Understanding of tools", fill = "Frequency of P5 use") +
scale_fill_manual(labels = c("always", "often", "sometimes", "rarely"), values = c("#F8766D", "#7CAE00", "#00C08D", "#619CFF", "#E76BF3")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() + #theme_(bw, minimal): background
theme(text = element_text(size = 14)) +
ggtitle("Frequency of P5 use vs project outcome evaluation knowledge")
# Correlation
cor.test(as.numeric(data.H6$Understanding_tools), as.numeric(data.H6$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 55942, p-value = 0.0007169
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3683122
Positive and significant correlation
cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 46012, p-value = 5.647e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4804404
cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Understanding_tools_zsmf), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 55772, p-value = 0.0006688
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3702346
## Regression analysis
# OLR
H6.OLR.1 <- polr(as.factor(Frequency_usage) ~ Understanding_tools_zsmf, data = data.H6, Hess = TRUE)
summary(H6.OLR.1)
## Call:
## polr(formula = as.factor(Frequency_usage) ~ Understanding_tools_zsmf,
## data = data.H6, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Understanding_tools_zsmfNeutral 2.030 0.8285 2.450
## Understanding_tools_zsmfGood 3.036 0.8375 3.625
##
## Intercepts:
## Value Std. Error t value
## Never|Rarely -9.9482 36.2770 -0.2742
## Rarely|Sometimes 0.0727 0.6846 0.1061
## Sometimes|Often 3.2757 0.8080 4.0544
## Often|Always 5.3758 0.9149 5.8759
##
## Residual Deviance: 163.9337
## AIC: 175.9337
## (71 observations deleted due to missingness)
H6.OLR.2 <- polr(as.factor(Frequency_usage) ~ Understanding_tools_zsmf + Sufficiency_zsmf, data = data.H6, Hess = TRUE)
summary(H6.OLR.2)
## Call:
## polr(formula = as.factor(Frequency_usage) ~ Understanding_tools_zsmf +
## Sufficiency_zsmf, data = data.H6, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Understanding_tools_zsmfNeutral 1.8890 0.8767 2.155
## Understanding_tools_zsmfGood 2.3650 0.8993 2.630
## Sufficiency_zsmfNeutral 0.9074 0.8944 1.014
## Sufficiency_zsmfAgree 2.4741 0.9257 2.673
##
## Intercepts:
## Value Std. Error t value
## Never|Rarely -8.4737 26.8024 -0.3162
## Rarely|Sometimes 0.9063 0.9961 0.9098
## Sometimes|Often 4.4625 1.1642 3.8332
## Often|Always 6.7472 1.2584 5.3617
##
## Residual Deviance: 151.9577
## AIC: 167.9577
## (71 observations deleted due to missingness)
## Residual analysis
# summary table 1
summary_table_H6.OLR.1 <- coef(summary(H6.OLR.1))
pval5 <- round(pnorm(abs(summary_table_H6.OLR.1[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H6.OLR.1 <- cbind(summary_table_H6.OLR.1, "p value" = pval5)
summary_table_H6.OLR.1 #p-value: 0.014 & 0.000
## Value Std. Error t value p value
## Understanding_tools_zsmfNeutral 2.02990813 0.8284699 2.4501892 0.014
## Understanding_tools_zsmfGood 3.03605272 0.8375268 3.6250217 0.000
## Never|Rarely -9.94817014 36.2770071 -0.2742280 0.784
## Rarely|Sometimes 0.07266253 0.6845970 0.1061391 0.915
## Sometimes|Often 3.27573544 0.8079548 4.0543549 0.000
## Often|Always 5.37576160 0.9148770 5.8759393 0.000
pval5 <- pval5[1:5]
cbind(exp(cbind(OR = coef(H6.OLR.1), ci = confint(H6.OLR.1))),"p value" = pval5)
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H6.OLR.1), ci = confint(H6.OLR.1))), :
## number of rows of result is not a multiple of vector length (arg 2)
## OR 2.5 % 97.5 % p value
## Understanding_tools_zsmfNeutral 7.613387 1.547544 41.8371 0.014
## Understanding_tools_zsmfGood 20.822887 4.214773 117.4308 0.000
brant::brant(H6.OLR.1) #not significant --> which is good!! --> because H0: Requirements of parallel regression assumption met
## --------------------------------------------------------------------
## Test for X2 df probability
## --------------------------------------------------------------------
## Omnibus 4.87 6 0.56
## Understanding_tools_zsmfNeutral 0 3 1
## Understanding_tools_zsmfGood 0 3 1
## --------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H6.OLR.1): 3 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.
Category left out is the reference group: Here understandingtools limited. Understandingtools neutral differs significantly from understandingtools limited regarding my DV (Frequency of usage). But we can see that the odds ratio is 7.63 which means the odds of an one point increase in Usage are 7.63 times higher for Neutral compared to Limited. Understandingtools good differs significantly from understandingtools limited regarding my DV (Frequency of usage).
# summary table 2
summary_table_H6.OLR.2 <- coef(summary(H6.OLR.2))
pval6 <- round(pnorm(abs(summary_table_H6.OLR.2[, "t value"]),lower.tail = FALSE)* 2,3)
summary_table_H6.OLR.2 <- cbind(summary_table_H6.OLR.2, "p value" = pval6)
summary_table_H6.OLR.2 #p-values: 0.031 & 0.009, 0.310, 0.008
## Value Std. Error t value p value
## Understanding_tools_zsmfNeutral 1.8889716 0.8767258 2.1545752 0.031
## Understanding_tools_zsmfGood 2.3650482 0.8993488 2.6297340 0.009
## Sufficiency_zsmfNeutral 0.9073582 0.8944249 1.0144599 0.310
## Sufficiency_zsmfAgree 2.4741435 0.9257266 2.6726502 0.008
## Never|Rarely -8.4737094 26.8023841 -0.3161551 0.752
## Rarely|Sometimes 0.9062851 0.9961277 0.9098082 0.363
## Sometimes|Often 4.4625147 1.1641877 3.8331573 0.000
## Often|Always 6.7471878 1.2584034 5.3617052 0.000
pval6 <- pval6[1:5]
cbind(exp(cbind(OR = coef(H6.OLR.2), ci = confint(H6.OLR.2))),"p value" = pval6)
## Waiting for profiling to be done...
## Warning in cbind(exp(cbind(OR = coef(H6.OLR.2), ci = confint(H6.OLR.2))), :
## number of rows of result is not a multiple of vector length (arg 2)
## OR 2.5 % 97.5 % p value
## Understanding_tools_zsmfNeutral 6.612565 1.2090080 39.69147 0.031
## Understanding_tools_zsmfGood 10.644551 1.8799511 66.81548 0.009
## Sufficiency_zsmfNeutral 2.477768 0.4333137 14.83813 0.310
## Sufficiency_zsmfAgree 11.871534 2.0330994 76.99071 0.008
brant::brant(H6.OLR.2)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## --------------------------------------------------------------------
## Test for X2 df probability
## --------------------------------------------------------------------
## Omnibus 4.93 12 0.96
## Understanding_tools_zsmfNeutral 0 3 1
## Understanding_tools_zsmfGood 0 3 1
## Sufficiency_zsmfNeutral 0 3 1
## Sufficiency_zsmfAgree 0 3 1
## --------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
## Warning in brant::brant(H6.OLR.2): 18 combinations in table(dv,ivs) do not
## occur. Because of that, the test results might be invalid.
Question 2
## Descriptive
# Frequency table
epiDisplay::tab1(data.H6$Sufficiency, graph = FALSE, cum.percent = TRUE)
## data.H6$Sufficiency :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## strongly disagree 1 0.7 0.7 1.2 1.2
## disagree 6 3.9 4.6 7.4 8.6
## neutral 39 25.7 30.3 48.1 56.8
## agree 24 15.8 46.1 29.6 86.4
## strongly agree 11 7.2 53.3 13.6 100.0
## NA's 71 46.7 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Present epiDisplay nicely
t.H6.2 <- as.data.table(epiDisplay::tab1(data.H6$Sufficiency, graph = FALSE, cum.percent = TRUE), keep.rownames=T)
t.H6.2 <- t.H6.2[,c(2,3,4,6,7)]
colnames(t.H6.2) <- c("Category", "Frequency", "%", "valid %", "cum. valid %")
t.H6.2.ft <- flextable(t.H6.2)
t.H6.2.ft <- bold(t.H6.2.ft, bold=T, part = "header")
autofit(t.H6.2.ft)
Category | Frequency | % | valid % | cum. valid % |
|---|---|---|---|---|
strongly disagree | 1 | 0.7 | 1.2 | 1.2 |
disagree | 6 | 3.9 | 7.4 | 8.6 |
neutral | 39 | 25.7 | 48.1 | 56.8 |
agree | 24 | 15.8 | 29.6 | 86.4 |
strongly agree | 11 | 7.2 | 13.6 | 100.0 |
NA's | 71 | 46.7 | 0.0 | 100.0 |
Total | 152 | 100.0 | 100.0 | 100.0 |
# Overview over the number of answers per point on the Likert scale "Sufficiency"
ggplot(data = subset(data.H6, !is.na(Sufficiency)), aes(Sufficiency)) +
scale_x_discrete(name = "Sufficiency of provided tools") +
scale_y_continuous(name = "Count") +
geom_bar(fill = "#00C08D") +
theme_minimal() +
ggtitle("Number of answers on how the sufficiency of tools provided\
through and with the standard is preceived.")
# Visualization of number of answers: Stacked barplot
data.H6 %>% filter(!is.na(Sufficiency) & !is.na(Frequency_usage)) %>%
ggplot(aes(x = factor(Sufficiency), fill = factor(5-as.numeric(Frequency_usage)))) + #x: EV, fill: ZV
geom_bar(position = "fill") +
labs(x = "Sufficiency", fill = "Frequency of P5 use") +
scale_fill_discrete(labels = c("always", "often", "sometimes", "rarely")) +
scale_y_continuous(name = "Percentage", labels = scales::label_percent(accuracy = 1)) +
theme_minimal() + #theme_(bw, minimal): background
theme(text = element_text(size = 14)) +
ggtitle("Frequency of P5 use vs sufficiency of P5 Standard, ontology and templated")
# Correlation btw the perceived sufficiency of tools provided through and with the standard and the frequency of its usage.
cor.test(as.numeric(data.H6$Sufficiency), as.numeric(data.H6$Frequency_usage), method = "spearman", use = "pairwise.complete.obs")
## Warning in cor.test.default(x, y, ...): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: x and y
## S = 46012, p-value = 5.647e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4804404
Moderate to strong correlation
Yes have heard of it but never use it There are only three
participants in this category. For the multiple choice question:
“Why don’t you use the P5 Standard?” all three chose the answer
“My company doesn’t use it” instead of “I’ve tried it, its’ not good”,
“I use another sustainability concept”, or “other”.
For the question: “On a scale of 1 to 5, rate your level of
understanding of the tools available for assessing the degree of
sustainability of project results” 2, 3, and 4 were chosen as
answers (limited, moderate and good understanding). –> a very spread
out distribution. They were also asked to give suggestions for
improvements.
Haven’t heard of it These people were also asked to answer the same questions for Hypothesis 5 as the ones who have heard of it. They were additionally asked “Now that you’ve heard about it and have seen the P5 Ontology, would you consider using it?”
# Making data frame
# df.open2 <- dplyr::select(data, Why_not_consider)
# df.open2 <- na.omit(df.open2)
# df.open2 <- as.data.table(df.open2)
# Export as an excel file
# write.xlsx(df.open2, file = "df.open2.xlsx", overwrite = TRUE)
## Descriptive
# Frequency table
epiDisplay::tab1(data$Consider_for_future, graph = FALSE, cum.percent = TRUE)
## data$Consider_for_future :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## No 15 9.9 9.9 22.7 22.7
## Yes 51 33.6 43.4 77.3 100.0
## NA's 86 56.6 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Barplot to look at the amount of answers for consider/not consider using the standard
ggplot(data = subset(data, !is.na(Consider_for_future)), aes(Consider_for_future)) +
scale_x_discrete(name = "Considering the standard for future use?") +
scale_y_continuous(name = "Count") +
ggtitle("Number of participants on if they consider the standard for future use or not") +
geom_bar(fill = "#00C08D") +
theme_minimal() +
theme(text = element_text(size = 14))
And they were asked to give their understanding of the tools just as the yes, never people were asked.
# People that haven't heard of the standard + people that have heard of it but never use it on their understanding of tools
data$GYnever1_und.tools[is.na(data$GYnever1_und.tools)] <- 0
data$GN1_und.tools[is.na(data$GN1_und.tools)] <- 0
data$GN.understanding <- data$GYnever1_und.tools + data$GN1_und.tools
data$GN.understanding[data$GN.understanding == 0] <- NA
data$GN.understanding <- factor(data$GN.understanding, levels = c(1, 2, 3, 4, 5), labels = c("none", "limited", "moderate", "good", "very good"))
# Frequency table
epiDisplay::tab1(data$GN.understanding, graph = FALSE, cum.percent = TRUE)
## data$GN.understanding :
## Frequency %(NA+) cum.%(NA+) %(NA-) cum.%(NA-)
## none 8 5.3 5.3 11.3 11.3
## limited 27 17.8 23.0 38.0 49.3
## moderate 20 13.2 36.2 28.2 77.5
## good 15 9.9 46.1 21.1 98.6
## very good 1 0.7 46.7 1.4 100.0
## NA's 81 53.3 100.0 0.0 100.0
## Total 152 100.0 100.0 100.0 100.0
# Barplot to look at the amount of answers for understanding of the tools available to evaluate the outcome of a project
ggplot(data = subset(data, !is.na(GN.understanding)), aes(GN.understanding)) +
scale_x_discrete(name = "Understanding of tools") +
scale_y_continuous(name = "Count") +
ggtitle("Number of participants on their understanding of tools to evaluate the outcome of a project") +
geom_bar(fill = "#00C08D") +
theme_minimal() +
theme(text = element_text(size = 14))
Regression outputs summarized
# Save all tables on a word document
#save_as_docx(ft1, t.H2.1.ft, ft2, t.H2.2.ft, t.H3.1.ft, t.H3.3.ft, t.H4.1.ft, t.H4.2.ft, t.H6.1.ft, t.H6.2.ft, path = mypath)
# Regression output with stargazer (gives OR)
stargazer(H2.OLR, H3.OLR.1, H3.OLR.2, type="text", out="stargazer.doc", apply.coef = exp, apply.se = exp, p.auto = F,
star.char = c("+", "*", "**", "***"), star.cutoffs = c(.1, .05, .01, .001),
notes.append =F, notes = c("+: p<0.1; p<0.05; *p<0.01; ***p<0.001"))
##
## ============================================================================
## Dependent variable:
## ---------------------------------------
## Proactive_Steps_zsmf
## (1) (2) (3)
## ----------------------------------------------------------------------------
## ITR 1.337+
## (1.167)
##
## Considering_future_costs_zsmfNeutral 2.615* 1.617
## (1.619) (1.682)
##
## Considering_future_costs_zsmfLikely 5.994*** 2.994*
## (1.604) (1.662)
##
## Economic_Mindset_zsmfNeutral 2.522*
## (1.598)
##
## Economic_Mindset_zsmfLarge extent 7.040***
## (1.618)
##
## ----------------------------------------------------------------------------
## Observations 146 150 149
## ============================================================================
## Note: +: p<0.1; p<0.05; *p<0.01; ***p<0.001
stargazer(H6.OLR.1, H6.OLR.2, type="text", out="stargazer.doc", apply.coef = exp, apply.se = exp, p.auto = F,
star.char = c("+", "*", "**", "***"), star.cutoffs = c(.1, .05, .01, .001),
notes.append =F, notes = c("+: p<0.1; p<0.05; *p<0.01; ***p<0.001"))
##
## ======================================================================
## Dependent variable:
## --------------------------------------
## Frequency_usage
## (1) (2)
## ----------------------------------------------------------------------
## Understanding_tools_zsmfNeutral 7.613* 6.613*
## (2.290) (2.403)
##
## Understanding_tools_zsmfGood 20.823*** 10.645**
## (2.311) (2.458)
##
## Sufficiency_zsmfNeutral 2.478
## (2.446)
##
## Sufficiency_zsmfAgree 11.872**
## (2.524)
##
## ----------------------------------------------------------------------
## Observations 81 81
## ======================================================================
## Note: +: p<0.1; p<0.05; *p<0.01; ***p<0.001