Survey Correlation Analysis

Introduction

This report analyzes Likert-scale survey responses using Pearson correlation to test several hypotheses related to remote work and productivity.

Load Required Packages

if (!require(readxl)) install.packages("readxl")

## Loading required package: readxl

library(readxl)

Load and Clean Data

# Read the survey workbook into `df` (edit the path to match your machine)
df <- read_excel(
  path = "F:/College/Classes/Business_Research_Methods/Module_2/Survey_Results.xlsx"
)

# Tidy the column headers in one pass, innermost step first:
#   1. turn each run of newlines/tabs into a single space
#   2. drop any remaining non-printable characters
#   3. strip leading and trailing whitespace
names(df) <- trimws(
  gsub(
    "[^[:print:]]", "",
    gsub("[\r\n\t]+", " ", names(df))
  )
)

Convert Likert Responses to Numeric

#' Convert Likert-scale labels to numeric scores
#'
#' Maps the five agreement labels used in the survey onto 1
#' ("Completely disagree") through 5 ("Completely agree"). Matching is exact;
#' any other value, including a missing one, yields NA.
#'
#' @param response A character vector (or factor) of Likert labels. A single
#'   string is accepted, so element-wise callers keep working.
#' @return A numeric vector the same length as `response`.
likert_to_num <- function(response) {
  # Ordered so that a label's position in this vector is its score.
  likert_levels <- c(
    "Completely disagree",
    "Somewhat disagree",
    "Neutral",
    "Somewhat agree",
    "Completely agree"
  )
  # match() is vectorised and returns NA for unmatched or missing input,
  # whereas switch() only accepts a length-one EXPR.
  as.numeric(match(response, likert_levels))
}

# Each new numeric score column, keyed by name, and the survey question it is
# derived from. The question text must equal the cleaned Excel header exactly.
likert_columns <- c(
  schedule = "I like setting my own daily schedule",
  productivity = "I feel I'm accomplishing more than I do in in the workplace.",
  stress = "I'm less stressed about my work.",
  task_difficulty = "I sometimes have difficulty understanding tasks while working remotely.",
  social_env = "I miss the social environment at work."
)

# Stop with a readable message if the workbook lacks an expected question,
# instead of failing later on a NULL column.
missing_questions <- setdiff(likert_columns, names(df))
if (length(missing_questions) > 0) {
  stop(
    "Survey columns not found: ",
    paste(missing_questions, collapse = "; "),
    call. = FALSE
  )
}

# vapply() guarantees one numeric value per response (sapply() can silently
# return a list); USE.NAMES = FALSE keeps the response labels from being
# attached as element names.
for (score_name in names(likert_columns)) {
  df[[score_name]] <- vapply(
    df[[likert_columns[[score_name]]]],
    likert_to_num,
    numeric(1),
    USE.NAMES = FALSE
  )
}

# Convert remote preference
# Locate the column by a fragment of its header rather than the full text,
# so hidden spaces elsewhere in the name do not matter. fixed = TRUE makes
# the fragment a literal string, not a regular expression.
col_remote <- grep(
  "decline a job offer",
  names(df),
  value = TRUE,
  fixed = TRUE
)

# df[[col_remote]] only works for a single name, so insist on exactly one
# matching column and report how many were found otherwise.
if (length(col_remote) != 1L) {
  stop(
    "Expected exactly one column containing 'decline a job offer', found ",
    length(col_remote),
    call. = FALSE
  )
}

# Score the answer by its position below: No = 1, Maybe = 2, Yes = 3.
# Any other value, including a missing one, becomes NA.
remote_answers <- c("No", "Maybe", "Yes")
df$remote_pref <- as.numeric(match(df[[col_remote]], remote_answers))

Pearson Correlation Results

# H2: Pearson correlation between the schedule and productivity scores,
# using only respondents with both values present
cor(
  x = df[["schedule"]],
  y = df[["productivity"]],
  use = "complete.obs",
  method = "pearson"
)

## [1] 0.6896716

# H3: Pearson correlation between the stress and remote-preference scores,
# using only respondents with both values present
cor(
  x = df[["stress"]],
  y = df[["remote_pref"]],
  use = "complete.obs",
  method = "pearson"
)

## [1] 0.09563651

# H4: Pearson correlation between the task-difficulty and social-environment
# scores, using only respondents with both values present
cor(
  x = df[["task_difficulty"]],
  y = df[["social_env"]],
  use = "complete.obs",
  method = "pearson"
)

## [1] 0.441389

Visualization

# Scatter the productivity scores against the schedule scores
plot(
  x = df[["schedule"]],
  y = df[["productivity"]],
  main = "Scatterplot of Schedule vs. Productivity",
  xlab = "Schedule Control (Numeric)",
  ylab = "Perceived Productivity (Numeric)"
)

# Overlay the least-squares line of productivity on schedule in blue
abline(reg = lm(productivity ~ schedule, data = df), col = "blue")

Survey Correlation Analysis

Brian Piltin

Introduction

Load Required Packages

Load and Clean Data

Convert Likert Responses to Numeric

Pearson Correlation Results

Visualization