We begin by loading one survey, the student teacher Qualtrics survey, using R

The first thing to do is to get your API token and save it in a file called “qualtrics.txt” in the same folder as your R file

knitr::opts_chunk$set(echo = TRUE)

Be sure to load your libraries

packages <- c('qualtRics', 'psych', 'tidyverse', 'shiny', 'dplyr', 'stringr', 'reticulate', 'lubridate', 'ggplot2', 'plotly', 'DT')

invisible(lapply(packages, library, character.only = TRUE))
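
If any of these packages are missing on a fresh machine, a one-time install step such as the following sketch can help (an optional convenience, not part of the original pipeline)

# Optional one-time setup: install any packages that are not yet available
missing <- setdiff(packages, rownames(installed.packages()))
if (length(missing) > 0) install.packages(missing)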

This gives us a list of all the surveys available under the API account

api_key <- readLines("qualtrics.txt", warn = FALSE)

# Register the API key with qualtRics
qualtrics_api_credentials(
  api_key = api_key,
  base_url = "uwyo.az1.qualtrics.com",
  install = TRUE,
  overwrite = TRUE
)
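
Because install = TRUE writes the key to your ~/.Renviron, qualtRics suggests restarting R or reloading that file before the credentials take effect

readRenviron("~/.Renviron")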

allsurveys <- qualtRics::all_surveys()

write.csv(allsurveys, "all_surveys.csv")

We select our survey ID and load the survey here

stperc <- fetch_survey(surveyID = "SV_9EOhqVCpXkqmcvk", label = TRUE, convert = TRUE, force_request = TRUE, breakout_sets = FALSE) %>%
  mutate(Survey_ID = "SV_9EOhqVCpXkqmcvk")  # Tag each row with the survey it came from
qual <- stperc %>%
  dplyr::select(Q10, Q11)

write.csv(qual, "qualresponses.csv")

write.csv(stperc, "stperc.csv")
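
As a quick sanity check that the download worked, you can glance at the structure of the fetched data (purely diagnostic; nothing downstream depends on it)

dplyr::glimpse(stperc)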

We munge the data for reporting and write the results to a .csv file

library(dplyr)
library(tidyr)
library(sjlabelled)  # For extracting variable labels

# Extract variable labels (question text) from stperc
question_labels <- get_label(stperc)

# Convert labels to a data frame
question_labels_df <- data.frame(Question = names(question_labels),
                                 Label = unlist(question_labels), 
                                 stringsAsFactors = FALSE)

# Pivot and summarize data
Aggregated <- stperc %>%
  filter(!is.na(IPAddress)) %>%
  select(Q2_1, Q2_2, Q2_3, Q2_4, Q2_5, Q2_6, Q3, Q4_1, Q4_2, Q4_3, Q4_4, Q5, Q6, 
         Q7_1, Q7_2, Q7_3, Q7_4, Q7_5, Q8_1, Q8_2, Q8_3, Q8_4, Q8_5, 
         Q9_1, Q9_2, Q9_3, Q9_4, Q9_5, Q9_6, Q9_7) %>%
  pivot_longer(cols = everything(), names_to = "Question", values_to = "Response") %>%
  na.omit() %>%
  group_by(Question, Response) %>%
  summarise(Count = n(), .groups = "drop") %>%
  group_by(Question) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  arrange(Question, desc(Count)) %>%
  left_join(question_labels_df, by = "Question") %>%
  select(Question, Label, Response, Count, Proportion)

write.csv(Aggregated, "question_counts.csv")
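
A quick check that the reshaping behaved as expected: the proportions within each question should sum to 1

# Sanity check: proportions within each question should sum to 1
Aggregated %>%
  group_by(Question) %>%
  summarise(Total = sum(Proportion))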

We switch to Python code

Set up by pointing reticulate at your local Python installation

library(reticulate)
use_python("C:/Users/mperki17/AppData/Local/Programs/Python/Python312/python.exe")
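
To confirm reticulate picked up the intended interpreter, inspect the discovered configuration

py_config()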

We set the knitr chunk options again so that Python errors are displayed rather than stopping the knit

knitr::opts_chunk$set(echo = TRUE, error=TRUE)

Install Python libraries

import subprocess
import sys

# '!pip' shell magic only works inside Jupyter; from a knitr Python chunk,
# invoke pip through the active interpreter instead
for pkg in ["QualtricsAPI", "pandas", "requests", "beautifulsoup4"]:
    subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])

This code pulls every survey under the account into one long-format dataset using Python

Be sure to have your qualtrics.txt file in the same folder as this file

import os
import pandas as pd
import requests
import re
import numpy as np
from bs4 import BeautifulSoup
from QualtricsAPI.Survey import Responses
from QualtricsAPI.Setup import Credentials


# Read API key from a .txt file
with open("qualtrics.txt", "r") as f:
    api_key = f.read().strip()
    
    
def fetch_survey_data(api_key):
    base_url = "https://uwyo.az1.qualtrics.com/API/v3/surveys"
    headers = {"x-api-token": api_key, "Content-Type": "application/json"}

    try:
        response = requests.get(base_url, headers=headers)
        response.raise_for_status()  # Raise an error for non-2xx status codes

        surveys = response.json().get("result", {}).get("elements", [])
        if not surveys:
            print("No surveys found.")
            return None

        survey_data = [{"SurveyID": survey["id"], "SurveyName": survey["name"]} for survey in surveys]
        survey_df = pd.DataFrame(survey_data)
        print("Survey IDs fetched successfully.")
        return survey_df

    except requests.exceptions.RequestException as e:
        print("Error fetching survey IDs:", str(e))
        return None



# Call the fetch_survey_data function
survey_df = fetch_survey_data(api_key)
# Print survey_df to check if it contains the expected data
print("Survey DataFrame:")
print(survey_df)
# Function to fetch the full survey definition (not used below, but handy for survey metadata)
def fetch_survey_definition(api_key, survey_id):
    headers = {
        "X-API-TOKEN": api_key
    }
    url = f"https://uwyo.az1.qualtrics.com/API/v3/surveys/{survey_id}/details"
    response = requests.get(url, headers=headers)
    return response.json()

# Define output directory before its usage
output_directory = "output"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print("Output directory created:", output_directory)
else:
    print("Output directory already exists:", output_directory)
# Initialize an empty list to store pivoted DataFrames
pivoted_dataframes = []

# Initialize an instance of Credentials
c = Credentials()

# Call the qualtrics_api_credentials() method
c.qualtrics_api_credentials(token=api_key, data_center='uwyo.az1')

# Initialize an instance of Responses
r = Responses()

# Iterate over each survey ID and name
for index, row in survey_df.iterrows():
    survey_id = row['SurveyID']
    survey_name = row['SurveyName']
    try:
        # Fetch survey responses using the existing instance of Responses
        survey_responses = r.get_survey_responses(survey_id, useLabels=True)  # Fetch responses with labels

        if not survey_responses.empty:
            # Add a new column with the survey name
            survey_responses['SurveyName'] = survey_name
            
            # Remove specified columns
            columns_to_drop = ['EndDate', 'Status', 'IPAddress', 'Progress', 
                               'Duration (in seconds)', 'Finished', 'RecordedDate', 
                               'RecipientLastName', 'RecipientFirstName', 'RecipientEmail', 
                               'ExternalReference', 'LocationLatitude', 'LocationLongitude', 
                               'DistributionChannel', 'UserLanguage']
            # errors='ignore' keeps surveys missing some metadata columns from failing outright
            survey_responses.drop(columns=columns_to_drop, inplace=True, errors='ignore')

            # Remove {"ImportId":"_recordId"} from ResponseId
            survey_responses['ResponseId'] = survey_responses['ResponseId'].apply(lambda x: x.split('{"ImportId":"_recordId"}')[-1])

            # Pivot the DataFrame longer
            pivoted_df = survey_responses.melt(id_vars=['ResponseId', 'StartDate', 'SurveyName'], 
                                                var_name='Question', 
                                                value_name='Response')

            # Append pivoted DataFrame to the list
            pivoted_dataframes.append(pivoted_df)

            print("Survey data processed for:", survey_id)
        else:
            print("No survey data available for:", survey_id)
    except Exception as e:
        print("Error processing survey data for:", survey_id, "-", str(e))
# Concatenate all pivoted DataFrames into one DataFrame if DataFrames were successfully generated

if pivoted_dataframes:
    combined_ids = pd.concat(pivoted_dataframes, ignore_index=True)

    # Specify the output file path
    output_file = os.path.join(output_directory, "combinedid.csv")

    try:
        # Write combined DataFrame to a CSV file
        combined_ids.to_csv(output_file, index=False)
        print("CSV file saved:", output_file)
    except Exception as e:
        print("Failed to write CSV file:", str(e))
else:
    print("No survey data available to create combined DataFrame.")
#################################################################################
########################################Get Question Text########################


# Keep the header rows, where the 'ResponseId' column holds the literal string 'Response ID'
# (in the Qualtrics export these rows carry the question text)
filtered_data = combined_ids[combined_ids['ResponseId'] == 'Response ID'].copy()

# Rename the 'Response' column to 'QuestionName'
filtered_data.rename(columns={'Response': 'QuestionName'}, inplace=True)

# Assign the filtered dataset to the new name 'processed_surveys'
processed_surveys = filtered_data

#################################################################################
########################################Merge the dataframes########################


# Merge the datasets on 'SurveyName' and 'Question'; overlapping columns get _x/_y suffixes
complete_survey = pd.merge(combined_ids, processed_surveys, on=['SurveyName', 'Question'], how='left')


# Reorder the columns (both inputs carry StartDate, so the merged column is named 'StartDate_x')
complete_survey = complete_survey[['ResponseId_x', 'StartDate_x', 'SurveyName', 'Question', 'QuestionName', 'Response']]
# Replace all newline characters with spaces within each cell of the 'QuestionName' column
complete_survey['QuestionName'] = complete_survey['QuestionName'].str.replace("\n", " ")
# Drop header rows and rows without a real response ID
complete_survey['ResponseId_x'] = complete_survey['ResponseId_x'].replace("", np.nan)
complete_survey.dropna(subset=['ResponseId_x'], inplace=True)
complete_survey = complete_survey[complete_survey['ResponseId_x'] != "Response ID"]


def trim_to_last_sentence(text):
    # Keep only the final sentence of the question text, e.g.
    # "Think about your student teacher. How effective were they?" -> "How effective were they?"
    if isinstance(text, str):
        # Find all occurrences of the punctuation marks
        matches = [m.start() for m in re.finditer(r'[.:?!]', text)]

        # Ensure there are at least two occurrences of the punctuation marks
        if len(matches) >= 2:
            # Get the position of the second to last punctuation mark
            second_last_pos = matches[-2]
            # Extract the text after the second to last punctuation mark
            result = text[second_last_pos + 1:].strip()
            return result
        else:
            return text.strip()
    else:
        return text

# Apply the function to the 'QuestionName' column
complete_survey['QuestionName'] = complete_survey['QuestionName'].apply(trim_to_last_sentence)


# Print the updated DataFrame
print("Updated DataFrame:")
print(complete_survey)

# Save the updated DataFrame to a new CSV file
complete_survey.to_csv("completed.csv", index=False)
processed_surveys.to_csv("processed_survey.csv", index=False)
combined_ids.to_csv("combinedid.csv", index=False)

Pull the data back into R to create analytical tools

We also develop a Shiny dashboard to analyze our employer survey data

library(tidyverse)
library(stringr)
library(lubridate)
library(shiny)
library(ggplot2)
library(plotly)
library(dplyr)
library(DT)

Employer <- read.csv("completed.csv") %>%
  filter(str_detect(SurveyName, regex("employer", ignore_case = TRUE))) %>%
  filter(!Question %in% c("Q1", "Q0", "Q10", "Q11", "Q8", "Q9", "Teacher")) %>%  # Use '!' to exclude these questions
  filter(!str_detect(Question, "TEXT")) %>%  # Exclude free-text sub-questions
  mutate(
    Response = na_if(Response, ""),
    StartDate = ymd_hms(StartDate_x),  # Parse the timestamp into a proper date-time
    Year = year(StartDate),  # Derive year/month from the parsed date, not the raw string
    Month = month(StartDate),
    Grad_Year = ifelse(
      Month >= 8,
      paste0(Year - 1, "-", Year),  # August onward: the academic year that just ended
      paste0(Year - 2, "-", Year - 1)  # Before August: the academic year before that
    ),
    Response = gsub("\\[.*?\\]", "", Response)  # Remove content inside brackets in 'Response'
  ) %>%
  na.omit() %>%  # Remove rows with NA values after transformations
  select(-Year, -Month)
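
To make the Grad_Year mapping concrete, here is a small check on hypothetical dates (illustrative only): a response from September 2023 and one from March 2024 both land in the 2022-2023 academic year

# Illustrative check of the Grad_Year mapping (hypothetical dates)
demo <- tibble(StartDate = ymd_hms(c("2023-09-15 10:00:00", "2024-03-01 10:00:00"))) %>%
  mutate(
    Year = year(StartDate),
    Month = month(StartDate),
    Grad_Year = ifelse(Month >= 8, paste0(Year - 1, "-", Year), paste0(Year - 2, "-", Year - 1))
  )
demo$Grad_Year  # both return "2022-2023"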

Employer_Summary <- Employer %>%
  group_by(SurveyName, Question, QuestionName, Grad_Year, Response) %>%
  summarise(count = n()) %>%
  group_by(SurveyName, Question, QuestionName, Grad_Year) %>%
  mutate(percentage = count / sum(count) * 100)


ui <- fluidPage(
  
  # Application title
  titlePanel("Employer Survey Responses"),
  
  # Sidebar with a question selection input (one question at a time)
  sidebarLayout(
    sidebarPanel(
      selectInput("question", "Select Question(s):",
                  choices = unique(Employer$QuestionName),
                  selected = unique(Employer$QuestionName)[1],
                  multiple = FALSE),
      # New download button for "Download all Data"
      downloadButton("download_all_data", "Download all Data")
    ),
    
    # Main panel for displaying the plot and datatable
    mainPanel(
      plotlyOutput("bar_plot", height = "600px"),  # Set a fixed height for the plot
      DTOutput("data_table"),
      downloadButton("download_data", "Download Filtered Data")
    )
  )
)

# Define server logic
server <- function(input, output) {
  
  # Reactive expression to filter data based on selected QuestionName(s)
  filtered_data <- reactive({
    Employer %>%
      filter(QuestionName %in% input$question)  # Filter for selected QuestionName(s)
  })
  
  # Create the plot
  output$bar_plot <- renderPlotly({
    
    # Get the filtered data
    data <- filtered_data()
    
    # Calculate the count and percentage for each response within each Grad_Year
    data_summary <- data %>%
      group_by(Grad_Year, Response) %>%
      summarise(count = n()) %>%
      group_by(Grad_Year) %>%
      mutate(percentage = count / sum(count) * 100)  # Calculate percentage
    
    # Create the ggplot
    p <- ggplot(data_summary, aes(x = count, y = Response, fill = Grad_Year)) +
      geom_bar(stat = "identity") +  # Use stat = "identity" to plot the counts
      geom_text(
        aes(label = gsub("\\[.*?\\]", "", paste0(count, "\n(", round(percentage, 1), "%)"))),  # Remove text within brackets
        hjust = -0.1  # Adjust the position of the labels to avoid overlap
      ) +
      labs(x = "Count of Distinct Responses", y = "Response") +
      theme_minimal() +
      theme(
        axis.text.y = element_text(angle = 45, hjust = 1),  # Rotate Y-axis text by 45 degrees
        axis.text.x = element_text(angle = 0),  # Keep X-axis text readable
        strip.text = element_text(size = 12),  # Adjust size of facet labels
        plot.margin = margin(t = 20, r = 20, b = 20, l = 50)  # Adjust plot margins to avoid clipping
      ) +
      facet_wrap(~Grad_Year)  # Create separate plots for each Grad_Year

    # Convert ggplot to plotly for interactivity
    ggplotly(p)
  })
  
  # Create the aggregated datatable for the filtered data
  output$data_table <- renderDT({
    # Get the filtered data
    data <- filtered_data()
    
    # Aggregate the data just like the plot
    data_summary <- data %>%
      group_by(Grad_Year, Response) %>%
      summarise(count = n()) %>%
      group_by(Grad_Year) %>%
      mutate(percentage = count / sum(count) * 100)  # Calculate percentage
    
    # Return the aggregated data to the table
    data_summary %>%
      select(Grad_Year, Response, count, percentage) %>%
      arrange(Grad_Year, Response)  # Sort the data
  })
  
  # Create a download button to download the aggregated data as CSV (filtered data)
  output$download_data <- downloadHandler(
    filename = function() {
      paste("aggregated_data_", Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      # Get the filtered and aggregated data
      data <- filtered_data()
      
      # Aggregate the data just like the plot
      data_summary <- data %>%
        group_by(Grad_Year, Response) %>%
        summarise(count = n()) %>%
        group_by(Grad_Year) %>%
        mutate(percentage = count / sum(count) * 100)  # Calculate percentage
      
      # Write the aggregated data to the CSV file
      write.csv(data_summary, file, row.names = FALSE)
    }
  )
  
  # Create a download button to download the entire Employer_Summary data as CSV
  output$download_all_data <- downloadHandler(
    filename = function() {
      paste("Employer_Summary_", Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      # Write the Employer_Summary data to the CSV file
      write.csv(Employer_Summary, file, row.names = FALSE)
    }
  )
}

# Run the application
shinyApp(ui = ui, server = server)