knitr::opts_chunk$set(echo = TRUE)
packages <- c('qualtRics', 'psych', 'tidyverse', 'shiny', 'dplyr', 'stringr', 'reticulate', 'lubridate', 'ggplot2', 'plotly', 'DT')
lapply(packages, library, character.only = TRUE)
api_key <- readLines("qualtrics.txt", warn = FALSE)
# Use the api_key in your function
qualtrics_api_credentials(
api_key = api_key,
base_url = "uwyo.az1.qualtrics.com",
install = TRUE,
overwrite = TRUE
)
allsurveys<- qualtRics::all_surveys()
write.csv(allsurveys, "all_surveys.csv")
stperc<- fetch_survey(surveyID = "SV_9EOhqVCpXkqmcvk", label = TRUE, convert = TRUE, force_request = TRUE, breakout_sets = FALSE)%>%
mutate(Survey_ID = "SV_0cyJMzOsOYepZSm")
## | | | 0% | |======================================================================| 100%
qual<- stperc%>%
dplyr::select(Q10, Q11)
write.csv(qual, "qualresponses.csv")
write.csv(stperc, "stperc.csv")
library(dplyr)
library(tidyr)
library(sjlabelled) # For extracting variable labels
# Extract variable labels (question text) from stperc
question_labels <- get_label(stperc)
# Convert labels to a data frame
question_labels_df <- data.frame(Question = names(question_labels),
Label = unlist(question_labels),
stringsAsFactors = FALSE)
# Pivot and summarize data
Aggregated <- stperc %>%
filter(!is.na(IPAddress)) %>%
select(Q2_1, Q2_2, Q2_3, Q2_4, Q2_5, Q2_6, Q3, Q4_1, Q4_2, Q4_3, Q4_4, Q5, Q6,
Q7_1, Q7_2, Q7_3, Q7_4, Q7_5, Q8_1, Q8_2, Q8_3, Q8_4, Q8_5,
Q9_1, Q9_2, Q9_3, Q9_4, Q9_5, Q9_6, Q9_7) %>%
pivot_longer(cols = everything(), names_to = "Question", values_to = "Response") %>%
na.omit() %>%
group_by(Question, Response) %>%
summarise(Count = n(), .groups = "drop") %>%
group_by(Question) %>%
mutate(Proportion = Count / sum(Count)) %>%
arrange(Question, desc(Count)) %>%
left_join(question_labels_df, by = "Question")%>%
select(Question, Label, Response, Count, Proportion)
write.csv(Aggregated, "question_counts.csv")
library(reticulate)
use_python("C:/Users/mperki17/AppData/Local/Programs/Python/Python312/python.exe")
knitr::opts_chunk$set(echo = TRUE, error=TRUE)
import sys
!pip install QualtricsAPI
!pip install pandas
!pip install requests
!pip install beautifulsoup4
import os
import pandas as pd
import requests
import re
import numpy as np
from bs4 import BeautifulSoup
from QualtricsAPI.Survey import Responses
from QualtricsAPI.Setup import Credentials
# Read API key from a .txt file
with open("qualtrics.txt", "r") as f:
api_key = f.read().strip()
def fetch_survey_data(api_key):
base_url = "https://uwyo.az1.qualtrics.com/API/v3/surveys"
headers = {"x-api-token": api_key, "Content-Type": "application/json"}
try:
response = requests.get(base_url, headers=headers)
response.raise_for_status() # Raise an error for non-2xx status codes
surveys = response.json().get("result", {}).get("elements", [])
if not surveys:
print("No surveys found.")
return None
survey_data = [{"SurveyID": survey["id"], "SurveyName": survey["name"]} for survey in surveys]
survey_df = pd.DataFrame(survey_data)
print("Survey IDs fetched successfully.")
return survey_df
except requests.exceptions.RequestException as e:
print("Error fetching survey IDs:", str(e))
return None
# Call the fetch_survey_data function
survey_df = fetch_survey_data(api_key)
# Print survey_df to check if it contains the expected data
print("Survey DataFrame:")
print(survey_df)
# Function to fetch survey definition
def fetch_survey_definition(api_key, survey_id):
headers = {
"X-API-TOKEN": api_key
}
url = f"https://uwyo.az1.qualtrics.com/API/v3/surveys/{survey_id}/details"
response = requests.get(url, headers=headers)
return response.json()
# Define output directory before its usage
output_directory = "output"
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print("Output directory created:", output_directory)
else:
print("Output directory already exists:", output_directory)
# Initialize an empty list to store pivoted DataFrames
pivoted_dataframes = []
# Initialize an instance of Credentials
c = Credentials()
# Call the qualtrics_api_credentials() method
c.qualtrics_api_credentials(token=api_key, data_center='uwyo.az1')
# Initialize an instance of Responses
r = Responses()
# Iterate over each survey ID and name
for index, row in survey_df.iterrows():
survey_id = row['SurveyID']
survey_name = row['SurveyName']
try:
# Fetch survey responses using the existing instance of Responses
survey_responses = r.get_survey_responses(survey_id, useLabels=True) # Fetch responses with labels
if not survey_responses.empty:
# Add a new column with the survey name
survey_responses['SurveyName'] = survey_name
# Remove specified columns
columns_to_drop = ['EndDate', 'Status', 'IPAddress', 'Progress',
'Duration (in seconds)', 'Finished', 'RecordedDate',
'RecipientLastName', 'RecipientFirstName', 'RecipientEmail',
'ExternalReference', 'LocationLatitude', 'LocationLongitude',
'DistributionChannel', 'UserLanguage']
survey_responses.drop(columns=columns_to_drop, inplace=True)
# Remove {"ImportId":"_recordId"} from ResponseId
survey_responses['ResponseId'] = survey_responses['ResponseId'].apply(lambda x: x.split('{"ImportId":"_recordId"}')[-1])
# Pivot the DataFrame longer
pivoted_df = survey_responses.melt(id_vars=['ResponseId', 'StartDate', 'SurveyName'],
var_name='Question',
value_name='Response')
# Append pivoted DataFrame to the list
pivoted_dataframes.append(pivoted_df)
print("Survey data processed for:", survey_id)
else:
print("No survey data available for:", survey_id)
except Exception as e:
print("Error processing survey data for:", survey_id, "-", str(e))
# Concatenate all pivoted DataFrames into one DataFrame if DataFrames were successfully generated
if pivoted_dataframes:
combined_ids = pd.concat(pivoted_dataframes, ignore_index=True)
# Specify the output file path
output_file = os.path.join(output_directory, "combinedid.csv")
try:
# Write combined DataFrame to a CSV file
combined_ids.to_csv(output_file, index=False)
print("CSV file saved:", output_file)
except Exception as e:
print("Failed to write CSV file:", str(e))
else:
print("No survey data available to create combined DataFrame.")
#################################################################################
########################################Get Question Text########################
# Filter rows where 'ResponseId' appears in 'ResponseId' column
filtered_data = combined_ids[combined_ids['ResponseId'] == 'Response ID']
# Rename the 'Response' column to 'QuestionText'
filtered_data.rename(columns={'Response': 'QuestionName'}, inplace=True)
# Assign the filtered dataset to the new name 'processed_surveys'
processed_surveys = filtered_data
#################################################################################
########################################Merge the dataframes########################
# Merge the datasets based on 'SurveyName' and 'Question'
complete_survey = pd.merge(combined_ids, processed_surveys, on=['SurveyName', 'Question'], how='left')
# Reorder the columns as per your requirement
complete_survey = complete_survey[['ResponseId_x', 'StartDate', 'SurveyName', 'Question', 'QuestionName', 'Response']]
# Replace all newline characters with spaces within each cell in the 'Description' column
complete_survey['QuestionName'] = complete_survey['QuestionName'].str.replace("\n", " ")
complete_survey['ResponseId_x'].replace("", np.nan, inplace=True)
complete_survey.dropna(subset=['ResponseId_x'], inplace=True)
complete_survey = complete_survey[complete_survey['ResponseId_x'] != "Response ID"]
def trim_to_last_sentence(text):
if isinstance(text, str):
# Find all occurrences of the punctuation marks
matches = [m.start() for m in re.finditer(r'[.:?!]', text)]
# Ensure there are at least two occurrences of the punctuation marks
if len(matches) >= 2:
# Get the position of the second to last punctuation mark
second_last_pos = matches[-2]
# Extract the text after the second to last punctuation mark
result = text[second_last_pos + 1:].strip()
return result
else:
return text.strip()
else:
return text
# Apply the function to the 'Description' column
complete_survey['QuestionName'] = complete_survey['QuestionName'].apply(trim_to_last_sentence)
# Print the updated DataFrame
print("Updated DataFrame:")
print(complete_survey)
# Save the updated DataFrame to a new CSV file
complete_survey.to_csv("completed.csv", index=False)
processed_surveys.to_csv("processed_survey.csv", index=False)
combined_ids.to_csv("combinedid.csv", index=False)
library(tidyverse)
library(stringr)
library(lubridate)
library(shiny)
library(ggplot2)
library(plotly)
library(dplyr)
library(DT)
Employer <- read.csv("completed.csv") %>%
filter(str_detect(SurveyName, regex("employer", ignore_case = TRUE))) %>%
filter(!Question %in% c("Q1", "Q0", "Q10", "Q11", "Q8", "Q9", "Teacher")) %>% # Use '!' to exclude values
filter(!str_detect(Question, "TEXT")) %>% # Exclude rows with 'TEXT' in 'Question'
mutate(
Response = na_if(Response, ""),
StartDate = ymd_hms(StartDate_x), # Ensure Date is in proper format
Year = year(StartDate_x),
Month = month(StartDate_x),
Grad_Year = ifelse(
Month >= 8,
paste0(Year - 1, "-", Year), # Adjusted to one less year
paste0(Year - 2, "-", Year - 1) # Adjusted to reflect previous academic year
),
Response = gsub("\\[.*?\\]", "", Response) # Remove content inside brackets in 'Response'
) %>%
na.omit() %>% # Remove rows with NA values after transformations
select(-Year, -Month)
Employer_Summary<- Employer%>%
group_by(SurveyName, Question, QuestionName, Grad_Year, Response) %>%
summarise(count = n()) %>%
group_by(SurveyName, Question, QuestionName, Grad_Year) %>%
mutate(percentage = count / sum(count) * 100)
ui <- fluidPage(
# Application title
titlePanel("Employer Survey Responses"),
# Sidebar with a question selection input (allow multiple selections)
sidebarLayout(
sidebarPanel(
selectInput("question", "Select Question(s):",
choices = unique(Employer$QuestionName),
selected = unique(Employer$QuestionName)[1],
multiple = FALSE),
# New download button for "Download all Data"
downloadButton("download_all_data", "Download all Data")
),
# Main panel for displaying the plot and datatable
mainPanel(
plotlyOutput("bar_plot", height = "600px"), # Set a fixed height for the plot
DTOutput("data_table"),
downloadButton("download_data", "Download Filtered Data")
)
)
)
# Define server logic
server <- function(input, output) {
# Reactive expression to filter data based on selected QuestionName(s)
filtered_data <- reactive({
Employer %>%
filter(QuestionName %in% input$question) # Filter for selected QuestionName(s)
})
# Create the plot
output$bar_plot <- renderPlotly({
# Get the filtered data
data <- filtered_data()
# Calculate the count and percentage for each response within each Grad_Year
data_summary <- data %>%
group_by(Grad_Year, Response) %>%
summarise(count = n()) %>%
group_by(Grad_Year) %>%
mutate(percentage = count / sum(count) * 100) # Calculate percentage
# Create the ggplot
p <- ggplot(data_summary, aes(x = count, y = Response, fill = Grad_Year)) +
geom_bar(stat = "identity") + # Use stat = "identity" to plot the counts
geom_text(
aes(label = gsub("\\[.*?\\]", "", paste0(count, "\n(", round(percentage, 1), "%)"))), # Remove text within brackets
hjust = -0.1 # Adjust the position of the labels to avoid overlap
) +
labs(x = "Count of Distinct Responses", y = "Response") +
theme_minimal() +
theme(
axis.text.y = element_text(angle = 45, hjust = 1), # Rotate Y-axis text by 45 degrees
axis.text.x = element_text(angle = 0), # Keep X-axis text readable
strip.text = element_text(size = 12), # Adjust size of facet labels
plot.margin = margin(t = 20, r = 20, b = 20, l = 50) # Adjust plot margins to avoid clipping
) +
facet_wrap(~Grad_Year) # Create separate plots for each Grad_Year
# Convert ggplot to plotly for interactivity
ggplotly(p)
})
# Create the aggregated datatable for the filtered data
output$data_table <- renderDT({
# Get the filtered data
data <- filtered_data()
# Aggregate the data just like the plot
data_summary <- data %>%
group_by(Grad_Year, Response) %>%
summarise(count = n()) %>%
group_by(Grad_Year) %>%
mutate(percentage = count / sum(count) * 100) # Calculate percentage
# Return the aggregated data to the table
data_summary %>%
select(Grad_Year, Response, count, percentage) %>%
arrange(Grad_Year, Response) # Sort the data
})
# Create a download button to download the aggregated data as CSV (filtered data)
output$download_data <- downloadHandler(
filename = function() {
paste("aggregated_data_", Sys.Date(), ".csv", sep = "")
},
content = function(file) {
# Get the filtered and aggregated data
data <- filtered_data()
# Aggregate the data just like the plot
data_summary <- data %>%
group_by(Grad_Year, Response) %>%
summarise(count = n()) %>%
group_by(Grad_Year) %>%
mutate(percentage = count / sum(count) * 100) # Calculate percentage
# Write the aggregated data to the CSV file
write.csv(data_summary, file, row.names = FALSE)
}
)
# Create a download button to download the entire Employer_Summary data as CSV
output$download_all_data <- downloadHandler(
filename = function() {
paste("Employer_Summary_", Sys.Date(), ".csv", sep = "")
},
content = function(file) {
# Write the Employer_Summary data to the CSV file
write.csv(Employer_Summary, file, row.names = FALSE)
}
)
}
# Run the application
shinyApp(ui = ui, server = server)