This analysis examines survey data comparing customer perceptions of Delta and Spirit Airlines. The dataset loaded below contains 25 response rows, 23 of them marked complete, covering various aspects including pricing satisfaction, delays, overall preferences, and more.
# Load necessary libraries
library(tidyverse)
library(knitr)
library(scales)
library(ggthemes)
library(gridExtra)
# Read the raw survey export into a data frame
data <- read.csv("Delta v. Spirit_April 28, 2025_16.csv")
# Report how many rows and columns were read
cat("Dataset dimensions:", nrow(data), "rows and", ncol(data), "columns\n")
## Dataset dimensions: 25 rows and 39 columns
# Lookup table: survey question code -> short, readable description.
# The data columns themselves are not renamed; this is only a named
# character vector keyed by question code.
question_labels <- c(
  Q1  = "Chosen Spirit over Delta due to lower fare",
  Q2  = "Final Spirit price different from Delta",
  Q3  = "Spirit total cost better value than Delta",
  Q4  = "Prefer higher base fare with fewer fees",
  Q5  = "Experienced delay with Spirit",
  Q7  = "Experienced delay with Delta",
  Q11 = "Airline with more frequent on-time departures",
  Q16 = "Flown with Spirit",
  Q13 = "Satisfaction with Spirit flight",
  Q15 = "Filed complaint against Spirit",
  Q17 = "Flown with Delta",
  Q14 = "Satisfaction with Delta flight",
  Q18 = "Filed complaint against Delta",
  Q21 = "Overall airline preference"
)
# Count complete responses (rows where Finished equals 1).
# na.rm = TRUE keeps the tally numeric when some rows have no Finished
# value; without it a single NA would turn the whole count into NA.
complete_responses <- sum(data$Finished == 1, na.rm = TRUE)
cat("Complete responses:", complete_responses, "out of", nrow(data), "\n")
## Complete responses: 23 out of 25
# Q1: Have you ever chosen Spirit Airlines over Delta primarily due to a
# lower base fare?
# Tally the non-missing answers, then attach each answer's share of the
# total and a display label.
q1_counts <- data %>%
  drop_na(Q1) %>%
  count(Q1) %>%
  mutate(
    percentage = n / sum(n) * 100,
    # Code 1 is shown as "Yes"; every other non-missing code as "No"
    Q1_label = ifelse(Q1 == 1, "Yes", "No")
  )

# Pie chart for Q1: a single stacked bar wrapped onto polar coordinates,
# with the rounded percentage printed in the middle of each slice
ggplot(q1_counts, aes(x = "", y = percentage, fill = Q1_label)) +
  geom_col(width = 1) +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "Have you chosen Spirit over Delta due to lower fare?",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank()
  )
# Q2: When booking with Spirit, did you find the final total price (after
# add-ons) to be significantly different from Delta's fare?
# Count each non-missing answer and translate the numeric code to text;
# codes outside 1-3 are labelled "Unknown".
q2_counts <- data %>%
  drop_na(Q2) %>%
  count(Q2) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q2_label = case_when(
      Q2 == 1 ~ "No, similar final price",
      Q2 == 2 ~ "Somewhat different",
      Q2 == 3 ~ "Yes, significantly different",
      TRUE ~ "Unknown"
    )
  )

# Bar chart of counts, most frequent answer first, annotated with the
# rounded percentage near the top of each bar
ggplot(q2_counts, aes(x = reorder(Q2_label, -n), y = n, fill = Q2_label)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.9)
  ) +
  labs(
    title = "Was Spirit's final price different from Delta's?",
    x = "Response",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Q3: Do you feel Spirit's total cost after fees is still a better value
# than Delta?
# Count non-missing answers; code 1 is labelled "Yes", anything else "No".
q3_counts <- data %>%
  drop_na(Q3) %>%
  count(Q3) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q3_label = ifelse(Q3 == 1, "Yes", "No")
  )

# Bar chart of counts with the rounded percentage printed inside each bar
ggplot(q3_counts, aes(x = Q3_label, y = n, fill = Q3_label)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.9)
  ) +
  labs(
    title = "Is Spirit still a better value after fees?",
    x = "Response",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Q4: If given the option, would you prefer a slightly higher base fare
# with fewer additional fees?
# Count non-missing answers; code 1 is labelled "Yes", anything else "No".
# NOTE(review): the summary table later in this file counts Q4 == 2 as the
# "preferring" group, which disagrees with the 1 = "Yes" labelling used
# here - confirm the survey's coding for Q4.
q4_counts <- data %>%
  drop_na(Q4) %>%
  count(Q4) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q4_label = ifelse(Q4 == 1, "Yes", "No")
  )

# Bar chart of counts (Set2 palette) with rounded percentages in the bars
ggplot(q4_counts, aes(x = Q4_label, y = n, fill = Q4_label)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.9)
  ) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Preference for higher base fare with fewer fees",
    x = "Response",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Cross-tabulation of value perception (Q3) against overall airline
# preference (Q21), using only rows where both questions were answered
price_vs_preference <- data %>%
  drop_na(Q3, Q21) %>%
  count(Q3, Q21) %>%
  mutate(
    # Q3: code 1 -> Spirit seen as better value; any other code is
    # presented as Delta being the better value
    Q3_label = ifelse(Q3 == 1, "Spirit better value", "Delta better value"),
    # Q21: 1 -> Spirit, 4 -> Delta, every other code -> no strong preference
    Q21_label = case_when(
      Q21 == 1 ~ "Prefer Spirit",
      Q21 == 4 ~ "Prefer Delta",
      TRUE ~ "No strong preference"
    )
  )

# Grouped (dodged) bars: one cluster per value perception, one bar per
# stated airline preference
ggplot(price_vs_preference, aes(x = Q3_label, y = n, fill = Q21_label)) +
  geom_col(position = "dodge") +
  labs(
    title = "Value Perception vs Airline Preference",
    x = "Value Perception",
    y = "Count",
    fill = "Airline Preference"
  ) +
  theme_minimal()
# Q5: Have you experienced a flight delay or cancellation with Spirit?
# Spirit answers use codes 3/4/5; anything else is labelled "Unknown".
q5_counts <- data %>%
  drop_na(Q5) %>%
  count(Q5) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q5_label = case_when(
      Q5 == 3 ~ "No",
      Q5 == 4 ~ "Yes, minor delay",
      Q5 == 5 ~ "Yes, significant delay/cancellation",
      TRUE ~ "Unknown"
    )
  )

# Q7: Have you experienced a flight delay or cancellation with Delta?
# Delta answers use codes 1/2/3; anything else is labelled "Unknown".
q7_counts <- data %>%
  drop_na(Q7) %>%
  count(Q7) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q7_label = case_when(
      Q7 == 1 ~ "No",
      Q7 == 2 ~ "Yes, minor delay",
      Q7 == 3 ~ "Yes, significant delay/cancellation",
      TRUE ~ "Unknown"
    )
  )

# Build one count chart per airline, then place them side by side.
# Each panel keeps its own axes.
p1 <- ggplot(q5_counts, aes(x = Q5_label, y = n, fill = Q5_label)) +
  geom_col() +
  labs(title = "Spirit Delays", x = "", y = "Count") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

p2 <- ggplot(q7_counts, aes(x = Q7_label, y = n, fill = Q7_label)) +
  geom_col() +
  labs(title = "Delta Delays", x = "", y = "Count") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

grid.arrange(p1, p2, ncol = 2, top = "Flight Delay Experience Comparison")
# Q11: Which airline has more frequent on-time departures?
# Count non-missing answers and map codes 1-3 to text; other codes are
# labelled "Unknown".
q11_counts <- data %>%
  drop_na(Q11) %>%
  count(Q11) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q11_label = case_when(
      Q11 == 1 ~ "Spirit more on-time",
      Q11 == 2 ~ "About the same",
      Q11 == 3 ~ "Delta more on-time",
      TRUE ~ "Unknown"
    )
  )

# Bar chart of counts, most frequent answer first, with rounded
# percentages printed in the bars
ggplot(q11_counts, aes(x = reorder(Q11_label, -n), y = n, fill = Q11_label)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.9)
  ) +
  labs(
    title = "Which airline has more frequent on-time departures?",
    x = "Response",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )
# Long-format satisfaction table: one row per (airline, rating) pair.
# Q13 holds the Spirit rating and Q14 the Delta rating; missing ratings
# are dropped while reshaping.
satisfaction_data <- data %>%
  select(Spirit = Q13, Delta = Q14) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Airline",
    values_to = "Satisfaction",
    values_drop_na = TRUE
  )

# Side-by-side bars: number of respondents at each satisfaction level,
# split by airline. Numeric levels are relabelled on the x axis.
ggplot(satisfaction_data, aes(x = factor(Satisfaction), fill = Airline)) +
  geom_bar(position = "dodge") +
  scale_x_discrete(
    labels = c(
      "1" = "Very Dissatisfied",
      "2" = "Dissatisfied",
      "3" = "Neutral",
      "4" = "Satisfied",
      "5" = "Very Satisfied"
    )
  ) +
  labs(
    title = "Satisfaction Comparison between Airlines",
    x = "Satisfaction Level",
    y = "Count"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Mean satisfaction rating and number of ratings for each airline
avg_satisfaction <- satisfaction_data %>%
  group_by(Airline) %>%
  summarise(
    Average_Satisfaction = mean(Satisfaction, na.rm = TRUE),
    Sample_Size = n()
  )

# One bar per airline on a fixed 0-5 axis, with the mean (two decimals)
# printed just above each bar
ggplot(
  avg_satisfaction,
  aes(x = Airline, y = Average_Satisfaction, fill = Airline)
) +
  geom_col() +
  geom_text(aes(label = round(Average_Satisfaction, 2)), vjust = -0.5) +
  ylim(0, 5) +
  labs(
    title = "Average Satisfaction Rating by Airline",
    x = "Airline",
    y = "Average Satisfaction (1-5 scale)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Q21: Overall, which airline do you prefer?
# Code 1 is labelled Spirit and code 4 Delta; every other non-missing code
# is grouped under "No strong preference".
q21_counts <- data %>%
  drop_na(Q21) %>%
  count(Q21) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Q21_label = case_when(
      Q21 == 1 ~ "Spirit",
      Q21 == 4 ~ "Delta",
      TRUE ~ "No strong preference"
    )
  )

# Bar chart of counts, most frequent answer first, using a fixed colour
# per answer and rounded percentages printed in the bars
ggplot(q21_counts, aes(x = reorder(Q21_label, -n), y = n, fill = Q21_label)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.9)
  ) +
  scale_fill_manual(
    values = c(
      "Delta" = "blue",
      "Spirit" = "yellow",
      "No strong preference" = "gray"
    )
  ) +
  labs(
    title = "Overall Airline Preference",
    x = "Preferred Airline",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Q23 preference factors analysis
# Reshape the four Q23 sub-questions into long format: one row per
# (decision factor, chosen option) pair, with unanswered items removed.
q23_data <- data %>%
  select(
    Airline = Q23_1,
    Cost = Q23_2,
    Assigned_Seating = Q23_3,
    Layover = Q23_4
  ) %>%
  pivot_longer(
    cols = c(Airline, Cost, Assigned_Seating, Layover),
    names_to = "Factor",
    values_to = "Choice"
  ) %>%
  filter(!is.na(Choice))

# Calculate factor preferences.
# Percentages are computed within each Factor, so the shares for one factor
# add up to 100. Choice 1 is labelled Spirit, 2 Delta, anything else Either.
factor_prefs <- q23_data %>%
  group_by(Factor) %>%
  count(Choice) %>%
  mutate(
    percentage = n / sum(n) * 100,
    Choice_label = case_when(
      Choice == 1 ~ "Spirit",
      Choice == 2 ~ "Delta",
      TRUE ~ "Either"
    )
  ) %>%
  # Drop the leftover grouping so later operations on factor_prefs are not
  # silently evaluated per Factor
  ungroup()

# Plot factor preferences as stacked percentage bars, one bar per factor,
# with each segment annotated by its rounded share
ggplot(factor_prefs, aes(x = Factor, y = percentage, fill = Choice_label)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(
    aes(label = paste0(round(percentage), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    title = "Preference Factors Analysis",
    x = "Decision Factor",
    y = "Percentage",
    fill = "Preference"
  ) +
  theme_minimal() +
  scale_fill_manual(
    values = c("Delta" = "blue", "Spirit" = "yellow", "Either" = "gray")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Percentage of answered (non-missing) responses that satisfy a condition.
#   answers: the raw response column
#   hit:     logical vector, same length, TRUE where the response counts
# Missing answers are excluded from both numerator and denominator.
pct_answered <- function(answers, hit) {
  sum(hit, na.rm = TRUE) / sum(!is.na(answers)) * 100
}

# Create summary of key metrics: one row per headline percentage, rounded
# to one decimal place.
summary_stats <- data.frame(
  Metric = c(
    "% Choosing Spirit over Delta due to lower fare",
    "% Finding Spirit's final price similar to Delta",
    "% Feeling Spirit is better value after fees",
    "% Preferring higher base fare with fewer fees",
    "% Experiencing delay with Spirit",
    "% Experiencing delay with Delta",
    "% Rating Delta more on-time than Spirit",
    "% Overall preference for Delta"
  ),
  Value = round(
    c(
      pct_answered(data$Q1, data$Q1 == 1),
      pct_answered(data$Q2, data$Q2 == 1),
      pct_answered(data$Q3, data$Q3 == 1),
      # NOTE(review): the Q4 chart earlier in this file labels code 1 as
      # "Yes", yet code 2 is counted here as "preferring" - confirm which
      # code means "Yes" and align the two.
      pct_answered(data$Q4, data$Q4 == 2),
      # Spirit delay codes: 4 = minor, 5 = significant/cancellation
      pct_answered(data$Q5, data$Q5 >= 4),
      # Delta delay codes: 2 = minor, 3 = significant/cancellation
      pct_answered(data$Q7, data$Q7 >= 2),
      pct_answered(data$Q11, data$Q11 == 3),
      # NOTE(review): the rendered table reports 0 for this metric while the
      # closing text says Delta is preferred - confirm that 4 really is the
      # code for Delta in Q21.
      pct_answered(data$Q21, data$Q21 == 4)
    ),
    1
  )
)

kable(
  summary_stats,
  col.names = c("Key Metric", "Percentage (%)"),
  caption = "Summary of Key Findings"
)
| Key Metric | Percentage (%) |
|---|---|
| % Choosing Spirit over Delta due to lower fare | 72 |
| % Finding Spirit’s final price similar to Delta | 36 |
| % Feeling Spirit is better value after fees | 64 |
| % Preferring higher base fare with fewer fees | 92 |
| % Experiencing delay with Spirit | 80 |
| % Experiencing delay with Delta | 60 |
| % Rating Delta more on-time than Spirit | 44 |
| % Overall preference for Delta | 0 |
Based on the survey data analysis, we can conclude:
These findings suggest that while Spirit’s low-cost model attracts budget-conscious travelers, Delta’s more inclusive pricing structure and perceived reliability create higher overall satisfaction and preference.