Find and Import Data

# Take one listing of the working directory, then keep the names ending in
# ".csv" (any letter case) as the candidate data files.
all_files <- list.files()
csv_files <- grep("\\.csv$", all_files, ignore.case = TRUE, value = TRUE)

print(all_files)
##  [1] "bank.csv"                                           
##  [2] "Chapter 12 - Watching Competitors.pdf"              
##  [3] "desktop.ini"                                        
##  [4] "DodgersData.csv"                                    
##  [5] "grapeJuice (1).csv"                                 
##  [6] "grapeJuice.csv"                                     
##  [7] "MKTG 4000 - Group 2 Survey_April 15, 2026_16.21.csv"
##  [8] "MKTG+4000+-+Group+2+Survey_April+15,+2026_16.21"    
##  [9] "MKTG+4000+-+Group+2+Survey_April+15,+2026_16.21.zip"
## [10] "Screenshot 2026-03-04 161737.png"                   
## [11] "syntax - t-test.txt"                                
## [12] "textbook_platform_analysis (1).Rmd"                 
## [13] "textbook_platform_analysis.Rmd"                     
## [14] "textbook_platform_analysis_autofind_csv.Rmd"
# Display the CSV file names collected in csv_files.
print(csv_files)
## [1] "bank.csv"                                           
## [2] "DodgersData.csv"                                    
## [3] "grapeJuice (1).csv"                                 
## [4] "grapeJuice.csv"                                     
## [5] "MKTG 4000 - Group 2 Survey_April 15, 2026_16.21.csv"
# Choose the file to load. The exact survey export name is preferred; if it is
# not among the CSVs found, the first CSV is used instead; with no CSV at all
# the script stops with an instruction for the user.
target_file <- "MKTG 4000 - Group 2 Survey_April 15, 2026_16.21.csv"

if (target_file %in% csv_files) {
  survey_file <- target_file
} else if (length(csv_files) >= 1) {
  survey_file <- csv_files[1]
  # The fallback can be an unrelated CSV, so report which file was picked
  # rather than letting a later step fail with a "column not found" error.
  warning(
    "Expected survey file '", target_file, "' was not found; using '",
    survey_file, "' instead.",
    call. = FALSE
  )
} else {
  stop("No CSV file was found in your project folder. Upload your survey CSV into the same Posit Cloud project as this .Rmd file.")
}

# stringsAsFactors = FALSE keeps text columns as character vectors.
survey_raw <- read.csv(survey_file, stringsAsFactors = FALSE)

# Build the analysis data set from the raw import:
#   1. drop the first two rows (presumably extra header rows written by the
#      survey export -- confirm against the CSV),
#   2. standardise the column names with clean_names(),
#   3. give question columns q1-q7 descriptive names,
#   4. trim surrounding whitespace in every column and correct the
#      "Sophmore" misspelling in academic_status.
survey <- survey_raw[-c(1, 2), ] %>%
  clean_names() %>%
  rename(
    academic_status = q1,
    textbook_format = q2,
    user_friendly = q3,
    purchase_pref = q4,
    other_platform = q5,
    spending = q6,
    price_perception = q7
  ) %>%
  mutate(
    # Trim first: mutate() evaluates its arguments in order, so the recode
    # below sees trimmed text and also fixes values such as "Sophmore ".
    across(everything(), trimws),
    academic_status = recode(academic_status, "Sophmore" = "Sophomore")
  )

Data Check

# Preview the first rows of the cleaned survey data.
head(survey)
##            start_date            end_date     status      ip_address progress
## 3 2026-03-23 16:03:17 2026-03-23 16:04:12 IP Address 136.168.214.221      100
## 4 2026-03-23 16:04:02 2026-03-23 16:04:53 IP Address 136.168.214.222      100
## 5 2026-03-23 16:42:25 2026-03-23 16:43:14 IP Address  136.168.55.238      100
## 6 2026-03-25 16:47:32 2026-03-25 16:48:59 IP Address 136.168.214.250      100
## 7 2026-04-08 16:22:23 2026-04-08 16:25:57 IP Address 136.168.214.221      100
## 8 2026-04-08 16:29:29 2026-04-08 16:30:03 IP Address 136.168.214.236      100
##   duration_in_seconds finished       recorded_date       response_id
## 3                  55     True 2026-03-23 16:04:13 R_1Wxx2PnUIHxzjO6
## 4                  51     True 2026-03-23 16:04:53 R_3nYJdpmkLal6m1P
## 5                  48     True 2026-03-23 16:43:15 R_1pefVBQWLQl73hf
## 6                  86     True 2026-03-25 16:48:59 R_7QKE8qLhktJR321
## 7                 214     True 2026-04-08 16:25:59 R_6mkmStYyf2Lpjrj
## 8                  34     True 2026-04-08 16:30:04 R_70NOV29NJLyWCFD
##   recipient_last_name recipient_first_name recipient_email external_reference
## 3                                                                            
## 4                                                                            
## 5                                                                            
## 6                                                                            
## 7                                                                            
## 8                                                                            
##   location_latitude location_longitude distribution_channel user_language
## 3           35.3044          -119.1031            anonymous            EN
## 4           35.3044          -119.1031            anonymous            EN
## 5           35.3288          -118.9748            anonymous            EN
## 6           35.3044          -119.1031            anonymous            EN
## 7           35.3878           -118.936            anonymous            EN
## 8           35.3878           -118.936            anonymous            EN
##   academic_status textbook_format user_friendly             purchase_pref
## 3          Senior      electronic   McGraw Hill               McGraw Hill
## 4          Senior      electronic       Pearson I don't have a preference
## 5          Senior      electronic   McGraw Hill                   Pearson
## 6          Senior      electronic       Pearson I don't have a preference
## 7          Senior      electronic   McGraw Hill               McGraw Hill
## 8          Junior      electronic       Pearson I don't have a preference
##   other_platform      spending                         price_perception
## 3            N/A       $50-$75                  They are about the same
## 4            N/A less than $50 I don't pay attention to textbook prices
## 5            n/a less than $50                Pearson is more expensive
## 6                     $75-$100                  They are about the same
## 7            N/A       $50-$75                  They are about the same
## 8                less than $50            McGraw Hill is more expensive
##   q_ballot_box_stuffing
## 3                      
## 4                      
## 5                      
## 6                      
## 7                      
## 8
# Number of rows in the cleaned survey data.
nrow(survey)
## [1] 27

Graph 1: Preferred Textbook Format

# Bar chart of responses per textbook format; each bar is labelled with its
# count and the fill legend is hidden because it repeats the x-axis.
ggplot(
  count(survey, textbook_format),
  aes(textbook_format, n, fill = textbook_format)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.3) +
  theme_minimal() +
  labs(y = "Responses", x = "Format", title = "Preferred Textbook Format")

Graph 2: User-Friendly Platform

# Bar chart of responses per platform named as most user-friendly; each bar is
# labelled with its count.
ggplot(
  count(survey, user_friendly),
  aes(user_friendly, n, fill = user_friendly)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.3) +
  theme_minimal() +
  labs(y = "Responses", x = "Platform", title = "Most User-Friendly Platform")

Graph 3: Purchase Preference

# Horizontal bar chart of responses per purchase preference. The axes are
# flipped, so the count labels are nudged horizontally (hjust) instead of
# vertically.
ggplot(
  count(survey, purchase_pref),
  aes(purchase_pref, n, fill = purchase_pref)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.2) +
  coord_flip() +
  theme_minimal() +
  labs(
    y = "Responses",
    x = "",
    title = "Preferred Platform for Purchasing Textbooks"
  )

Graph 4: Expected Spending

# Bar chart of expected spending per textbook, with a count label on each bar.
# Left as plain text, the spending ranges are drawn in alphabetical order,
# which scrambles the price ladder on the x-axis. They are therefore converted
# to a factor with explicit level order. The three ranges listed are the ones
# visible in the data preview; any other answer is kept and placed after them.
# TODO(review): add the remaining questionnaire options in their intended order.
survey %>%
  count(spending) %>%
  mutate(
    spending = factor(
      spending,
      levels = union(c("less than $50", "$50-$75", "$75-$100"), spending)
    )
  ) %>%
  ggplot(aes(x = spending, y = n, fill = spending)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.3) +
  labs(title = "Expected Spending Per Textbook", x = "Spending Range", y = "Responses") +
  theme_minimal()

Graph 5: Price Perception

# Horizontal bar chart of responses per price-perception answer. The axes are
# flipped, so the count labels are nudged horizontally (hjust).
ggplot(
  count(survey, price_perception),
  aes(price_perception, n, fill = price_perception)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.2) +
  coord_flip() +
  theme_minimal() +
  labs(
    y = "Responses",
    x = "",
    title = "Which Platform Seems More Expensive?"
  )