surf_data2 <- read.csv("surf_data2.csv")
library(leaflet)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
# Make magma the default for discrete scales
scale_colour_discrete <- function(...) scale_color_viridis_d(option = "magma", ...)
scale_fill_discrete <- function(...) scale_fill_viridis_d(option = "magma", ...)
# Renamed using python preprocessing
#surf_data2 <- surf_data2 %>%
# rename(
# Participation_Match = Q1a,
# Role = Q1b,
# Surf_Years = Q2,
# Experience = Q3,
# )
surf_data2 <- surf_data2 %>%
rename(
Years_Surfing = Skill,
LocationLatitude = Location.Latitude,
LocationLongitude = Location.Longitude
)
library(stringr)
# regex anatomy: \\ is an escape character, \d grabs digit, + grabs however many digits there are
surf_data2$Experience <- as.numeric(stringr::str_extract(surf_data2$Experience, "\\d+")) # extract just numeric answers from data
surf_data2$Years_Surfing <- as.numeric(stringr::str_extract(surf_data2$Years_Surfing, "\\d+"))
surf_data2$AvgBoardCost <- as.numeric(stringr::str_extract(surf_data2$AvgBoardCost, "\\d+"))
surf_data2$NumBoardsTotal <- as.numeric(stringr::str_extract(surf_data2$NumBoardsTotal, "\\d+"))
surf_data2$NumBoardsYearly <- as.numeric(stringr::str_extract(surf_data2$NumBoardsYearly, "\\d+"))
surf_data2$AvgBoardLabor <- as.numeric(stringr::str_extract(surf_data2$AvgBoardLabor, "\\d+"))
surf_data2$AvgBoardMarkup <- as.numeric(stringr::str_extract(surf_data2$AvgBoardMarkup, "\\d+"))
surf_data2$PriceConsumersWilling <- as.numeric(stringr::str_extract(surf_data2$PriceConsumersWilling, "\\d+"))
surf_data2$Age <- as.numeric(stringr::str_extract(surf_data2$Age, "\\d+"))
surf_data2$LocationLatitude <- as.numeric(surf_data2$LocationLatitude)
## Warning: NAs introduced by coercion
surf_data2$LocationLongitude <- as.numeric(surf_data2$LocationLongitude)
## Warning: NAs introduced by coercion
surf_data2$Duration..in.seconds. <- as.numeric(surf_data2$Duration..in.seconds.)
## Warning: NAs introduced by coercion
surf_data2$AvgBoardCost <- as.numeric(surf_data2$AvgBoardCost)
surf_data2$NumBoardsTotal <- as.numeric(surf_data2$NumBoardsTotal)
surf_data2$NumBoardsYearly <- as.numeric(surf_data2$NumBoardsYearly)
surf_data2$log_AvgBoardCost <- log(surf_data2$AvgBoardCost)
surf_data2$log_NumBoardsTotal <- log(surf_data2$NumBoardsTotal + 1)
head(surf_data2$log_NumBoardsTotal)
## [1] 1.609438 1.609438 0.000000 3.135494 1.609438 NA
library(leaflet)
library(viridisLite)
make_leaflet_magma <- function(data, value_col,
palette_range = c(0.0, 0.5),
title = NULL,
lat_col = "LocationLatitude",
lng_col = "LocationLongitude") {
# does this tilda thing: ~
val <- rlang::sym(value_col)
lat <- rlang::sym(lat_col)
lng <- rlang::sym(lng_col)
pal <- colorNumeric(
palette = viridisLite::magma(256, begin = palette_range[1], end = palette_range[2]),
domain = data[[value_col]]
)
# Default title if none provided
if (is.null(title)) title <- value_col
leaflet(data = data) %>%
addTiles() %>%
addCircleMarkers(
lng = rlang::new_formula(NULL, lng),
lat = rlang::new_formula(NULL, lat),
color = rlang::new_formula(NULL, call("pal", val)),
fillOpacity = 0.8,
radius = 5
) %>%
addLegend(
position = "topright",
pal = pal,
values = rlang::new_formula(NULL, val),
title = title,
opacity = 1
)
}
make_leaflet_magma(
data = surf_data2,
value_col = "Years_Surfing",
palette_range = c(0.0, 0.5),
title = "Surfing Experience (years)"
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
make_leaflet_magma(
data = surf_data2,
value_col = "Experience",
palette_range = c(0.0, 0.5),
title = "Shaping Experience (years)"
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
make_leaflet_magma(
data = surf_data2,
value_col = "log_AvgBoardCost", #AvgBoardCost is non-log
palette_range = c(0.0, 0.5),
title = "Log Avg Board Cost ($)"
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
make_leaflet_magma(
data = surf_data2,
value_col = "log_NumBoardsTotal",
palette_range = c(0.0, 0.5),
title = "Log Num Boards Total"
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
library(corrplot)
## corrplot 0.92 loaded
# coerce all cols to be numeric
numeric_surf_data2 <- as.data.frame(lapply(surf_data2, function(x) {
suppressWarnings(as.numeric(as.character(x)))
}))
# remove all NA cols (don't include in corrplot)
numeric_surf_data2 <- numeric_surf_data2[, colSums(!is.na(numeric_surf_data2)) > 0]
numeric_surf_data2 <- numeric_surf_data2[, 5:16]
cor_matrix <- cor(numeric_surf_data2, use = "pairwise.complete.obs")
corrplot(cor_matrix, method = "color", type = "upper", tl.cex=.7, number.cex = 0.5)

library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(wordcloud2)
# Q4 is Example of a Specific Scientific Advancement in Board Design
text_column <- surf_data2$Surfing.Achievement
# From column, combine into one mega string
text_combined <- paste(text_column, collapse = " ")
corpus <- Corpus(VectorSource(text_combined))
# do cleaning, remove stopwords
corpus <- tm_map(corpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
corpus <- tm_map(corpus, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
corpus <- tm_map(corpus, removeWords, c(
"example"
))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, c("example")):
## transformation drops documents
tdm <- TermDocumentMatrix(corpus)
matrix <- as.matrix(tdm)
word_freqs <- sort(rowSums(matrix), decreasing = TRUE)
# word_freqs
# use wordcloud2
wordcloud_df <- data.frame(word = names(word_freqs), freq = word_freqs)
wordcloud2(wordcloud_df, size = 0.8, color = viridis(nrow(wordcloud_df), begin = 0.3, end = 1), shape = 'circle')
# color options: https://www.datanovia.com/en/blog/top-r-color-palettes-to-know-for-great-data-visualization/
library(tidytext)
library(widyr)
library(tidyr)
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggraph)
# grab literally every word (except in stopwords)
tokens <- surf_data2 %>%
select(Response.ID, Surfing.Achievement) %>%
unnest_tokens(word, Surfing.Achievement) %>% # this shifts every response column from response id, sentence -> response id, word1, response id, word2, etc.
filter(!word %in% stop_words$word)
# co occurence (number of times words appear together with the SAME response id)
word_pairs <- tokens %>%
pairwise_count(word, Response.ID, sort = TRUE, upper = FALSE)
# filter (otherwise it's a pretty stupid looking graph)
strong_pairs <- word_pairs
# %>% filter(n >= 2)
# I haven't done this before but sure
graph <- strong_pairs %>%
graph_from_data_frame()
# Plot the network
ggraph(graph, layout = "fr") +
geom_edge_link(aes(width = n), alpha = 0.95, color = "gray") +
geom_node_point(size = 5) +
geom_node_text(aes(label = name), repel = TRUE, size = 4) +
theme_void() +
ggtitle("Greatest Achievement Co-Occurrence")
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 100 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
This isn’t helpful – too much text data variability here.
ggplot(surf_data2, aes(x = Role_Single, y = Experience, fill = Role_Single)) +
geom_boxplot(width = 0.6, outlier.shape = 21, color = "gray25") +
scale_fill_viridis_d(option = "magma", begin = 0, end = 0.6, direction = -1) +
labs(
title = "Years Shaping by Industry Role",
subtitle = "Boxplots",
x = NULL,
y = "Experience Shaping (Years)"
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "top",
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank()
)
## Warning: Removed 16 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# TODO: Note, I hand coded a lot of these to the best of my ability –
should discuss optimal hand coding practices (Shaper takes precedent,
etc.)
ggplot(surf_data2, aes(x = Role_Single, y = Years_Surfing, fill = Role_Single)) +
geom_boxplot(width = 0.6, outlier.shape = 21, color = "gray25") +
scale_fill_viridis_d(option = "plasma", begin = 0, end = 0.6, direction = -1) +
labs(
title = "Years Surfing by Industry Role",
subtitle = "Boxplots",
x = NULL,
y = "Experience Surfing (Years)"
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "top",
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank()
)
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ggplot(surf_data2, aes(y = NumBoardsTotal)) +
geom_boxplot(fill = viridisLite::magma(1, begin = 0.4, end = 1)) +
labs(
title = "Distribution of Total Boards Shaped",
y = "Total Boards",
x = NULL
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "bold")
)
## Warning: Removed 21 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggplot(surf_data2, aes(y = log(NumBoardsTotal))) +
geom_boxplot(fill = viridisLite::magma(1, begin = 0.4, end = 1)) +
labs(
title = "Log Distribution of Total Boards Shaped",
y = "Total Boards (Log Transformed)",
x = NULL
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "bold")
)
## Warning: Removed 30 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggplot(surf_data2, aes(y = NumBoardsYearly)) +
geom_boxplot(fill = viridisLite::magma(1, begin = 0.4, end = 1)) +
labs(
title = "Distribution of Num Boards Yearly",
y = "Num Boards Yearly",
x = NULL
) +
theme_minimal(base_size = 11) +
theme(
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "bold")
)
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
library(knitr)
print("years surfing median, years surfing mean, Years shaping median, Years Shaping Mean")
## [1] "years surfing median, years surfing mean, Years shaping median, Years Shaping Mean"
median(surf_data2$Years_Surfing, na.rm = TRUE)
## [1] 35
mean(surf_data2$Years_Surfing, na.rm = TRUE)
## [1] 34.78125
median(surf_data2$Experience, na.rm = TRUE)
## [1] 18
mean(surf_data2$Experience, na.rm = TRUE)
## [1] 22.11111
summary_df <- data.frame(
Variable = c("Years Surfing", "Years Shaping"),
Median = c(median(surf_data2$Years_Surfing, na.rm = TRUE),
median(surf_data2$Experience, na.rm = TRUE)),
Mean = c(mean(surf_data2$Years_Surfing, na.rm = TRUE),
mean(surf_data2$Experience, na.rm = TRUE)),
Max = c(max(surf_data2$Years_Surfing, na.rm = TRUE),
max(surf_data2$Experience, na.rm = TRUE)),
Min = c(min(surf_data2$Years_Surfing, na.rm = TRUE),
min(surf_data2$Experience, na.rm = TRUE))
)
library(knitr)
kable(summary_df, caption = "Summary Statistics Years Experience")
| Variable | Median | Mean | Max | Min |
|---|---|---|---|---|
| Years Surfing | 35 | 34.78125 | 65 | 0 |
| Years Shaping | 18 | 22.11111 | 62 | 0 |
summary_df <- data.frame(
Variable = c("Num Boards Yearly", "Num Boards Total"),
Median = c(median(surf_data2$NumBoardsYearly, na.rm = TRUE),
median(surf_data2$NumBoardsTotal, na.rm = TRUE)),
Mean = c(mean(surf_data2$NumBoardsYearly, na.rm = TRUE),
mean(surf_data2$NumBoardsTotal, na.rm = TRUE)),
Max = c(max(surf_data2$NumBoardsYearly, na.rm = TRUE),
max(surf_data2$NumBoardsTotal, na.rm = TRUE)),
Min = c(min(surf_data2$NumBoardsYearly, na.rm = TRUE),
min(surf_data2$NumBoardsTotal, na.rm = TRUE))
)
library(knitr)
kable(summary_df, caption = "Summary Statistics Num Boards")
| Variable | Median | Mean | Max | Min |
|---|---|---|---|---|
| Num Boards Yearly | 40 | 246.6078 | 5e+03 | 0 |
| Num Boards Total | 500 | 17665.3448 | 6e+05 | 0 |
summary_df <- data.frame(
Variable = c("Avg Board Cost", "Avg Board Labor", "Avg Board Markup", "Price Consumers Willing"),
Median = c(median(surf_data2$AvgBoardCost, na.rm = TRUE),
median(surf_data2$AvgBoardLabor, na.rm = TRUE),
median(surf_data2$AvgBoardMarkup, na.rm = TRUE),
median(surf_data2$PriceConsumersWilling, na.rm = TRUE)),
Mean = c(mean(surf_data2$AvgBoardCost, na.rm = TRUE),
mean(surf_data2$AvgBoardLabor, na.rm = TRUE),
mean(surf_data2$AvgBoardMarkup, na.rm = TRUE),
mean(surf_data2$PriceConsumersWilling, na.rm = TRUE)),
Max = c(max(surf_data2$AvgBoardCost, na.rm = TRUE),
max(surf_data2$AvgBoardLabor, na.rm = TRUE),
max(surf_data2$AvgBoardMarkup, na.rm = TRUE),
max(surf_data2$PriceConsumersWilling, na.rm = TRUE)),
Min = c(min(surf_data2$AvgBoardCost, na.rm = TRUE),
min(surf_data2$AvgBoardLabor, na.rm = TRUE),
min(surf_data2$AvgBoardMarkup, na.rm = TRUE),
min(surf_data2$PriceConsumersWilling, na.rm = TRUE))
)
library(knitr)
kable(summary_df, caption = "Summary Statistics Num Boards")
| Variable | Median | Mean | Max | Min |
|---|---|---|---|---|
| Avg Board Cost | 412.5 | 539.07500 | 5000 | 3 |
| Avg Board Labor | 10.0 | 91.48718 | 900 | 2 |
| Avg Board Markup | 300.0 | 293.48649 | 800 | 1 |
| Price Consumers Willing | 850.0 | 949.07317 | 3453 | 1 |
summary_df <- data.frame(
Variable = "Age",
Median = median(surf_data2$Age, na.rm = TRUE),
Mean = mean(surf_data2$Age, na.rm = TRUE),
Max = max(surf_data2$Age, na.rm = TRUE),
Min = min(surf_data2$Age, na.rm = TRUE))
library(knitr)
kable(summary_df, caption = "Summary Statistics Num Boards")
| Variable | Median | Mean | Max | Min |
|---|---|---|---|---|
| Age | 49.5 | 50.61364 | 134 | 16 |
surf_data2$AI_CustomBoardDesign <- as.numeric(as.character(surf_data2$AI_CustomBoardDesign))
## Warning: NAs introduced by coercion
surf_data2$AI_Materials <- as.numeric(as.character(surf_data2$AI_Materials))
## Warning: NAs introduced by coercion
surf_data2$AI_PrecisionShaping <- as.numeric(as.character(surf_data2$AI_PrecisionShaping))
## Warning: NAs introduced by coercion
surf_data2$AI_NewBoardDesign <- as.numeric(as.character(surf_data2$AI_NewBoardDesign))
## Warning: NAs introduced by coercion
surf_data2$AI_PerformanceModeling<- as.numeric(as.character(surf_data2$AI_PerformanceModeling))
## Warning: NAs introduced by coercion
surf_data2$AI_WorkflowAutomation <- as.numeric(as.character(surf_data2$AI_WorkflowAutomation))
## Warning: NAs introduced by coercion
surf_data2$AI_Marketing <- as.numeric(as.character(surf_data2$AI_Marketing))
## Warning: NAs introduced by coercion
surf_data2$AI_QualityControl <- as.numeric(as.character(surf_data2$AI_QualityControl))
## Warning: NAs introduced by coercion
surf_data2$AI_NoImpact <- as.numeric(as.character(surf_data2$AI_NoImpact))
## Warning: NAs introduced by coercion
cols <- c("AI_CustomBoardDesign", "AI_Materials", "AI_PrecisionShaping", "AI_NewBoardDesign",
"AI_PerformanceModeling", "AI_WorkflowAutomation", "AI_Marketing", "AI_QualityControl",
"AI_NoImpact")
surf_data2[cols] <- lapply(surf_data2[cols], function(x) ifelse(is.na(x), NA, 10 - x))
boxplot_data <- surf_data2[, cols]
# Rename the columns for better readability (optional)
colnames(boxplot_data) <- c("Custom Board Design", "Materials", "Precision Shaping", "New Board Design", "Performance Modeling",
"Workflow Automation", "Marketing", "Quality Control", "No Impact")
# Calculate medians for each column
medians <- apply(boxplot_data, 2, median, na.rm = TRUE)
# Reorder columns based on median values
ordered_data <- boxplot_data[, order(medians)]
par(mar = c(10, 4, 4, 2) + 0.1) # bottom, left, top, right
boxplot(ordered_data,
main = "AI Impact Rankings (Least to Most)",
ylab = "Relative Ranking",
col = gray.colors(ncol(ordered_data)),
outline = TRUE,
las = 2)
# Convert each Material_* column to numeric
surf_data2$Material_Durability <- as.numeric(as.character(surf_data2$Material_Durability))
## Warning: NAs introduced by coercion
surf_data2$Material_EcoFriendly <- as.numeric(as.character(surf_data2$Material_EcoFriendly))
## Warning: NAs introduced by coercion
surf_data2$Material_Performance <- as.numeric(as.character(surf_data2$Material_Performance))
## Warning: NAs introduced by coercion
surf_data2$Material_Cost <- as.numeric(as.character(surf_data2$Material_Cost))
## Warning: NAs introduced by coercion
surf_data2$Material_Weight <- as.numeric(as.character(surf_data2$Material_Weight))
## Warning: NAs introduced by coercion
surf_data2$Material_Compatability <- as.numeric(as.character(surf_data2$Material_Compatability))
## Warning: NAs introduced by coercion
surf_data2$Material_Workability <- as.numeric(as.character(surf_data2$Material_Workability))
## Warning: NAs introduced by coercion
# Select the relevant columns
cols <- c("Material_Durability", "Material_EcoFriendly", "Material_Performance",
"Material_Cost", "Material_Weight", "Material_Compatability", "Material_Workability")
surf_data2[cols] <- lapply(surf_data2[cols], function(x) ifelse(is.na(x), NA, 8 - x)) # note: this is n + 1 - x
boxplot_data <- surf_data2[, cols]
# Rename the columns for better readability
colnames(boxplot_data) <- c("Durability", "Eco-Friendly", "Performance",
"Cost", "Weight", "Compatibility", "Workability")
# Calculate medians for each column
medians <- apply(boxplot_data, 2, median, na.rm = TRUE)
# Reorder columns based on median values
ordered_data <- boxplot_data[, order(medians)]
# Adjust margins so labels aren't cut off
par(mar = c(10, 4, 4, 2) + 0.1)
# Create the boxplot
boxplot(ordered_data,
main = "Material Importance Rankings (Least to Most)",
ylab = "Relative Ranking",
col = gray.colors(ncol(ordered_data)),
outline = TRUE,
las = 2)
# Convert each Limitations_* column to numeric
surf_data2$Limitations_Cost <- as.numeric(as.character(surf_data2$Limitations_Cost))
## Warning: NAs introduced by coercion
surf_data2$Limitations_Sourcing <- as.numeric(as.character(surf_data2$Limitations_Sourcing))
## Warning: NAs introduced by coercion
surf_data2$Limitations_Knowledge <- as.numeric(as.character(surf_data2$Limitations_Knowledge))
## Warning: NAs introduced by coercion
surf_data2$Limitations_Tooling <- as.numeric(as.character(surf_data2$Limitations_Tooling))
## Warning: NAs introduced by coercion
surf_data2$Limitations_Fear <- as.numeric(as.character(surf_data2$Limitations_Fear))
## Warning: NAs introduced by coercion
surf_data2$Limitations_LackSpace <- as.numeric(as.character(surf_data2$Limitations_LackSpace))
## Warning: NAs introduced by coercion
surf_data2$Limitations_GovtRegulations<- as.numeric(as.character(surf_data2$Limitations_GovtRegulations))
## Warning: NAs introduced by coercion
# Select relevant columns
cols <- c("Limitations_Cost", "Limitations_Sourcing", "Limitations_Knowledge",
"Limitations_Tooling", "Limitations_Fear", "Limitations_LackSpace",
"Limitations_GovtRegulations")
# Flip the 1–7 scale (1 = most limiting → 7 = least limiting)
surf_data2[cols] <- lapply(surf_data2[cols], function(x) ifelse(is.na(x), NA, 8 - x))
# Prepare boxplot data
boxplot_data <- surf_data2[, cols]
# Rename columns for readability
colnames(boxplot_data) <- c("Cost", "Sourcing", "Knowledge", "Tooling", "Fear", "Lack of Space", "Govt Regulations")
# Calculate medians for each column
medians <- apply(boxplot_data, 2, median, na.rm = TRUE)
# Reorder columns based on median values
ordered_data <- boxplot_data[, order(medians)]
# Adjust margins and plot
par(mar = c(10, 4, 4, 2) + 0.1)
boxplot(ordered_data,
main = "Limiting Factors (Least to Most Impactful)",
ylab = "Relative Limitation (Higher = More Limiting)",
col = gray.colors(ncol(ordered_data)),
outline = TRUE,
las = 2)
# Convert each Innovation_* column to numeric
surf_data2$Innovation_Materials <- as.numeric(as.character(surf_data2$Innovation_Materials))
## Warning: NAs introduced by coercion
surf_data2$Innovation_Shape <- as.numeric(as.character(surf_data2$Innovation_Shape))
## Warning: NAs introduced by coercion
surf_data2$Innovation_Distribution <- as.numeric(as.character(surf_data2$Innovation_Distribution))
## Warning: NAs introduced by coercion
surf_data2$Innovation_TechModeling <- as.numeric(as.character(surf_data2$Innovation_TechModeling))
## Warning: NAs introduced by coercion
surf_data2$Innovation_Attitudes <- as.numeric(as.character(surf_data2$Innovation_Attitudes))
## Warning: NAs introduced by coercion
# Select relevant columns
cols <- c("Innovation_Materials", "Innovation_Shape", "Innovation_Distribution",
"Innovation_TechModeling", "Innovation_Attitudes")
# Flip the 1–7 scale (1 = most innovative → 7 = least innovative)
surf_data2[cols] <- lapply(surf_data2[cols], function(x) ifelse(is.na(x), NA, 6 - x))
# Prepare boxplot data
boxplot_data <- surf_data2[, cols]
# Rename columns for readability
colnames(boxplot_data) <- c("Materials", "Shape", "Distribution", "Tech Modeling", "Attitudes")
# Calculate medians for each column
medians <- apply(boxplot_data, 2, median, na.rm = TRUE)
# Reorder columns based on median values
ordered_data <- boxplot_data[, order(medians)]
# Adjust margins and create the boxplot
par(mar = c(10, 4, 4, 2) + 0.1)
boxplot(ordered_data,
main = "Innovation Dimensions",
ylab = "Relative Innovation (Higher = More Innovative)",
col = gray.colors(ncol(ordered_data)),
outline = TRUE,
las = 2)
# Convert to numeric
surf_data2$Challenges_Funding <- as.numeric(as.character(surf_data2$Challenges_Funding))
## Warning: NAs introduced by coercion
surf_data2$Challenges_Time <- as.numeric(as.character(surf_data2$Challenges_Time))
## Warning: NAs introduced by coercion
surf_data2$Challenge_ScienceKnowledge <- as.numeric(as.character(surf_data2$Challenge_ScienceKnowledge))
## Warning: NAs introduced by coercion
surf_data2$Challenges_CustomerDemand <- as.numeric(as.character(surf_data2$Challenges_CustomerDemand))
## Warning: NAs introduced by coercion
surf_data2$Challenges_LackCollab <- as.numeric(as.character(surf_data2$Challenges_LackCollab))
## Warning: NAs introduced by coercion
surf_data2$Challenges_ResistanceChange <- as.numeric(as.character(surf_data2$Challenges_ResistanceChange))
## Warning: NAs introduced by coercion
surf_data2$Challenges_LackTech <- as.numeric(as.character(surf_data2$Challenges_LackTech))
## Warning: NAs introduced by coercion
surf_data2$Challenges_LackLaws <- as.numeric(as.character(surf_data2$Challenges_LackLaws))
## Warning: NAs introduced by coercion
surf_data2$Challenges_Business <- as.numeric(as.character(surf_data2$Challenges_Business))
## Warning: NAs introduced by coercion
# Select columns
cols <- c("Challenges_Funding","Challenges_Time","Challenge_ScienceKnowledge",
"Challenges_CustomerDemand","Challenges_LackCollab","Challenges_ResistanceChange",
"Challenges_LackTech","Challenges_LackLaws","Challenges_Business")
# Flip 1–9 scale so higher = more challenging
surf_data2[cols] <- lapply(surf_data2[cols], function(x) ifelse(is.na(x), NA, 10 - x))
# Prep for plot
boxplot_data <- surf_data2[, cols]
colnames(boxplot_data) <- c("Funding","Time","Science Knowledge","Customer Demand",
"Lack of Collaboration","Resistance to Change",
"Lack of Tech","Lack of Laws","Business")
# Order by median and plot
medians <- apply(boxplot_data, 2, median, na.rm = TRUE)
ordered_data <- boxplot_data[, order(medians)]
par(mar = c(10, 4, 4, 2) + 0.1)
boxplot(ordered_data,
main = "Adoption Challenges (Least to Most)",
ylab = "Relative Challenge (Higher = More Challenging)",
col = gray.colors(ncol(ordered_data)),
outline = TRUE,
las = 2)