This document presents a comprehensive sentiment analysis framework
for analyzing text data across time series and cross-sectional
dimensions. The analysis uses the tidytext package for text
processing and sentiment scoring, combined with interactive
visualizations using plotly and professionally formatted
tables using gt.
The following flowchart illustrates the complete sentiment analysis workflow from raw data to insights:
# Workflow diagram for the report.
# The Graphviz DOT source below is passed as a single string to grViz()
# (presumably DiagrammeR's renderer -- the library() call is not visible in
# this part of the file). The graph is laid out top-to-bottom and contains:
#   * a linear pipeline start -> load -> tokenize -> clean -> lexicon ->
#     score -> categorize -> aggregate,
#   * a fan-out from `aggregate` to `visualize` and `tables`, both of which
#     feed the final `insights` node,
#   * a dashed `cluster_legend` subgraph whose nodes are chained with
#     invisible edges so the colour key keeps a fixed order.
# Node fill/border colours encode the process stage listed in the legend.
flowchart <- grViz("
digraph sentiment_workflow {
# Graph attributes
graph [layout = dot, rankdir = TB,
label = 'Sentiment Analysis - Call Flow', labelloc = 't', fontsize = 20,
fontname = Helvetica, bgcolor = '#f8f9fa', nodesep = 0.8, ranksep = 0.8]
# Node attributes
node [shape = box, style = filled, fontname = Helvetica, fontsize = 15, width = 3, height = 1, fixedsize = true]
# Define nodes with colors
start [label = 'Raw Text Data\n(Reviews, Comments, etc.)',
fillcolor = '#e3f2fd', color = '#1976d2', penwidth = 2]
load [label = 'Load Data\n(Date, Product, Region, Text)',
fillcolor = '#fff3e0', color = '#f57c00']
tokenize [label = 'Tokenization\n(Break text into words)',
fillcolor = '#f3e5f5', color = '#7b1fa2']
clean [label = 'Remove Stop Words\n(the, and, is, etc.)',
fillcolor = '#f3e5f5', color = '#7b1fa2']
lexicon [label = 'Apply Sentiment Lexicons\n(AFINN & Bing)',
fillcolor = '#e8f5e9', color = '#388e3c']
score [label = 'Calculate Sentiment Scores\n• Total Score\n• Avg Intensity\n• Positive/Negative Count',
fillcolor = '#e8f5e9', color = '#388e3c']
categorize [label = 'Categorize Sentiment\n(Positive, Neutral, Negative)',
fillcolor = '#e8f5e9', color = '#388e3c']
aggregate [label = 'Aggregate Data\n• By Time (Daily/Weekly)\n• By Product\n• By Region',
fillcolor = '#fff9c4', color = '#f9a825']
visualize [label = 'Create Visualizations\n• Time Series Plots\n• Geographic Maps\n• Bar Charts',
fillcolor = '#fce4ec', color = '#c2185b']
tables [label = 'Generate Tables\n• Summary Statistics\n• Trend Analysis\n• Cross-sectional Data',
fillcolor = '#fce4ec', color = '#c2185b']
insights [label = 'Insights & Reporting\n✓ Sentiment Trends\n✓ Product Comparison\n✓ Regional Patterns',
fillcolor = '#c8e6c9', color = '#2e7d32', penwidth = 3]
# Define edges with increased spacing
start -> load [penwidth = 2]
load -> tokenize [penwidth = 2]
tokenize -> clean [penwidth = 2]
clean -> lexicon [penwidth = 2]
lexicon -> score [penwidth = 2]
score -> categorize [penwidth = 2]
categorize -> aggregate [penwidth = 2]
aggregate -> visualize [penwidth = 2]
aggregate -> tables [penwidth = 2]
visualize -> insights [penwidth = 2]
tables -> insights [penwidth = 2]
# Subgraph for legend
subgraph cluster_legend {
label = 'Process Stages'
fontsize = 12
style = dashed
color = grey
node [width = 2.5, height = 0.8]
legend1 [label = 'Data Input', fillcolor = '#e3f2fd', color = '#1976d2']
legend2 [label = 'Text Processing', fillcolor = '#f3e5f5', color = '#7b1fa2']
legend3 [label = 'Sentiment Analysis', fillcolor = '#e8f5e9', color = '#388e3c']
legend4 [label = 'Aggregation', fillcolor = '#fff9c4', color = '#f9a825']
legend5 [label = 'Output', fillcolor = '#fce4ec', color = '#c2185b']
legend1 -> legend2 -> legend3 -> legend4 -> legend5 [style = invis]
}
}
")
# Evaluating the object at top level renders the diagram.
flowchart# Generate example dataset
set.seed(123)
#' Generate a synthetic product-review dataset
#'
#' Draws random review dates, products, regions and canned review texts
#' (five positive, five negative, five neutral) and returns them sorted by
#' date. Results depend on the RNG state; call `set.seed()` first for
#' reproducibility.
#'
#' @param n_products Number of products to include. Product names are taken
#'   from a fixed catalogue of four ("Laptop", "Smartphone", "Tablet",
#'   "Smartwatch"); values above four only increase the number of reviews.
#' @param n_days Length of the look-back window in days; review dates are
#'   sampled from `today() - n_days` through `today()`.
#' @param reviews_per_product Number of reviews generated per product
#'   (total rows = `n_products * reviews_per_product`).
#' @return A tibble with columns `date`, `product`, `region` and `review`,
#'   ordered by `date`.
generate_sample_data <- function(n_products = 4, n_days = 90,
                                 reviews_per_product = 30) {
  stopifnot(
    is.numeric(n_products), length(n_products) == 1, n_products >= 0,
    is.numeric(n_days), length(n_days) == 1, n_days >= 0,
    is.numeric(reviews_per_product), length(reviews_per_product) == 1,
    reviews_per_product >= 0
  )

  catalog <- c("Laptop", "Smartphone", "Tablet", "Smartwatch")
  # Honour n_products: previously all four names were always sampled and the
  # argument only scaled the row count.
  products <- catalog[seq_len(min(n_products, length(catalog)))]
  regions <- c("North America", "Europe", "Asia", "South America")

  # Sample reviews with varying sentiment
  positive_reviews <- c(
    "Absolutely love this product! Best purchase ever.",
    "Outstanding quality and great performance.",
    "Exceeded my expectations. Highly recommend!",
    "Amazing features and very easy to use.",
    "Perfect product, worth every penny!"
  )
  negative_reviews <- c(
    "Terrible quality, broke after one week.",
    "Very disappointed with this purchase.",
    "Poor performance and bad customer service.",
    "Not worth the money, very frustrating.",
    "Worst product I've ever bought."
  )
  neutral_reviews <- c(
    "It's okay, nothing special but works.",
    "Average product for the price.",
    "Does what it's supposed to do.",
    "Not bad, not great either.",
    "Acceptable quality for everyday use."
  )
  review_pool <- c(positive_reviews, negative_reviews, neutral_reviews)

  n_reviews <- n_products * reviews_per_product
  date_pool <- seq(today() - days(n_days), today(), by = "day")

  # Columns are sampled in the order date, product, region, review so the
  # default call consumes the RNG stream exactly as before.
  tibble(
    date = sample(date_pool, n_reviews, replace = TRUE),
    product = sample(products, n_reviews, replace = TRUE),
    region = sample(regions, n_reviews, replace = TRUE),
    review = sample(review_pool, n_reviews, replace = TRUE)
  ) %>%
    arrange(date)
}
# Function to perform sentiment analysis
#' Score each review with the AFINN and Bing lexicons
#'
#' Tokenises the text column into words, removes stop words, attaches AFINN
#' numeric values and Bing polarity labels, and aggregates to one row per
#' review.
#'
#' @param data A data frame with columns `date`, `product`, `region` and the
#'   text column named by `text_col`.
#' @param text_col Name (string) of the column holding the review text.
#' @return A tibble with one row per review that still has at least one
#'   token after stop-word removal (reviews made up only of stop words
#'   produce no row). Columns: `review_id`, `date`, `product`, `region`,
#'   `afinn_score` (sum of AFINN values), `n_words`, `n_sentiment_words`,
#'   `positive_words`, `negative_words`, `avg_word_sentiment`
#'   (`afinn_score / n_sentiment_words`, 0 when no word is scored),
#'   `net_sentiment` (positive minus negative Bing words) and
#'   `sentiment_category` ("Positive" if `afinn_score > 2`, "Negative" if
#'   `afinn_score < -2`, otherwise "Neutral").
perform_sentiment_analysis <- function(data, text_col = "review") {
  stopifnot(
    is.data.frame(data),
    is.character(text_col), length(text_col) == 1,
    all(c("date", "product", "region", text_col) %in% names(data))
  )

  # Get sentiment lexicons
  afinn <- get_sentiments("afinn")
  # The Bing lexicon lists a few words under both polarities. Joining on
  # those would duplicate the token row and count its AFINN value twice, so
  # only words with a single, unambiguous label are kept.
  bing <- get_sentiments("bing") %>%
    add_count(word, name = "n_labels") %>%
    filter(n_labels == 1) %>%
    select(-n_labels)

  # Tokenize and analyze sentiment
  data %>%
    mutate(review_id = row_number()) %>%
    unnest_tokens(word, !!sym(text_col)) %>%
    anti_join(stop_words, by = "word") %>%
    left_join(afinn, by = "word") %>%
    left_join(bing, by = "word") %>%
    group_by(review_id, date, product, region) %>%
    summarise(
      afinn_score = sum(value, na.rm = TRUE),
      n_words = n(),
      n_sentiment_words = sum(!is.na(value)),
      positive_words = sum(sentiment == "positive", na.rm = TRUE),
      negative_words = sum(sentiment == "negative", na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      # Normalize by number of sentiment words to get average intensity;
      # guard against division by zero when no word carries an AFINN value.
      avg_word_sentiment = ifelse(
        n_sentiment_words > 0,
        afinn_score / n_sentiment_words,
        0
      ),
      net_sentiment = positive_words - negative_words,
      sentiment_category = case_when(
        afinn_score > 2 ~ "Positive",
        afinn_score < -2 ~ "Negative",
        TRUE ~ "Neutral"
      )
    )
}
# Generate sample data
# Build the demo dataset (4 products, 90-day look-back window).
review_data <- generate_sample_data(n_products = 4, n_days = 90)
# Preview the leading rows as a compact gt table.
tab_options(
  tab_header(gt(head(review_data)), title = "Sample Raw Data"),
  table.font.size = px(12)
)
| Sample Raw Data | |||
| date | product | region | review |
|---|---|---|---|
| 2025-09-13 | Laptop | Europe | Perfect product, worth every penny! |
| 2025-09-14 | Smartwatch | North America | Worst product I've ever bought. |
| 2025-09-15 | Smartphone | North America | Exceeded my expectations. Highly recommend! |
| 2025-09-16 | Smartwatch | South America | Acceptable quality for everyday use. |
| 2025-09-16 | Smartphone | South America | Not worth the money, very frustrating. |
| 2025-09-16 | Smartwatch | Europe | Does what it's supposed to do. |
# Score every review in the demo dataset.
sentiment_results <- perform_sentiment_analysis(review_data)
# Preview the leading rows, showing both score columns with two decimals.
tab_options(
  fmt_number(
    tab_header(
      gt(head(sentiment_results)),
      title = "Sample Sentiment Analysis Results"
    ),
    columns = c(afinn_score, avg_word_sentiment),
    decimals = 2
  ),
  table.font.size = px(12)
)
| Sample Sentiment Analysis Results | |||||||||||
| review_id | date | product | region | afinn_score | n_words | n_sentiment_words | positive_words | negative_words | avg_word_sentiment | net_sentiment | sentiment_category |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 2025-09-13 | Laptop | Europe | 5.00 | 4 | 2 | 2 | 0 | 2.50 | 2 | Positive |
| 2 | 2025-09-14 | Smartwatch | North America | −3.00 | 3 | 1 | 0 | 1 | −3.00 | -1 | Negative |
| 3 | 2025-09-15 | Smartphone | North America | 2.00 | 4 | 1 | 2 | 0 | 2.00 | 2 | Neutral |
| 4 | 2025-09-16 | Smartwatch | South America | 0.00 | 3 | 0 | 0 | 0 | 0.00 | 0 | Neutral |
| 5 | 2025-09-16 | Smartphone | South America | 0.00 | 3 | 2 | 1 | 1 | 0.00 | 0 | Neutral |
| 6 | 2025-09-16 | Smartwatch | Europe | 0.00 | 1 | 0 | 0 | 0 | 0.00 | 0 | Neutral |
# Daily roll-up: mean AFINN score, review volume and the percentage of
# reviews categorised as Positive / Negative on each date.
ts_daily <- summarise(
  group_by(sentiment_results, date),
  avg_sentiment = mean(afinn_score, na.rm = TRUE),
  total_reviews = n(),
  pct_positive = sum(sentiment_category == "Positive") / n() * 100,
  pct_negative = sum(sentiment_category == "Negative") / n() * 100,
  .groups = "drop"
)
# Line chart of the daily mean score with a dashed zero reference line.
plot_ts <- ts_daily %>%
  plot_ly(
    x = ~date,
    y = ~avg_sentiment,
    type = "scatter",
    mode = "lines+markers",
    name = "Average Sentiment",
    line = list(color = "#1f77b4", width = 2),
    marker = list(size = 6)
  ) %>%
  add_trace(
    y = 0,
    type = "scatter",
    mode = "lines",
    line = list(color = "gray", dash = "dash", width = 1),
    showlegend = FALSE
  ) %>%
  layout(
    title = "Sentiment Score Over Time",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Average AFINN Sentiment Score"),
    hovermode = "x unified"
  )
plot_ts
# Sentiment by product
# Per-product summary statistics of the review-level scores, ordered from
# the highest to the lowest mean AFINN score.
product_sentiment <- sentiment_results %>%
  group_by(product) %>%
  summarise(
    avg_sentiment = mean(afinn_score, na.rm = TRUE),
    median_sentiment = median(afinn_score, na.rm = TRUE),
    std_sentiment = sd(afinn_score, na.rm = TRUE),
    avg_intensity = mean(avg_word_sentiment, na.rm = TRUE),
    total_reviews = n(),
    pct_positive = sum(sentiment_category == "Positive") / n() * 100,
    pct_negative = sum(sentiment_category == "Negative") / n() * 100,
    pct_neutral = sum(sentiment_category == "Neutral") / n() * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(avg_sentiment))
# Plot: Bar chart of average sentiment by product
plot_product <- plot_ly(
  product_sentiment,
  x = ~product,
  y = ~avg_sentiment,
  type = "bar",
  marker = list(
    color = ~avg_sentiment,
    # Anchor the colour scale to a symmetric range so that gray (0.5) always
    # means a score of 0. Without cmin/cmax the scale spans the observed
    # min..max, so a product with a positive average could be drawn red.
    # The +/-5 range matches the choropleth's zmin/zmax.
    cmin = -5,
    cmax = 5,
    colorscale = list(
      c(0, "red"),
      c(0.5, "gray"),
      c(1, "green")
    ),
    showscale = TRUE,
    colorbar = list(title = "Sentiment")
  )
) %>%
  layout(
    title = "Average Sentiment by Product",
    xaxis = list(title = "Product"),
    yaxis = list(title = "Average Sentiment Score")
  )
plot_product
# Sentiment by region
# Per-region averages and category shares, plus the fields the map needs:
# an ISO-3 country code standing in for each region and an HTML hover label.
region_sentiment <- summarise(
  group_by(sentiment_results, region),
  avg_sentiment = mean(afinn_score, na.rm = TRUE),
  total_reviews = n(),
  pct_positive = sum(sentiment_category == "Positive") / n() * 100,
  pct_negative = sum(sentiment_category == "Negative") / n() * 100,
  .groups = "drop"
) %>%
  mutate(
    # One representative country per region (choropleths need country
    # codes); regions missing from the lookup become NA.
    location = unname(
      c(
        "North America" = "USA",
        "Europe" = "DEU", # Germany as representative
        "Asia" = "CHN", # China as representative
        "South America" = "BRA" # Brazil as representative
      )[as.character(region)]
    ),
    hover_text = paste0(
      "<b>", region, "</b><br>",
      "Avg Sentiment: ", round(avg_sentiment, 2), "<br>",
      "Total Reviews: ", total_reviews, "<br>",
      "Positive: ", round(pct_positive, 1), "%<br>",
      "Negative: ", round(pct_negative, 1), "%"
    )
  )
# Choropleth of the regional averages on a fixed -5..5 colour range.
plot_region <- region_sentiment %>%
  plot_ly(
    type = "choropleth",
    locations = ~location,
    z = ~avg_sentiment,
    zmin = -5,
    zmax = 5,
    text = ~hover_text,
    hoverinfo = "text",
    colorscale = list(
      c(0, "#d62728"), # Red for negative
      c(0.5, "#ffeda0"), # Yellow for neutral
      c(1, "#2ca02c") # Green for positive
    ),
    colorbar = list(
      title = "Avg<br>Sentiment",
      tickmode = "linear",
      tick0 = -5,
      dtick = 2
    ),
    marker = list(line = list(color = "rgb(255,255,255)", width = 2))
  ) %>%
  layout(
    title = list(
      text = "Global Sentiment Distribution by Region",
      font = list(size = 16)
    ),
    geo = list(
      showframe = FALSE,
      showcoastlines = TRUE,
      projection = list(type = "natural earth")
    )
  )
plot_region
# Time series by product (panel data)
# Weekly mean AFINN score and review count per product (panel data).
panel_data <- sentiment_results %>%
  mutate(week = floor_date(date, "week")) %>%
  group_by(week, product) %>%
  summarise(
    avg_sentiment = mean(afinn_score, na.rm = TRUE),
    n_reviews = n(),
    .groups = "drop"
  )
# Plot: Panel time series (one line per product)
plot_panel <- plot_ly(
  panel_data,
  x = ~week,
  y = ~avg_sentiment,
  color = ~product,
  type = "scatter",
  mode = "lines+markers"
) %>%
  layout(
    title = "Sentiment Trends by Product Over Time",
    xaxis = list(title = "Week"),
    yaxis = list(title = "Average Sentiment Score"),
    hovermode = "x unified"
  )
# Printing the plot and building the table are separate statements; they
# had been run together on one line, which assigned the table to a
# misspelled name and never displayed the plot.
plot_panel

# Formatted summary table of the per-product statistics.
table_product <- product_sentiment %>%
  gt() %>%
  tab_header(
    title = "Sentiment Analysis by Product",
    subtitle = "Summary statistics across all time periods"
  ) %>%
  fmt_number(
    columns = c(avg_sentiment, median_sentiment, std_sentiment, avg_intensity),
    decimals = 2
  ) %>%
  fmt_number(
    columns = c(pct_positive, pct_negative, pct_neutral),
    decimals = 1
  ) %>%
  cols_label(
    product = "Product",
    avg_sentiment = "Avg Score",
    median_sentiment = "Median Score",
    std_sentiment = "Std Dev",
    avg_intensity = "Avg Intensity",
    total_reviews = "Reviews",
    pct_positive = "Positive (%)",
    pct_negative = "Negative (%)",
    pct_neutral = "Neutral (%)"
  ) %>%
  data_color(
    columns = avg_sentiment,
    # Clamp to the palette domain first: col_numeric() maps out-of-domain
    # values to NA (with a warning), which would leave extreme cells
    # without a meaningful colour.
    colors = function(x) {
      scales::col_numeric(
        palette = c("#d62728", "#ffeda0", "#2ca02c"),
        domain = c(-5, 5)
      )(scales::squish(x, range = c(-5, 5)))
    }
  ) %>%
  data_color(
    columns = avg_intensity,
    # Per-word AFINN values span -5..5, so averages can exceed the +/-2
    # display range; clamp them so they saturate instead of turning NA.
    colors = function(x) {
      scales::col_numeric(
        palette = c("#d62728", "#f7f7f7", "#2ca02c"),
        domain = c(-2, 2)
      )(scales::squish(x, range = c(-2, 2)))
    }
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.font.size = px(12),
    heading.background.color = "#1f77b4",
    heading.title.font.size = px(16),
    column_labels.background.color = "#d9d9d9"
  )
table_product
| Sentiment Analysis by Product | ||||||||
| Summary statistics across all time periods | ||||||||
| Product | Avg Score | Median Score | Std Dev | Avg Intensity | Reviews | Positive (%) | Negative (%) | Neutral (%) |
|---|---|---|---|---|---|---|---|---|
| Smartphone | 1.00 | 0.00 | 2.93 | 0.88 | 25 | 32.0 | 16.0 | 52.0 |
| Smartwatch | 0.43 | 0.00 | 2.96 | 0.26 | 35 | 25.7 | 28.6 | 45.7 |
| Laptop | 0.37 | 0.00 | 3.29 | 0.47 | 35 | 31.4 | 25.7 | 42.9 |
| Tablet | −0.32 | 0.00 | 3.44 | −0.14 | 25 | 24.0 | 36.0 | 40.0 |
# The 20 most recent week x product rows: mean AFINN score and review count.
recent_trends <- sentiment_results %>%
  mutate(week = floor_date(date, "week")) %>%
  group_by(week, product) %>%
  summarise(
    avg_sentiment = mean(afinn_score, na.rm = TRUE),
    reviews = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(week)) %>%
  head(20)
# Formatted table of the recent weekly averages.
table_trends <- recent_trends %>%
  gt() %>%
  tab_header(
    title = "Recent Sentiment Trends",
    subtitle = "Weekly averages by product"
  ) %>%
  fmt_number(
    columns = avg_sentiment,
    decimals = 2
  ) %>%
  fmt_date(
    columns = week,
    date_style = 6
  ) %>%
  cols_label(
    week = "Week",
    product = "Product",
    avg_sentiment = "Avg Sentiment",
    reviews = "# Reviews"
  ) %>%
  data_color(
    columns = avg_sentiment,
    # Weekly means built from only a few reviews regularly fall outside
    # +/-3. col_numeric() maps out-of-domain values to NA (with a warning),
    # so clamp to the domain first and let extreme weeks saturate at the
    # end colours instead of losing their colour.
    colors = function(x) {
      scales::col_numeric(
        palette = c("#d62728", "#f7f7f7", "#2ca02c"),
        domain = c(-3, 3)
      )(scales::squish(x, range = c(-3, 3)))
    }
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
table_trends
| Recent Sentiment Trends | |||
| Weekly averages by product | |||
| Week | Product | Avg Sentiment | # Reviews |
|---|---|---|---|
| Dec 7, 2025 | Laptop | 0.00 | 2 |
| Dec 7, 2025 | Smartphone | 2.50 | 2 |
| Dec 7, 2025 | Smartwatch | 0.00 | 1 |
| Dec 7, 2025 | Tablet | 2.50 | 2 |
| Nov 30, 2025 | Laptop | −1.00 | 3 |
| Nov 30, 2025 | Smartphone | 5.00 | 1 |
| Nov 30, 2025 | Smartwatch | 2.60 | 5 |
| Nov 23, 2025 | Laptop | −4.00 | 3 |
| Nov 23, 2025 | Smartphone | −3.33 | 3 |
| Nov 23, 2025 | Smartwatch | −3.00 | 1 |
| Nov 23, 2025 | Tablet | 1.25 | 4 |
| Nov 16, 2025 | Laptop | 0.67 | 3 |
| Nov 16, 2025 | Smartphone | 1.75 | 4 |
| Nov 16, 2025 | Smartwatch | −3.50 | 2 |
| Nov 16, 2025 | Tablet | 4.00 | 2 |
| Nov 9, 2025 | Laptop | −1.00 | 2 |
| Nov 9, 2025 | Smartphone | −2.00 | 1 |
| Nov 9, 2025 | Tablet | −1.50 | 2 |
| Nov 2, 2025 | Laptop | 3.00 | 1 |
| Nov 2, 2025 | Smartphone | 0.00 | 1 |
# Product x region matrix of mean AFINN scores, one row per product.
cross_sectional <- sentiment_results %>%
  group_by(product, region) %>%
  summarise(
    avg_sentiment = mean(afinn_score, na.rm = TRUE),
    reviews = n(),
    .groups = "drop"
  ) %>%
  # Collapse the per-cell counts into a per-product total before pivoting.
  # Left as-is, `reviews` acts as an extra id column in pivot_wider() and
  # each product is split into several sparse rows (one per distinct count).
  group_by(product) %>%
  mutate(reviews = sum(reviews)) %>%
  ungroup() %>%
  pivot_wider(
    names_from = region,
    values_from = avg_sentiment
  )
# Formatted cross-sectional table; every region column is colour-coded.
table_cross <- cross_sectional %>%
  gt() %>%
  tab_header(
    title = "Sentiment Analysis: Product × Region",
    subtitle = "Average sentiment scores"
  ) %>%
  fmt_number(
    columns = -c(product, reviews),
    decimals = 2
  ) %>%
  cols_label(
    product = "Product",
    reviews = "Total Reviews"
  ) %>%
  data_color(
    columns = -c(product, reviews),
    # Clamp to the palette domain so averages beyond +/-3 saturate rather
    # than being mapped to NA by col_numeric(); missing cells stay NA.
    colors = function(x) {
      scales::col_numeric(
        palette = c("#d62728", "#fff7bc", "#2ca02c"),
        domain = c(-3, 3)
      )(scales::squish(x, range = c(-3, 3)))
    }
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
table_cross
| Sentiment Analysis: Product × Region | |||||
| Average sentiment scores | |||||
| Product | Total Reviews | Asia | Europe | North America | South America |
|---|---|---|---|---|---|
| Laptop | 35 | −0.30 | 0.80 | 1.55 | −2.25 |
| Smartphone | 25 | 0.86 | 0.00 | 1.22 | 1.60 |
| Smartwatch | 35 | 0.67 | 0.43 | 0.81 | −0.83 |
| Tablet | 25 | −1.25 | −1.00 | 2.14 | −2.00 |
## === SENTIMENT ANALYSIS SUMMARY ===
## Total reviews analyzed: 120
# Reporting period covered by the scored reviews.
cat(
  "Date range:",
  format(min(sentiment_results$date), "%Y-%m-%d"),
  "to",
  format(max(sentiment_results$date), "%Y-%m-%d"),
  "\n"
)
## Date range: 2025-09-13 to 2025-12-09
## Overall average sentiment: 0.38
## Overall sentiment standard deviation: 3.15
##
## Sentiment Distribution:
# Count and percentage share of reviews in each sentiment category, printed
# in the order Positive, Neutral, Negative.
invisible(lapply(c("Positive", "Neutral", "Negative"), function(category) {
  hits <- sum(sentiment_results$sentiment_category == category)
  share <- round(hits / nrow(sentiment_results) * 100, 1)
  cat(paste0(" ", category, ":"), hits, "(", share, "%)\n")
}))
## Positive: 34 ( 28.3 %)
## Neutral: 54 ( 45 %)
## Negative: 32 ( 26.7 %)
This sentiment analysis framework provides comprehensive insights into text data across multiple dimensions: time (daily and weekly trends), product, region, and product × region combinations.
The combination of interactive visualizations (Plotly) and professionally formatted tables (GT) creates a complete analytical toolkit suitable for research, business intelligence, and market analysis applications.
Note: This analysis uses sample data. Replace the
generate_sample_data() function with your actual data
loading code to analyze real datasets.