# =============================
# Load necessary packages
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Build the synthetic regional dataset.
# The seed is fixed so every run draws the same values. Columns are drawn in
# the order listed, so reordering them would change the generated data.
set.seed(42)
obesity_data <- local({
  region_names <- c("Norte", "Centro", "Lisboa", "Alentejo", "Algarve")
  n_regions <- length(region_names)

  # One draw with replacement per region from the given candidate values.
  draw <- function(values) sample(values, n_regions, replace = TRUE)

  tibble(
    Region = region_names,
    Obesity = draw(18:30),
    Diabetes = draw(5:12),
    Fruits = draw(150:300),
    Vegetables = draw(150:250),
    SugarDrinks = draw(50:200),
    ExerciseHours = draw(0:12),
    SmokingRate = draw(5:35),
    Population = c(350000, 250000, 500000, 150000, 200000),
    IncomePerCapita = c(20000, 18000, 25000, 17000, 22000),
    RandomVar1 = runif(n_regions, 0, 1),   # filler column (flagged irrelevant)
    RandomVar2 = runif(n_regions, 0, 100)  # filler column (flagged irrelevant)
  )
})
# =============================
# Part 1: Bar chart Obesity by Region
# =============================
# One bar per region, ordered by ascending obesity value, with the value
# printed as a percentage just above each bar. The fill legend is suppressed
# on the bars.
obesity_data %>%
  ggplot(
    mapping = aes(
      x = reorder(Region, Obesity),
      y = Obesity,
      fill = Region
    )
  ) +
  geom_col(show.legend = FALSE) +
  geom_text(mapping = aes(label = paste0(Obesity, "%")), vjust = -0.5) +
  theme_minimal() +
  labs(
    title = "Obesity percentage by region",
    x = "Region",
    y = "Obesity (%)"
  )

# =============================
# Part 2: Scatterplot Fruits vs Obesity (interactive)
# =============================
# Static scatterplot first: fruit intake on x, obesity on y, colour by region
# and point size by income. The `text` aesthetic only feeds the hover tooltip.
p2 <- obesity_data %>%
  ggplot(
    mapping = aes(
      x = Fruits,
      y = Obesity,
      text = paste("Region:", Region)
    )
  ) +
  geom_point(mapping = aes(color = Region, size = IncomePerCapita)) +
  theme_minimal() +
  labs(
    title = "Fruit consumption vs Obesity",
    x = "Fruits (g/day)",
    y = "Obesity (%)",
    color = "Region",
    size = "Income per capita (€)"
  )

# Convert to an interactive widget; the tooltip shows only the `text` string.
ggplotly(p2, tooltip = "text")
# =============================
# Part 3: Boxplot for Obesity, Diabetes, ExerciseHours
# =============================
# Reshape the three indicators to long format: one row per region/indicator.
obesity_long <- obesity_data %>%
  select(Region, Obesity, Diabetes, ExerciseHours) %>%
  pivot_longer(
    cols = c(Obesity, Diabetes, ExerciseHours),
    names_to = "Indicator",
    values_to = "Value"
  )

# Every region contributes exactly one value per indicator, so a box per
# region would collapse to a single flat line. Instead, draw one box per
# indicator summarising its spread across the regions. The indicators use
# different units (percentages vs. hours per week), so each gets its own
# panel with a free y scale.
ggplot(obesity_long, aes(x = Indicator, y = Value, fill = Indicator)) +
  geom_boxplot(show.legend = FALSE) +
  facet_wrap(~ Indicator, scales = "free") +
  labs(
    title = "Distribution of health indicators across regions",
    x = "Indicator",
    y = "Value"
  ) +
  theme_minimal()

# =============================
# Part 4: Dumbbell chart Fruits vs Vegetables
# =============================
# Dumbbell chart: one row per region, with a point for fruit intake and a
# point for vegetable intake joined by a segment, so the gap between the two
# is read directly from the segment length.
ggplot(obesity_data, aes(y = Region)) +
  # Connector between the two intake values of each region.
  geom_segment(
    aes(x = Fruits, xend = Vegetables, yend = Region),
    color = "grey60",
    linewidth = 1.5
  ) +
  # Constant strings inside aes() create a two-entry colour legend.
  geom_point(aes(x = Fruits, color = "Fruits"), size = 4) +
  geom_point(aes(x = Vegetables, color = "Vegetables"), size = 4) +
  labs(
    title = "Fruits vs Vegetables by Region",
    x = "Consumption (g/day)",
    y = "Region",
    color = "Food group"
  ) +
  theme_minimal()

# =============================
# Part 5: Density plot ExerciseHours
# =============================
# Density of weekly exercise hours across all regions.
# The data holds a single observation per region, so grouping the density by
# region (fill = Region) leaves every group with one point; ggplot2 then drops
# all groups ("Groups with fewer than two data points have been dropped") and
# draws an empty panel. Estimate one density over all regions instead and mark
# each region's value with a coloured rug tick.
ggplot(obesity_data, aes(x = ExerciseHours)) +
  geom_density(fill = "steelblue", alpha = 0.5) +
  geom_rug(aes(color = Region), linewidth = 1) +
  labs(
    title = "Exercise hours distribution across regions",
    x = "Exercise hours per week",
    y = "Density",
    color = "Region"
  ) +
  theme_minimal()

# =============================
# Part 6: Scatterplot SugarDrinks vs Obesity
# =============================
# Labelled scatterplot: sugary-drink intake on x, obesity on y. Points are
# fixed red markers and each region name is nudged slightly above its point.
obesity_data %>%
  ggplot(mapping = aes(x = SugarDrinks, y = Obesity, label = Region)) +
  geom_point(size = 4, color = "red") +
  geom_text(nudge_y = 0.5) +
  theme_minimal() +
  labs(
    title = "Sugar-Sweetened Drinks vs Obesity",
    x = "Sugar Drinks (ml/day)",
    y = "Obesity (%)"
  )

# =============================
# Part 7: Grouped bar chart Obesity & Diabetes
# =============================
# Long-format table with one row per region and indicator (Obesity, Diabetes).
# After the select(), every column except Region is an indicator to pivot.
grouped_long <- obesity_data %>%
  select(Region, Obesity, Diabetes) %>%
  pivot_longer(
    cols = -Region,
    names_to = "Indicator",
    values_to = "Value"
  )

# Side-by-side bars per region, one bar per indicator.
grouped_long %>%
  ggplot(mapping = aes(x = Region, y = Value, fill = Indicator)) +
  geom_col(position = position_dodge(width = 0.8)) +
  theme_minimal() +
  labs(
    title = "Obesity and Diabetes by Region",
    x = "Region",
    y = "Percentage (%)",
    fill = "Indicator"
  )

# =============================
# Part 8: Heatmap Vegetables vs Region
# =============================
# Single-row heatmap: one tile per region, coloured by vegetable intake.
# Mapping the integer Vegetables column to y would place each tile at its
# numeric value with a height of 1 on an axis spanning ~100 units, leaving
# only thin slivers. A constant discrete y keeps the tiles full height and
# lets the fill scale alone carry the value.
ggplot(obesity_data, aes(x = Region, y = "Vegetables", fill = Vegetables)) +
  geom_tile(color = "white") +
  scale_fill_viridis_c(option = "plasma") +
  labs(
    title = "Vegetable consumption by region",
    x = "Region",
    y = NULL,
    fill = "Vegetables (g/day)"
  ) +
  theme_minimal()
