library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(scales)
library(gapminder)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gganimate)
library(ggrepel)

# Histogram: distribution of life expectancy over the whole gapminder
# data set, using 5-unit-wide bins.

gapminder %>%
  ggplot(mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "skyblue") +
  labs(
    title = "Distribution of Life Expectancy",
    x = "Life Expectancy",
    y = "Count"
  ) +
  theme_minimal()

# Density plot: smoothed view of the life expectancy distribution.

gapminder %>%
  ggplot(mapping = aes(x = lifeExp)) +
  geom_density(alpha = 0.5, fill = "lightgreen") +
  labs(
    title = "Density Plot of Life Expectancy",
    x = "Life Expectancy",
    y = "Density"
  ) +
  theme_minimal()

# Box plot: life expectancy compared across continents.

ggplot(data = gapminder, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot(colour = "black", fill = "lightcoral") +
  labs(
    title = "Life Expectancy by Continent",
    x = "Continent",
    y = "Life Expectancy"
  ) +
  theme_minimal()

# Scatter plot: life expectancy against GDP per capita (log10 x-axis),
# coloured by continent, with a linear fit for each continent.

ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.6) +
  scale_x_log10() +  # Log scale for better visibility
  # `color = continent` is inherited from the plot-level aes, so one
  # regression line is fitted per continent rather than a single overall
  # line. The formula is stated explicitly so ggplot2 does not emit its
  # "using formula = 'y ~ x'" message on every render.
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_minimal() +
  labs(title = "Life Expectancy vs. GDP Per Capita",
       x = "GDP Per Capita (Log Scale)",
       y = "Life Expectancy")

# Line plot: life expectancy over time, one line per country, coloured by
# continent.

gapminder %>%
  ggplot(
    mapping = aes(x = year, y = lifeExp, group = country, colour = continent)
  ) +
  geom_line(alpha = 0.5) +
  labs(
    title = "Life Expectancy Trends Over Time",
    x = "Year",
    y = "Life Expectancy"
  ) +
  theme_minimal()

# Bar chart: mean GDP per capita for each continent.

# One row per continent; the mean is taken over all rows of that continent.
gapminder_summary <- summarise(
  group_by(gapminder, continent),
  avg_gdp = mean(gdpPercap)
)

gapminder_summary %>%
  ggplot(mapping = aes(x = continent, y = avg_gdp, fill = continent)) +
  geom_bar(stat = "identity") +  # bar heights are the avg_gdp values as-is
  labs(
    title = "Average GDP Per Capita by Continent",
    x = "Continent",
    y = "Average GDP Per Capita"
  ) +
  theme_minimal()

# 3D scatter plot (plotly): GDP per capita (x), life expectancy (y) and
# population (z), with markers coloured by continent.

fig <- gapminder %>%
  plot_ly(
    x = ~gdpPercap,
    y = ~lifeExp,
    z = ~pop,
    color = ~continent,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(
    title = "3D Scatter Plot: GDP vs Life Expectancy vs Population",
    scene = list(
      xaxis = list(title = "GDP Per Capita"),
      yaxis = list(title = "Life Expectancy"),
      zaxis = list(title = "Population")
    )
  )

# Evaluating the object at top level displays the figure.
fig
# U.S. GDP per capita over time

gapminder %>%
  filter(country == "United States") %>%
  ggplot(aes(x = year, y = gdpPercap)) +
  geom_point(color = "blue", alpha = 0.6) +  # Scatter points
  geom_line(color = "red") +  # Line connecting points
  theme_minimal() +
  labs(title = "U.S. GDP Per Capita over Time",
       x = "Year",
       # The y aesthetic is gdpPercap, so the axis is labelled as GDP per
       # capita (it previously read "Population").
       y = "GDP Per Capita")

# Greece life expectancy over time

gapminder %>%
  filter(country == "Greece") %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_point(color = "red", alpha = 0.6) +  # Points
  geom_line(color = "black") +  # Line connecting points
  theme_grey() +
  # The data is filtered to Greece, so the title names Greece (it
  # previously said "Russia").
  labs(title = "Greece Life Expectancy Over Time",
       x = "Year",
       y = "Life Expectancy")

# Box plots comparing GDP per capita for the United States and Greece.

ggplot(
  data = filter(gapminder, country %in% c("United States", "Greece")),
  mapping = aes(x = country, y = gdpPercap, fill = country)
) +
  geom_boxplot() +
  labs(
    title = "GDP Per Capita Comparison: U.S. vs. Greece",
    x = "Country",
    y = "GDP Per Capita"
  ) +
  theme_minimal()

# Scatter plot: relationship between life expectancy and GDP per capita,
# with a linear fit for each continent.

ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.6, size = 2) +
  # The colour grouping is inherited, so one line is fitted per continent.
  # The formula is stated explicitly so ggplot2 does not emit its
  # "using formula = 'y ~ x'" message on every render.
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_x_log10() +  # Log transformation for GDP per capita
  theme_minimal() +
  labs(
    title = "Life Expectancy vs. GDP Per Capita",
    subtitle = "Colored by Continent (1952–2007)",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy",
    color = "Continent"
  )

# Other visualizations to explore but not needed for this article

library(hexbin)

# Hexbin plot: observations counted in hexagonal cells (bins = 30), with a
# log10 x-axis.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_hex(bins = 30) +
  scale_x_log10() +
  labs(
    title = "Hexbin Plot: Life Expectancy vs GDP Per Capita",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy"
  ) +
  theme_minimal()

# 2D density plot: filled contour polygons, with fill mapped to the computed
# density level.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  stat_density_2d(
    mapping = aes(fill = after_stat(level)),
    geom = "polygon"
  ) +
  scale_x_log10() +
  labs(
    title = "2D Density Contour: GDP vs Life Expectancy",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy"
  ) +
  theme_minimal()

# Bubble plot over all years: point size follows population, colour follows
# continent.
gapminder %>%
  ggplot(
    mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
  ) +
  geom_point(alpha = 0.5) +
  scale_size(range = c(1, 10)) +
  scale_x_log10() +
  labs(
    title = "Bubble Plot: GDP vs Life Expectancy (Sized by Population)",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy",
    size = "Population"
  ) +
  theme_minimal()

# Time series: population of the United States and Greece on a shared
# y-axis.

countries <- c("United States", "Greece")

gapminder %>%
  filter(country %in% countries) %>%
  ggplot(aes(x = year, y = pop, color = country)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a lifecycle warning.
  geom_line(linewidth = 1) +
  geom_point(size = 2, alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Population Growth: U.S. vs. Greece",
    x = "Year",
    y = "Population",
    color = "Country"
  ) +
  scale_y_continuous(labels = scales::comma)  # comma-formatted axis labels

# Population trends, one facet per country, each with its own y-axis range.

countries <- c("United States", "Greece")

gapminder %>%
  filter(country %in% countries) %>%
  ggplot(aes(x = year, y = pop)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0.
  geom_line(aes(color = country), linewidth = 1.2) +
  # Shape 21 is a fillable circle: the outline takes the mapped colour and
  # the interior is filled white.
  geom_point(aes(color = country), size = 3, shape = 21, fill = "white") +
  facet_wrap(~ country, scales = "free_y") +
  scale_color_manual(values = c("United States" = "#1f77b4", "Greece" = "#e15759")) +
  scale_y_continuous(labels = comma) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Population Trends (1952–2007)",
    subtitle = "United States vs. Greece (Faceted by Country)",
    x = "Year",
    y = "Population",
    color = "Country"
  ) +
  # theme() must follow theme_minimal(), which would otherwise reset these
  # settings. The legend is hidden because the facet strips already name
  # each country.
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    axis.title = element_text(face = "bold"),
    strip.text = element_text(face = "bold", size = 14),
    legend.position = "none"
  )

# Population on a log10 y-axis, with direct labels in place of a legend.

countries <- c("United States", "Greece")

gapminder %>%
  filter(country %in% countries) %>%
  ggplot(aes(x = year, y = pop, group = country, color = country)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_text_repel(
    # Label only the last observation of each country, so the name sits at
    # the right-hand end of its line.
    data = . %>% group_by(country) %>% filter(year == max(year)),
    aes(label = country),
    nudge_x = 1,
    direction = "y",
    hjust = 0,
    segment.color = NA,  # no connector segment between label and point
    size = 5
  ) +
  scale_y_log10(labels = comma) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Population Over Time (Log Scale)",
    subtitle = "Direct labeling for clarity (Wilke, 20.2)",
    x = "Year",
    y = "Population (log scale)"
  ) +
  theme(legend.position = "none")  # direct labels replace the legend

# Relative population growth on a log10 y-axis: each country's population
# is divided by its own earliest observation.

countries <- c("United States", "Greece")

gapminder %>%
  filter(country %in% countries) %>%
  group_by(country) %>%
  # `order_by = year` pins the baseline to each country's earliest year
  # instead of relying on the rows already being sorted by year.
  mutate(pop_indexed = pop / first(pop, order_by = year)) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = pop_indexed, color = country)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_text_repel(
    # Label only the last observation of each country. The grouping is
    # stated here because the plot data has been ungrouped.
    data = . %>% group_by(country) %>% filter(year == max(year)),
    aes(label = country),
    nudge_x = 1,
    direction = "y",
    hjust = 0,
    segment.color = NA,  # no connector segment between label and point
    size = 5
  ) +
  scale_y_log10(labels = label_number(accuracy = 0.01)) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Relative Population Growth (Log Scale)",
    subtitle = "Population indexed to 1952 = 1",
    x = "Year",
    y = "Population Growth (relative to 1952)"
  ) +
  theme(legend.position = "none")  # direct labels replace the legend

# Bubble plot for 2007 only: GDP per capita (log10 x-axis) against life
# expectancy, with point size following population and colour following
# continent.

gapminder %>%
  filter(year == 2007) %>%
  # NOTE(review): ascending order draws the most populous countries last,
  # i.e. on top of smaller bubbles. Confirm this is intended; desc(pop)
  # would keep the small bubbles visible.
  arrange(pop) %>%
  ggplot(
    mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
  ) +
  geom_point(alpha = 0.7) +
  scale_x_log10() +
  scale_size(name = "Population", range = c(1, 20)) +
  labs(
    title = "Bubble Plot: Life Expectancy vs. GDP Per Capita (2007)",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy",
    color = "Continent"
  ) +
  theme_minimal(base_size = 14)

# Animated bubble plot (gganimate): the frames step through `year`, and the
# subtitle shows the time of the current frame.

gapminder %>%
  ggplot(
    mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
  ) +
  geom_point(show.legend = TRUE, alpha = 0.7) +
  scale_x_log10() +
  scale_size(name = "Population", range = c(2, 12)) +
  scale_y_continuous(limits = c(20, 90)) +
  labs(
    title = "Life Expectancy vs. GDP per Capita",
    subtitle = "Year: {frame_time}",
    x = "GDP per Capita (log scale)",
    y = "Life Expectancy",
    color = "Continent"
  ) +
  theme_minimal(base_size = 14) +
  # Animation layers: transition over year with linear easing, and fade
  # points in and out as they enter or leave.
  transition_time(year) +
  ease_aes("linear") +
  enter_fade() +
  exit_fade()

# Animated bubble plot with one facet per continent. The legend is
# suppressed on the point layer.

ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
) +
  geom_point(show.legend = FALSE, alpha = 0.7) +
  scale_x_log10() +
  scale_size(range = c(2, 12)) +
  facet_wrap(~ continent) +
  labs(
    title = "Animated Bubble Plot by Continent",
    subtitle = "Year: {frame_time}",
    x = "GDP per Capita (log scale)",
    y = "Life Expectancy"
  ) +
  theme_minimal(base_size = 14) +
  # Animation layers: transition over year with linear easing, and fade
  # points in and out as they enter or leave.
  transition_time(year) +
  ease_aes("linear") +
  enter_fade() +
  exit_fade()