Over the last decade, birth rate trends have emerged as a significant topic amid global economic uncertainty, wealth inequality, climate change, urbanization, and an ever-changing geopolitical landscape. Should we be more concerned about overpopulation or a population plateau? Will the increasing costs of child-rearing deter people from having children altogether? These are critical questions we aim to explore in this project.
For this analysis, we focus on fertility rates across a carefully selected set of countries. Using data from the World Bank (births per 1,000 people and per woman), we will compare fertility trends with other key indicators such as GDP, death rate, and urbanization. The goal is to uncover interesting correlations that can contribute to the discussion on population dynamics and economic factors.
Countries:
Niger, Japan, US, China, Benin, Angola, Czech Republic, Italy, Egypt,
Bangladesh, South Korea
Project Group:
Ahmed Hassan, Zach Rose, Amish Rasheed, Allie Wrubel
Collaboration Tools:
To facilitate communication and efficient teamwork, we will
utilize several collaboration tools throughout the project.
Slack will serve as our primary communication platform,
allowing for real-time discussions and file sharing.
Email for formal communication and sharing larger
documents or updates. Google Colab for collaboratively
developing and running R scripts. All files will be stored in a shared
public repository on GitHub <https://github.com/awrubes/Data607_project3>.
Data Sources:
Datasets are sourced from the World Bank Group website <https://www.worldbank.org>
Birth Rate <https://github.com/awrubes/Data607_project3/tree/main/data/birth_rate>
Death Rate <https://github.com/awrubes/Data607_project3/tree/main/data/death_rate>
Fertility Replacement Rate <https://github.com/awrubes/Data607_project3/tree/main/data/fertility_replacement_rate>
GDP <https://github.com/awrubes/Data607_project3/tree/main/data/gdp>
Population <https://github.com/awrubes/Data607_project3/tree/main/data/population>
Urbanization <https://github.com/awrubes/Data607_project3/tree/main/data/urbanization>
All of our data is available as csv files in a public GitHub repository. In order to import the data using the below R script you must clone the github repo to your local computer.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
library(dplyr)
library(ggplot2)
library(httr)
library(purrr)
library(readr)
library(scales)
library(lubridate)
read_and_clean_csv_files <- function(data_directory, pattern, suffix) {
csv_files <- list.files(path = data_directory, pattern = pattern, full.names = TRUE)
read_and_clean_csv <- function(file) {
country_name <- gsub(suffix, "", basename(file))
country_name <- gsub("-", " ", country_name)
country_name <- str_to_title(country_name)
df <- read.csv(file, stringsAsFactors = FALSE, sep = ",")
df$country_name <- country_name
return(df)
}
list_of_dfs <- map(csv_files, read_and_clean_csv)
combined_data <- bind_rows(list_of_dfs)
return(combined_data)
}
# Load different datasets using the generic function
combined_birth_data <- read_and_clean_csv_files("data/birth_rate", "*.csv", "_birth_rate.csv")
combined_death_data <- read_and_clean_csv_files("data/death_rate", "*.csv", "_death_rate.csv")
combined_fertility_data <- read_and_clean_csv_files("data/fertility_replacement_rate", "*.csv", "_fertility_replacement_rate.csv")
combined_gdp_data <- read_and_clean_csv_files("data/gdp", "*.csv", "-gdp-gross-domestic-product.csv")
combined_population_data <- read_and_clean_csv_files("data/population", "*.csv", "-population-2024-10-12.csv")
combined_urbanization_data <- read_and_clean_csv_files("data/urbanization", "*.csv", "-urban-population.csv")
global_population_data_df <- read.csv("data/population/world_population.csv", stringsAsFactors = FALSE, sep = ",")
global_fertility_data_df <- read.csv("data/fertility_replacement_rate/world_fertility_rate.csv", stringsAsFactors = FALSE, sep = ",")
Global Data
global_population_data_df$date <- as.Date(global_population_data_df$date, format = "%m/%d/%Y")
ggplot(global_population_data_df, aes(x = date, y = Population)) +
geom_line(color = "blue") +
geom_point(color = "red") +
labs(title = "Global Population Over Time",
x = "Year",
y = "Population (in Billions)") +
scale_y_continuous(
breaks = seq(1e9, 8e9, by = 1e9),
labels = function(x) paste0(x / 1e9, "B")
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
global_fertility_data_df$date <- as.Date(global_fertility_data_df$date, format = "%m/%d/%Y")
ggplot(global_fertility_data_df, aes(x = date, y = Births.per.Woman)) +
geom_line(color = "green") +
geom_point(color = "red") +
labs(title = "Global Fertility Rate Since 1950",
x = "Year",
y = "Births per Woman") +
scale_y_continuous(breaks = seq(min(global_fertility_data_df$Births.per.Woman),
max(global_fertility_data_df$Births.per.Woman),
by = 0.5)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Birth rates
combined_birth_data$date <- as.Date(combined_birth_data$date, format = "%m/%d/%Y")
combined_birth_data <- combined_birth_data %>%
filter(!is.na(date) & !is.na(Births.per.1000.People))
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y",
limits = c(as.Date("1950-01-01"), as.Date("2024-12-31"))) +
theme_minimal() +
theme(legend.title = element_blank())
}
birth_rate_plot <- plot_country_trends(
df = combined_birth_data,
x_column = "date",
y_column = "Births.per.1000.People",
title = "Birth Rate by Country",
x_label = "Year",
y_label = "Births per 1000 People"
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(birth_rate_plot)
## Warning: Removed 760 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 760 rows containing missing values or values outside the scale range
## (`geom_point()`).
Death Rate
combined_death_data$date <- as.Date(combined_death_data$date, format = "%Y-%m-%d")
combined_death_data <- combined_death_data %>%
filter(!is.na(date) & !is.na(Deaths.per.1000.People) &
date >= as.Date("1950-12-31") & date <= as.Date("2024-12-31"))
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y") +
theme_minimal() +
theme(legend.title = element_blank())
}
death_rate_plot <- plot_country_trends(
df = combined_death_data,
x_column = "date",
y_column = "Deaths.per.1000.People",
title = "Death Rate by Country",
x_label = "Year",
y_label = "Deaths per 1000 People"
)
print(death_rate_plot)
fertility rate
combined_fertility_data$date <- as.Date(combined_fertility_data$date, format = "%Y-%m-%d")
combined_fertility_data <- combined_fertility_data %>%
filter(!is.na(date) & !is.na(Births.per.Woman) &
date >= as.Date("1950-01-01") & date <= as.Date("2024-12-31"))
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y") +
theme_minimal() +
theme(legend.title = element_blank())
}
fertility_rate_plot <- plot_country_trends(
df = combined_fertility_data,
x_column = "date",
y_column = "Births.per.Woman",
title = "Fertility Rate by Country",
x_label = "Year",
y_label = "Births per Woman"
)
print(fertility_rate_plot)
GDP
combined_gdp_data$date <- as.Date(combined_gdp_data$date, format = "%Y-%m-%d")
combined_gdp_data <- combined_gdp_data %>%
filter(!is.na(date) & !is.na(GDP...Billions.of.US...) &
date >= as.Date("1950-01-01") & date <= as.Date("2024-12-31"))
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y") +
scale_y_continuous(labels = function(x) paste0(x / 1e3, "B")) +
theme_minimal() +
theme(legend.title = element_blank())
}
advanced_economies <- combined_gdp_data %>%
filter(country_name %in% c("United States", "China", "Japan", "Italy", "South Korea"))
gdp_other_countries <- combined_gdp_data %>%
filter(!country_name %in% c("United States", "China", "Japan", "Italy", "South Korea"))
advanced_economies_plot <- plot_country_trends(
df = advanced_economies,
x_column = "date",
y_column = "GDP...Billions.of.US...",
title = "GDP for USA, China, Japan, Italy, and South Korea",
x_label = "Year",
y_label = "GDP in Billions of US Dollars"
)
gdp_other_countries_plot <- plot_country_trends(
df = gdp_other_countries,
x_column = "date",
y_column = "GDP...Billions.of.US...",
title = "GDP for Other Countries",
x_label = "Year",
y_label = "GDP in Billions of US Dollars"
)
print(advanced_economies_plot)
print(gdp_other_countries_plot)
population
combined_population_data$date <- as.Date(combined_population_data$date, format = "%Y-%m-%d")
combined_population_data <- combined_population_data %>%
filter(!is.na(date) & !is.na(Population) &
date >= as.Date("1950-01-01") & date <= as.Date("2024-12-31"))
china_population_data <- combined_population_data %>%
filter(country_name == "China")
rest_of_countries_population_data <- combined_population_data %>%
filter(country_name != "China")
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y") +
scale_y_continuous(
breaks = seq(0, max(df[[y_column]], na.rm = TRUE), by = 50e6),
labels = function(x) paste0(x / 1e6, "M")
) +
theme_minimal() +
theme(legend.title = element_blank())
}
china_population_plot <- plot_country_trends(
df = china_population_data,
x_column = "date",
y_column = "Population",
title = "Population for China",
x_label = "Year",
y_label = "Population (in Millions)"
)
rest_of_countries_population_plot <- plot_country_trends(
df = rest_of_countries_population_data,
x_column = "date",
y_column = "Population",
title = "Population for Other Countries",
x_label = "Year",
y_label = "Population (in Millions)"
)
print(china_population_plot)
print(rest_of_countries_population_plot)
urbanization
combined_urbanization_data$date <- as.Date(combined_urbanization_data$date, format = "%Y-%m-%d")
combined_urbanization_data <- combined_urbanization_data %>%
filter(!is.na(date) & !is.na(Urban.Population) &
date >= as.Date("1950-01-01") & date <= as.Date("2024-12-31"))
china_urbanization_data <- combined_urbanization_data %>%
filter(country_name == "China")
japan_usa_urbanization_data <- combined_urbanization_data %>%
filter(country_name %in% c("Japan", "United States"))
rest_of_countries_urbanization_data <- combined_urbanization_data %>%
filter(!country_name %in% c("China", "Japan", "United States"))
plot_country_trends <- function(df, x_column, y_column, title, x_label, y_label, y_break) {
ggplot(df, aes_string(x = x_column, y = y_column, color = "country_name", group = "country_name")) +
geom_line(size = 1) +
geom_point(size = 1.5) +
labs(title = title, x = x_label, y = y_label) +
scale_x_date(date_breaks = "10 years", date_labels = "%Y") +
scale_y_continuous(
breaks = seq(0, max(df[[y_column]], na.rm = TRUE), by = y_break),
labels = function(x) paste0(x / 1e6, "M")
) +
theme_minimal() +
theme(legend.title = element_blank())
}
china_urban_population_plot <- plot_country_trends(
df = china_urbanization_data,
x_column = "date",
y_column = "Urban.Population",
title = "Urban Population for China",
x_label = "Year",
y_label = "Urban Population (in Millions)",
y_break = 1e8
)
japan_usa_urban_population_plot <- plot_country_trends(
df = japan_usa_urbanization_data,
x_column = "date",
y_column = "Urban.Population",
title = "Urban Population for Japan and USA",
x_label = "Year",
y_label = "Urban Population (in Millions)",
y_break = 5e7
)
rest_of_countries_urban_population_plot <- plot_country_trends(
df = rest_of_countries_urbanization_data,
x_column = "date",
y_column = "Urban.Population",
title = "Urban Population for Other Countries",
x_label = "Year",
y_label = "Urban Population (in Millions)",
y_break = 1e7
)
print(china_urban_population_plot)
print(japan_usa_urban_population_plot)
print(rest_of_countries_urban_population_plot)
library(DBI)
## Warning: package 'DBI' was built under R version 4.3.3
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.3.3
host <- "database-1.cdyqsq4eynri.us-east-2.rds.amazonaws.com"
port <- 3306
dbname <- "Project_3"
user <- "admin"
password <- "Data607_Project3"
con <- dbConnect(RMySQL::MySQL(),
dbname = dbname,
host = host,
port = port,
user = user,
password = password)
library(dplyr)
library(ggplot2)
birth_data <- dbGetQuery(con, "SELECT country_name, year, birth_rate_value AS birth_rate FROM birth_rate")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
urbanization_data <- dbGetQuery(con, "SELECT country_name, year, urban_population_value FROM urbanization")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
merged_data <- birth_data %>%
left_join(urbanization_data, by = c("country_name", "year"))
merged_data <- na.omit(merged_data)
country_models <- merged_data %>%
group_by(country_name) %>%
do(model = lm(birth_rate ~ urban_population_value, data = .))
model_summaries <- country_models %>%
summarise(
country_name,
model_summary = list(summary(model))
)
for (i in 1:nrow(model_summaries)) {
cat("Country:", model_summaries$country_name[i], "\n")
print(model_summaries$model_summary[[i]])
cat("\n")
}
## Country: italy
##
## Call:
## lm(formula = birth_rate ~ urban_population_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0628 -1.4345 0.2116 1.1704 2.4041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.141e+01 2.111e+00 24.36 <2e-16 ***
## urban_population_value -1.053e-06 5.578e-08 -18.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.411 on 61 degrees of freedom
## Multiple R-squared: 0.8538, Adjusted R-squared: 0.8514
## F-statistic: 356.2 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: japan
##
## Call:
## lm(formula = birth_rate ~ urban_population_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9897 -1.3071 -0.0551 0.8993 3.7216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.282e+01 1.040e+00 31.54 <2e-16 ***
## urban_population_value -2.184e-07 1.080e-08 -20.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.47 on 61 degrees of freedom
## Multiple R-squared: 0.8702, Adjusted R-squared: 0.8681
## F-statistic: 409.1 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: niger
##
## Call:
## lm(formula = birth_rate ~ urban_population_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0278 -0.4454 -0.1182 0.6175 0.7684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.917e+01 1.218e-01 485.86 <2e-16 ***
## urban_population_value -3.310e-06 6.125e-08 -54.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5652 on 61 degrees of freedom
## Multiple R-squared: 0.9795, Adjusted R-squared: 0.9792
## F-statistic: 2921 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: south korea
##
## Call:
## lm(formula = birth_rate ~ urban_population_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2006 -1.4620 0.1739 0.8176 5.6976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.193e+01 6.201e-01 67.62 <2e-16 ***
## urban_population_value -8.186e-07 2.003e-08 -40.88 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.909 on 61 degrees of freedom
## Multiple R-squared: 0.9648, Adjusted R-squared: 0.9642
## F-statistic: 1671 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: united states
##
## Call:
## lm(formula = birth_rate ~ urban_population_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1342 -0.6606 0.0657 0.3879 4.0259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.467e+01 7.166e-01 34.42 <2e-16 ***
## urban_population_value -4.741e-08 3.507e-09 -13.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.267 on 61 degrees of freedom
## Multiple R-squared: 0.7497, Adjusted R-squared: 0.7456
## F-statistic: 182.8 on 1 and 61 DF, p-value: < 2.2e-16
urban_coefficients_df <- country_models %>%
summarise(
country_name,
coefficient = coef(summary(model))["urban_population_value", "Estimate"]
)
print(urban_coefficients_df )
## # A tibble: 5 × 2
## country_name coefficient
## <chr> <dbl>
## 1 italy -0.00000105
## 2 japan -0.000000218
## 3 niger -0.00000331
## 4 south korea -0.000000819
## 5 united states -0.0000000474
ggplot(urban_coefficients_df, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # Flip coordinates for better visibility
labs(title = "Effect of Urbanization on Birth Rate by Country",
x = "Country",
y = "Coefficient of Urban Population") +
theme_minimal()
death_data <- dbGetQuery(con, "SELECT country_name, year, death_rate_value AS death_rate FROM death_rate")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
merged_data <- birth_data %>%
left_join(death_data, by = c("country_name", "year"))
merged_data <- na.omit(merged_data)
country_models <- merged_data %>%
group_by(country_name) %>%
do(model = lm(birth_rate ~ death_rate, data = .))
model_summaries <- country_models %>%
summarise(
country_name,
model_summary = list(summary(model))
)
for (i in 1:nrow(model_summaries)) {
cat("Country:", model_summaries$country_name[i], "\n")
print(model_summaries$model_summary[[i]])
cat("\n")
}
## Country: angola
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.850 -5.564 1.492 4.703 9.609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.53731 0.92457 24.38 <2e-16 ***
## death_rate 1.18777 0.06202 19.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.118 on 149 degrees of freedom
## Multiple R-squared: 0.7111, Adjusted R-squared: 0.7092
## F-statistic: 366.8 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: bangladesh
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.277 -10.935 1.986 11.392 15.250
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9621 2.2922 1.292 0.198
## death_rate 1.7148 0.1802 9.513 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.88 on 149 degrees of freedom
## Multiple R-squared: 0.3779, Adjusted R-squared: 0.3737
## F-statistic: 90.5 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: benin
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.8996 -6.6335 0.8011 7.1681 11.1540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.66386 1.16471 18.60 <2e-16 ***
## death_rate 0.90827 0.07674 11.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.633 on 149 degrees of freedom
## Multiple R-squared: 0.4846, Adjusted R-squared: 0.4811
## F-statistic: 140.1 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: china
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.230 -9.436 -2.712 10.053 22.046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2277 2.2817 2.291 0.0234 *
## death_rate 0.9543 0.1806 5.284 4.42e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.07 on 149 degrees of freedom
## Multiple R-squared: 0.1578, Adjusted R-squared: 0.1521
## F-statistic: 27.92 on 1 and 149 DF, p-value: 4.416e-07
##
##
## Country: egypt
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.392 -5.752 1.727 5.330 10.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.5155 1.1408 7.464 6.48e-12 ***
## death_rate 1.8533 0.1054 17.591 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.729 on 149 degrees of freedom
## Multiple R-squared: 0.675, Adjusted R-squared: 0.6728
## F-statistic: 309.4 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: italy
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9742 -2.5657 -0.3183 1.2446 6.6633
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.9838 1.3046 16.851 <2e-16 ***
## death_rate -1.0037 0.1065 -9.428 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.039 on 149 degrees of freedom
## Multiple R-squared: 0.3736, Adjusted R-squared: 0.3694
## F-statistic: 88.88 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: japan
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3209 -2.8988 -0.1982 1.1616 15.2330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.93333 0.96350 20.69 <2e-16 ***
## death_rate -0.85063 0.08139 -10.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.602 on 149 degrees of freedom
## Multiple R-squared: 0.423, Adjusted R-squared: 0.4191
## F-statistic: 109.2 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: niger
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.6058 -3.8409 -0.5412 6.7992 11.3783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.60250 1.02891 23.91 <2e-16 ***
## death_rate 1.25826 0.06298 19.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.512 on 149 degrees of freedom
## Multiple R-squared: 0.7282, Adjusted R-squared: 0.7264
## F-statistic: 399.2 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: south korea
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.156 -7.406 -6.689 2.545 30.138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.15865 2.28627 5.756 4.76e-08 ***
## death_rate 0.06468 0.17935 0.361 0.719
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.31 on 149 degrees of freedom
## Multiple R-squared: 0.000872, Adjusted R-squared: -0.005834
## F-statistic: 0.13 on 1 and 149 DF, p-value: 0.7189
##
##
## Country: united states
##
## Call:
## lm(formula = birth_rate ~ death_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9785 -1.6734 -1.1075 0.0941 10.7214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.8183 3.4112 9.621 < 2e-16 ***
## death_rate -1.9973 0.3525 -5.665 7.34e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.508 on 149 degrees of freedom
## Multiple R-squared: 0.1772, Adjusted R-squared: 0.1717
## F-statistic: 32.1 on 1 and 149 DF, p-value: 7.342e-08
death_coefficients_df <- country_models %>%
summarise(
country_name,
coefficient = coef(summary(model))["death_rate", "Estimate"]
)
ggplot(death_coefficients_df, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # Flip coordinates for better visibility
labs(title = "Effect of Death Rate on Birth Rate by Country",
x = "Country",
y = "Coefficient of Death Rate") +
theme_minimal()
gdp_data <- dbGetQuery(con, "SELECT country_name, year, gdp_value FROM gdp")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
merged_data <- birth_data %>%
left_join(gdp_data, by = c("country_name", "year"))
merged_data <- na.omit(merged_data)
country_models <- merged_data %>%
group_by(country_name) %>%
do(model = lm(birth_rate ~ gdp_value, data = .))
model_summaries <- country_models %>%
summarise(
country_name,
model_summary = list(summary(model))
)
for (i in 1:nrow(model_summaries)) {
cat("Country:", model_summaries$country_name[i], "\n")
print(model_summaries$model_summary[[i]])
cat("\n")
}
## Country: angola
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4208 -0.7999 0.4482 1.2130 3.3679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.704775 0.305648 162.62 < 2e-16 ***
## gdp_value -0.065275 0.006453 -10.12 1.14e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.987 on 61 degrees of freedom
## Multiple R-squared: 0.6265, Adjusted R-squared: 0.6204
## F-statistic: 102.3 on 1 and 61 DF, p-value: 1.145e-14
##
##
## Country: bangladesh
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.899 -7.092 1.212 6.778 12.406
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.134460 1.120798 35.809 < 2e-16 ***
## gdp_value -0.077082 0.008402 -9.174 4.34e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.254 on 61 degrees of freedom
## Multiple R-squared: 0.5798, Adjusted R-squared: 0.5729
## F-statistic: 84.16 on 1 and 61 DF, p-value: 4.34e-13
##
##
## Country: benin
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5041 -0.8663 0.1368 0.8891 1.6261
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.66354 0.17755 262.82 <2e-16 ***
## gdp_value -0.72273 0.02616 -27.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.061 on 61 degrees of freedom
## Multiple R-squared: 0.926, Adjusted R-squared: 0.9248
## F-statistic: 763.4 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: china
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.248 -6.837 -1.568 5.711 14.314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.2557569 1.1952592 21.130 < 2e-16 ***
## gdp_value -0.0011594 0.0002064 -5.617 5.08e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.034 on 61 degrees of freedom
## Multiple R-squared: 0.3409, Adjusted R-squared: 0.3301
## F-statistic: 31.56 on 1 and 61 DF, p-value: 5.081e-07
##
##
## Country: egypt
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.449 -5.209 1.753 3.575 9.270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.021186 0.862944 44.060 < 2e-16 ***
## gdp_value -0.042368 0.005492 -7.714 1.37e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.28 on 61 degrees of freedom
## Multiple R-squared: 0.4938, Adjusted R-squared: 0.4855
## F-statistic: 59.5 on 1 and 61 DF, p-value: 1.368e-10
##
##
## Country: italy
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4238 -1.5673 -0.2536 1.7440 3.3284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.7386264 0.4038154 38.98 <2e-16 ***
## gdp_value -0.0038827 0.0003095 -12.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.95 on 61 degrees of freedom
## Multiple R-squared: 0.7207, Adjusted R-squared: 0.7161
## F-statistic: 157.4 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: japan
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6648 -0.7969 0.1478 0.7383 2.3937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.737e+01 2.653e-01 65.45 <2e-16 ***
## gdp_value -1.802e-03 7.354e-05 -24.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.239 on 61 degrees of freedom
## Multiple R-squared: 0.9078, Adjusted R-squared: 0.9063
## F-statistic: 600.6 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: niger
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0056 -0.6220 0.1334 0.8075 1.2503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.57144 0.16543 348.02 <2e-16 ***
## gdp_value -0.91676 0.02853 -32.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9334 on 61 degrees of freedom
## Multiple R-squared: 0.9442, Adjusted R-squared: 0.9433
## F-statistic: 1033 on 1 and 61 DF, p-value: < 2.2e-16
##
##
## Country: south korea
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.046 -4.750 -1.728 3.757 16.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.938472 1.052350 24.65 < 2e-16 ***
## gdp_value -0.013648 0.001322 -10.32 5.18e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.138 on 61 degrees of freedom
## Multiple R-squared: 0.636, Adjusted R-squared: 0.6301
## F-statistic: 106.6 on 1 and 61 DF, p-value: 5.184e-15
##
##
## Country: united states
##
## Call:
## lm(formula = birth_rate ~ gdp_value, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1250 -0.7122 -0.2668 0.2492 5.3030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.755e+01 3.001e-01 58.47 < 2e-16 ***
## gdp_value -2.831e-04 2.784e-05 -10.17 9.36e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.543 on 61 degrees of freedom
## Multiple R-squared: 0.629, Adjusted R-squared: 0.6229
## F-statistic: 103.4 on 1 and 61 DF, p-value: 9.356e-15
gdp_coefficients_df <- country_models %>%
summarise(
country_name,
coefficient = coef(summary(model))["gdp_value", "Estimate"]
)
ggplot(gdp_coefficients_df, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() +
labs(title = "Effect of GDP on Birth Rate by Country",
x = "Country",
y = "Coefficient of GDP") +
theme_minimal()
fertility_data <- dbGetQuery(con, "SELECT country_name, year, fertility_rate_value AS fertility_rate FROM fertility_rate")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
merged_data <- birth_data %>%
left_join(fertility_data, by = c("country_name", "year"))
merged_data <- na.omit(merged_data)
country_models <- merged_data %>%
group_by(country_name) %>%
do(model = lm(birth_rate ~ fertility_rate, data = .))
model_summaries <- country_models %>%
summarise(
country_name,
model_summary = list(summary(model))
)
for (i in 1:nrow(model_summaries)) {
cat("Country:", model_summaries$country_name[i], "\n")
print(model_summaries$model_summary[[i]])
cat("\n")
}
## Country: angola
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6957 -1.4473 0.1783 0.9413 6.1621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.57296 0.43944 10.41 <2e-16 ***
## fertility_rate 6.46684 0.08182 79.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.737 on 149 degrees of freedom
## Multiple R-squared: 0.9767, Adjusted R-squared: 0.9765
## F-statistic: 6247 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: bangladesh
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5947 -2.6923 -0.3835 2.3383 5.0008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2379 0.4299 -0.553 0.581
## fertility_rate 7.1829 0.1132 63.435 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.847 on 149 degrees of freedom
## Multiple R-squared: 0.9643, Adjusted R-squared: 0.9641
## F-statistic: 4024 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: benin
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.15412 -0.90259 0.05851 1.01378 1.65059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1679 0.2648 15.74 <2e-16 ***
## fertility_rate 6.3558 0.0543 117.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.103 on 149 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9892
## F-statistic: 1.37e+04 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: china
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.855 -2.244 -1.508 2.152 6.949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9334 0.4344 -2.149 0.0333 *
## fertility_rate 6.8692 0.1466 46.867 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.765 on 149 degrees of freedom
## Multiple R-squared: 0.9365, Adjusted R-squared: 0.936
## F-statistic: 2197 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: egypt
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0239 -1.4536 -0.0835 1.0647 4.7417
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.82612 0.35165 2.349 0.0201 *
## fertility_rate 7.02772 0.08882 79.121 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.8 on 149 degrees of freedom
## Multiple R-squared: 0.9768, Adjusted R-squared: 0.9766
## F-statistic: 6260 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: italy
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.12768 -1.79400 -0.04313 1.46212 3.15430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.1852 0.6330 -8.191 1.08e-13 ***
## fertility_rate 9.2279 0.3775 24.443 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.716 on 149 degrees of freedom
## Multiple R-squared: 0.8004, Adjusted R-squared: 0.7991
## F-statistic: 597.5 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: japan
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8205 -2.1327 -0.4062 2.0687 3.3994
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.0070 0.8087 -11.14 <2e-16 ***
## fertility_rate 11.5322 0.4710 24.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.116 on 149 degrees of freedom
## Multiple R-squared: 0.8009, Adjusted R-squared: 0.7996
## F-statistic: 599.5 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: niger
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3355 -1.1621 0.0442 0.9917 4.5663
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.56700 0.52926 4.85 3.07e-06 ***
## fertility_rate 6.87081 0.08852 77.62 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.238 on 149 degrees of freedom
## Multiple R-squared: 0.9759, Adjusted R-squared: 0.9757
## F-statistic: 6025 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: south korea
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9535 -2.8703 -0.4689 1.7278 5.5789
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.9248 0.4043 -4.761 4.52e-06 ***
## fertility_rate 7.3675 0.1550 47.537 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.814 on 149 degrees of freedom
## Multiple R-squared: 0.9381, Adjusted R-squared: 0.9377
## F-statistic: 2260 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: united states
##
## Call:
## lm(formula = birth_rate ~ fertility_rate, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6648 -1.2379 -0.2820 0.5419 3.5155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.6976 0.5363 -3.166 0.00188 **
## fertility_rate 7.4912 0.2565 29.210 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.491 on 149 degrees of freedom
## Multiple R-squared: 0.8513, Adjusted R-squared: 0.8503
## F-statistic: 853.2 on 1 and 149 DF, p-value: < 2.2e-16
fertility_coefficients_df <- country_models %>%
summarise(
country_name,
coefficient = coef(summary(model))["fertility_rate", "Estimate"]
)
ggplot(fertility_coefficients_df, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # Flip coordinates for better visibility
labs(title = "Effect of Fertility Rate on Birth Rate by Country",
x = "Country",
y = "Coefficient of Fertility Rate") +
theme_minimal()
population_data <- dbGetQuery(con, "SELECT country_name, year, population_value AS population FROM population")
## Warning in dbSendQuery(conn, statement, ...): Decimal MySQL column 2 imported
## as numeric
merged_data <- birth_data %>%
left_join(population_data, by = c("country_name", "year"))
merged_data <- na.omit(merged_data)
country_models <- merged_data %>%
group_by(country_name) %>%
do(model = lm(birth_rate ~ population, data = .))
model_summaries <- country_models %>%
summarise(
country_name,
model_summary = list(summary(model))
)
population_coefficients_df <- country_models %>%
summarise(
country_name,
coefficient = coef(summary(model))["population", "Estimate"]
)
for (i in 1:nrow(model_summaries)) {
cat("Country:", model_summaries$country_name[i], "\n")
print(model_summaries$model_summary[[i]])
cat("\n")
}
## Country: angola
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5772 -1.8108 0.1097 1.6348 2.7613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.085e+01 2.265e-01 224.48 <2e-16 ***
## population -2.593e-07 3.369e-09 -76.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.783 on 149 degrees of freedom
## Multiple R-squared: 0.9755, Adjusted R-squared: 0.9753
## F-statistic: 5927 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: bangladesh
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2297 -1.2146 0.5564 1.9864 3.4478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.227e+01 5.997e-01 103.83 <2e-16 ***
## population -2.644e-07 3.758e-09 -70.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.575 on 149 degrees of freedom
## Multiple R-squared: 0.9708, Adjusted R-squared: 0.9706
## F-statistic: 4950 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: benin
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4147 -2.1532 -0.1038 2.1635 3.6999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.645e+01 3.053e-01 152.14 <2e-16 ***
## population -7.082e-07 1.296e-08 -54.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.317 on 149 degrees of freedom
## Multiple R-squared: 0.9525, Adjusted R-squared: 0.9522
## F-statistic: 2988 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: china
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.275 -5.470 2.174 4.470 13.766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.826e+01 2.853e+00 16.92 <2e-16 ***
## population -2.887e-08 2.524e-09 -11.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.004 on 149 degrees of freedom
## Multiple R-squared: 0.4677, Adjusted R-squared: 0.4641
## F-statistic: 130.9 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: egypt
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8554 -1.8844 0.3184 1.7123 10.2288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.652e+01 6.142e-01 75.74 <2e-16 ***
## population -1.771e-07 4.677e-09 -37.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.622 on 149 degrees of freedom
## Multiple R-squared: 0.9058, Adjusted R-squared: 0.9052
## F-statistic: 1433 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: italy
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.147 -2.621 -1.776 0.014 8.874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.125e+00 2.351e+00 3.456 0.000715 ***
## population 3.466e-08 4.531e-08 0.765 0.445478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.833 on 149 degrees of freedom
## Multiple R-squared: 0.003912, Adjusted R-squared: -0.002773
## F-statistic: 0.5852 on 1 and 149 DF, p-value: 0.4455
##
##
## Country: japan
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.447 -3.152 -2.894 1.351 15.895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.305e+01 2.372e+00 5.501 1.6e-07 ***
## population -2.576e-08 2.224e-08 -1.158 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.721 on 149 degrees of freedom
## Multiple R-squared: 0.008924, Adjusted R-squared: 0.002272
## F-statistic: 1.342 on 1 and 149 DF, p-value: 0.2486
##
##
## Country: niger
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1365 -3.6672 0.7692 2.9749 6.6127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.516e+01 4.040e-01 136.56 <2e-16 ***
## population -2.677e-07 5.479e-09 -48.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.492 on 149 degrees of freedom
## Multiple R-squared: 0.9412, Adjusted R-squared: 0.9409
## F-statistic: 2387 on 1 and 149 DF, p-value: < 2.2e-16
##
##
## Country: south korea
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.2971 -7.4657 0.8455 5.4037 20.4056
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.968e+01 3.240e+00 12.248 < 2e-16 ***
## population -6.541e-07 7.991e-08 -8.185 1.12e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.396 on 149 degrees of freedom
## Multiple R-squared: 0.3102, Adjusted R-squared: 0.3055
## F-statistic: 67 on 1 and 149 DF, p-value: 1.119e-13
##
##
## Country: united states
##
## Call:
## lm(formula = birth_rate ~ population, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6648 -1.2379 -0.2820 0.5419 3.5155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.6976 0.5363 -3.166 0.00188 **
## population 7.4912 0.2565 29.210 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.491 on 149 degrees of freedom
## Multiple R-squared: 0.8513, Adjusted R-squared: 0.8503
## F-statistic: 853.2 on 1 and 149 DF, p-value: < 2.2e-16
positive_coefficients <- population_coefficients_df %>%
filter(coefficient > 0)
ggplot(positive_coefficients, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "lightgreen") +
coord_flip() +
labs(title = "Positive Effect of Population on Birth Rate",
x = "Country",
y = "Coefficient of Population") +
theme_minimal()
negative_coefficients <- population_coefficients_df %>%
filter(coefficient <= 0)
ggplot(negative_coefficients, aes(x = reorder(country_name, coefficient), y = coefficient)) +
geom_bar(stat = "identity", fill = "salmon") +
coord_flip() +
labs(title = "Negative Effect of Population on Birth Rate",
x = "Country",
y = "Coefficient of Population") +
theme_minimal()