# Load necessary libraries
if (!requireNamespace("WDI", quietly = TRUE)) {
install.packages("WDI")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}
library(WDI)
library(ggplot2)
# Specify the countries using their ISO-2 codes
country_codes <- c("US", "TR", "CN", "IN", "BR") # Example country codes
# Load population and agriculture data from WDI
population_data <- WDI(country = country_codes, indicator = "SP.POP.TOTL", start = 2000, end = 2020)
agriculture_data <- WDI(country = country_codes, indicator = "NV.AGR.TOTL.CD", start = 2000, end = 2020)
# Merge the data frames by country and year
data <- merge(population_data, agriculture_data, by = c("iso2c", "year"), suffixes = c("_population", "_agriculture"))
# Clean the data by removing NA values
data <- na.omit(data)
# Calculate statistics
average_population <- mean(data$SP.POP.TOTL)
average_agriculture <- mean(data$NV.AGR.TOTL.CD)
variance_population <- var(data$SP.POP.TOTL)
variance_agriculture <- var(data$NV.AGR.TOTL.CD)
sd_population <- sd(data$SP.POP.TOTL)
sd_agriculture <- sd(data$NV.AGR.TOTL.CD)
covariance <- cov(data$SP.POP.TOTL, data$NV.AGR.TOTL.CD)
# Perform linear regression
model <- lm(NV.AGR.TOTL.CD ~ SP.POP.TOTL, data = data)
# Summary of the regression model
summary(model)
##
## Call:
## lm(formula = NV.AGR.TOTL.CD ~ SP.POP.TOTL, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.878e+11 -4.959e+10 4.372e+09 3.009e+10 6.364e+11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.637e+10 2.624e+10 0.624 0.534
## SP.POP.TOTL 3.387e+02 3.146e+01 10.767 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.758e+11 on 103 degrees of freedom
## Multiple R-squared: 0.5295, Adjusted R-squared: 0.525
## F-statistic: 115.9 on 1 and 103 DF, p-value: < 2.2e-16
# Create plots
# 1. Scatter plot with regression line
ggplot(data, aes(x = SP.POP.TOTL, y = NV.AGR.TOTL.CD)) +
geom_point() +
geom_smooth(method = "lm", col = "blue") +
labs(title = "Population vs Agriculture", x = "Total Population", y = "Agricultural Value (CD)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# 2. Bar plots for average values
avg_data <- data.frame(
Category = c("Average Population", "Average Agriculture"),
Value = c(average_population, average_agriculture)
)
ggplot(avg_data, aes(x = Category, y = Value, fill = Category)) +
geom_bar(stat = "identity") +
labs(title = "Average Population and Agriculture", y = "Value") +
theme_minimal()

# Print results
cat("Average Population:", average_population, "\n")
## Average Population: 630859470
cat("Average Agriculture:", average_agriculture, "\n")
## Average Agriculture: 230058634678
cat("Variance Population:", variance_population, "\n")
## Variance Population: 3.003832e+17
cat("Variance Agriculture:", variance_agriculture, "\n")
## Variance Agriculture: 6.508112e+22
cat("Standard Deviation Population:", sd_population, "\n")
## Standard Deviation Population: 548072219
cat("Standard Deviation Agriculture:", sd_agriculture, "\n")
## Standard Deviation Agriculture: 2.5511e+11
cat("Covariance:", covariance, "\n")
## Covariance: 1.017456e+20