library(WDI)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
1 Birth rate, crude (per 1,000 people)
# Download the crude birth rate indicator (SP.DYN.CBRT.IN) for 1960-2023 with
# country = "all", then preview the first rows.
data <- WDI(
  country = "all",
  indicator = "SP.DYN.CBRT.IN",
  start = 1960,
  end = 2023
)
head(data)
## country iso2c iso3c year SP.DYN.CBRT.IN
## 1 Africa Eastern and Southern ZH AFE 2023 32.95529
## 2 Africa Eastern and Southern ZH AFE 2022 33.32043
## 3 Africa Eastern and Southern ZH AFE 2021 33.68134
## 4 Africa Eastern and Southern ZH AFE 2020 34.01047
## 5 Africa Eastern and Southern ZH AFE 2019 34.33588
## 6 Africa Eastern and Southern ZH AFE 2018 34.58361
Dünya çapında doğum oranı tablosu
# Line chart of the crude birth rate for the "World" rows, skipping years
# without a value.
data %>%
  filter(country == "World", !is.na(SP.DYN.CBRT.IN)) %>%
  ggplot(mapping = aes(x = year, y = SP.DYN.CBRT.IN)) +
  geom_line(colour = "blue") +
  labs(
    title = "Crude Birth Rate (World)",
    x = "Year",
    y = "Births per 1000 people"
  )

Kazakistan doğum oranı tablosu
# Line chart of the crude birth rate for Kazakhstan, skipping years without a
# value.
data %>%
  filter(country == "Kazakhstan", !is.na(SP.DYN.CBRT.IN)) %>%
  ggplot(mapping = aes(x = year, y = SP.DYN.CBRT.IN)) +
  geom_line(colour = "red") +
  labs(
    title = "Crude Birth Rate — Kazakhstan",
    x = "Year",
    y = "Births per 1000"
  )

Basit ekonometrik model
# Fit a linear time trend to the "World" crude birth rate and print the
# regression summary.
df <- filter(data, country == "World", !is.na(SP.DYN.CBRT.IN))
model <- lm(formula = SP.DYN.CBRT.IN ~ year, data = df)
summary(model)
##
## Call:
## lm(formula = SP.DYN.CBRT.IN ~ year, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.95355 -0.65342 0.05802 0.71839 2.73795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 601.152048 13.259593 45.34 <2e-16 ***
## year -0.289120 0.006658 -43.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9839 on 62 degrees of freedom
## Multiple R-squared: 0.9682, Adjusted R-squared: 0.9677
## F-statistic: 1886 on 1 and 62 DF, p-value: < 2.2e-16
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
Anlamlılık testi
# Coefficient t-tests for the birth-rate trend using the HC1
# heteroskedasticity-consistent covariance estimate.
coeftest(
  model,
  vcov. = vcovHC(model, type = "HC1")
)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 601.1520478 17.1324141 35.089 < 2.2e-16 ***
## year -0.2891200 0.0085613 -33.771 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2 Children out of school, primary, male
# Download the male primary out-of-school indicator (SE.PRM.UNER.MA) for
# 1960-2023 with country = "all", then preview the first rows.
data_male <- WDI(
  country = "all",
  indicator = "SE.PRM.UNER.MA",
  start = 1960,
  end = 2023
)
head(data_male)
## country iso2c iso3c year SE.PRM.UNER.MA
## 1 Africa Eastern and Southern ZH AFE 2023 NA
## 2 Africa Eastern and Southern ZH AFE 2022 NA
## 3 Africa Eastern and Southern ZH AFE 2021 NA
## 4 Africa Eastern and Southern ZH AFE 2020 NA
## 5 Africa Eastern and Southern ZH AFE 2019 NA
## 6 Africa Eastern and Southern ZH AFE 2018 NA
# Five-number summary, mean and NA count of the male out-of-school series.
summary(data_male[["SE.PRM.UNER.MA"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 4478 25693 491035 150487 38300000 13537
DÜNYA PROGRAMI
# Plot, for each year, the mean male out-of-school value over all rows of
# data_male (missing values ignored).
# SE.PRM.UNER.MA is a head count of children, not a rate, so the title and axis
# are labelled as a number of children rather than a percentage.
# NOTE(review): data_male was fetched with country = "all", which includes
# regional aggregates alongside countries, so this is a cross-entity average
# rather than a world total — confirm this is the intended series.
data_male %>%
  group_by(year) %>%
  summarise(
    world_male = mean(SE.PRM.UNER.MA, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = year, y = world_male)) +
  geom_line() +
  labs(
    title = "Children Out of School, Primary, Male — Mean Across Entities",
    x = "Year",
    y = "Children out of school (mean count)"
  )
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Kazakistan tablosu
# Line chart of the male primary out-of-school series for Kazakhstan, skipping
# years without a value.
# SE.PRM.UNER.MA is a head count of children, so the labels describe a number
# of children rather than a percentage.
data_male %>%
  filter(country == "Kazakhstan", !is.na(SE.PRM.UNER.MA)) %>%
  ggplot(aes(x = year, y = SE.PRM.UNER.MA)) +
  geom_line(color = "blue") +
  labs(
    title = "Children Out of School, Primary, Male — Kazakhstan",
    x = "Year",
    y = "Number of children"
  )

BASIT REGRESYON
# Regress the male out-of-school count on year using every non-missing row and
# print the regression summary.
# NOTE(review): no country filter is applied, so all entities returned by
# WDI(country = "all") are pooled into a single fit — confirm this is intended.
df_male <- filter(data_male, !is.na(SE.PRM.UNER.MA))
model_male <- lm(formula = SE.PRM.UNER.MA ~ year, data = df_male)
summary(model_male)
##
## Call:
## lm(formula = SE.PRM.UNER.MA ~ year, data = df_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -619152 -533240 -449138 -260305 37841148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13485282 6151727 -2.192 0.0284 *
## year 6972 3069 2.272 0.0231 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2518000 on 3485 degrees of freedom
## Multiple R-squared: 0.001479, Adjusted R-squared: 0.001192
## F-statistic: 5.162 on 1 and 3485 DF, p-value: 0.02315
Sağlam standart hatalar
# Coefficient t-tests for the male model using the HC1
# heteroskedasticity-consistent covariance estimate.
coeftest(
  model_male,
  vcov. = vcovHC(model_male, type = "HC1")
)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13485281.5 3732615.8 -3.6128 0.0003072 ***
## year 6972.1 1872.7 3.7229 0.0002001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Breusch-Pagan testi
# Studentized Breusch-Pagan test for heteroskedasticity in the male model.
bptest(model_male, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: model_male
## BP = 3.2626, df = 1, p-value = 0.07088
Durbin-Watson testi
# Durbin-Watson test for positive first-order autocorrelation in the male
# model's residuals.
# NOTE(review): the model is fitted on rows from many entities, so the statistic
# depends on the row order of df_male — confirm it is meaningful here.
dwtest(model_male, alternative = "greater")
##
## Durbin-Watson test
##
## data: model_male
## DW = 0.070347, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
3 Forest area (sq.km)
# Download the forest area indicator (AG.LND.FRST.K2, sq. km) for 1960-2023
# with country = "all", then preview the first rows.
data_forest <- WDI(
  country = "all",
  indicator = "AG.LND.FRST.K2",
  start = 1960,
  end = 2023
)
head(data_forest)
## country iso2c iso3c year AG.LND.FRST.K2
## 1 Africa Eastern and Southern ZH AFE 2023 NA
## 2 Africa Eastern and Southern ZH AFE 2022 4414515
## 3 Africa Eastern and Southern ZH AFE 2021 4446876
## 4 Africa Eastern and Southern ZH AFE 2020 4479395
## 5 Africa Eastern and Southern ZH AFE 2019 4511676
## 6 Africa Eastern and Southern ZH AFE 2018 4544315
# Five-number summary, mean and NA count of the forest area series.
summary(data_forest[["AG.LND.FRST.K2"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 3337 36646 1503664 266100 41699049 8399
# DÜNYA PROGRAMI
# Line chart of forest area for the "World" rows, skipping years without a
# value.
data_forest %>%
  filter(country == "World", !is.na(AG.LND.FRST.K2)) %>%
  ggplot(mapping = aes(x = year, y = AG.LND.FRST.K2)) +
  geom_line(colour = "forestgreen") +
  labs(
    title = "Forest Area (sq. km) — World",
    x = "Year",
    y = "sq. km"
  )

Kazakistan forest area tablosu
# Line chart of forest area for Kazakhstan, skipping years without a value.
data_forest %>%
  filter(!is.na(AG.LND.FRST.K2), country == "Kazakhstan") %>%
  ggplot(mapping = aes(x = year, y = AG.LND.FRST.K2)) +
  geom_line(colour = "darkgreen") +
  labs(title = "Forest Area — Kazakhstan", x = "Year", y = "Square kilometers")

BASIT REGRESYON
# Fit a linear time trend to the "World" forest area series and print the
# regression summary.
df_forest <- filter(data_forest, country == "World", !is.na(AG.LND.FRST.K2))
model_forest <- lm(formula = AG.LND.FRST.K2 ~ year, data = df_forest)
summary(model_forest)
##
## Call:
## lm(formula = AG.LND.FRST.K2 ~ year, data = df_forest)
##
## Residuals:
## Min 1Q Median 3Q Max
## -151771 -99662 11216 88164 198376
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 118025100 4162495 28.35 <2e-16 ***
## year -38397 2074 -18.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 103300 on 29 degrees of freedom
## Multiple R-squared: 0.922, Adjusted R-squared: 0.9193
## F-statistic: 342.8 on 1 and 29 DF, p-value: < 2.2e-16
Anlamlılık testi
# Coefficient t-tests for the forest model using the HC1
# heteroskedasticity-consistent covariance estimate.
coeftest(
  model_forest,
  vcov. = vcovHC(model_forest, type = "HC1")
)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 118025099.9 3837390.5 30.757 < 2.2e-16 ***
## year -38396.6 1909.5 -20.108 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Studentized Breusch-Pagan test for heteroskedasticity in the forest model.
bptest(model_forest, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: model_forest
## BP = 2.4134, df = 1, p-value = 0.1203
# Durbin-Watson test for positive first-order autocorrelation in the forest
# model's residuals.
dwtest(model_forest, alternative = "greater")
##
## Durbin-Watson test
##
## data: model_forest
## DW = 0.41032, p-value = 1.813e-09
## alternative hypothesis: true autocorrelation is greater than 0
4 Children out of school, primary, female
# Download the female primary out-of-school indicator (SE.PRM.UNER.FE) for
# 1960-2023 with country = "all", then preview the first rows.
data_female <- WDI(
  country = "all",
  indicator = "SE.PRM.UNER.FE",
  start = 1960,
  end = 2023
)
head(data_female)
## country iso2c iso3c year SE.PRM.UNER.FE
## 1 Africa Eastern and Southern ZH AFE 2023 NA
## 2 Africa Eastern and Southern ZH AFE 2022 NA
## 3 Africa Eastern and Southern ZH AFE 2021 NA
## 4 Africa Eastern and Southern ZH AFE 2020 NA
## 5 Africa Eastern and Southern ZH AFE 2019 NA
## 6 Africa Eastern and Southern ZH AFE 2018 NA
# Five-number summary, mean and NA count of the female out-of-school series.
summary(data_female[["SE.PRM.UNER.FE"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 10 3892 25780 534514 188644 41100000 13536
# Plot, for each year, the mean female out-of-school value over all rows of
# data_female (missing values ignored).
# SE.PRM.UNER.FE is a head count of children, not a rate, so the title and axis
# are labelled as a number of children rather than a percentage.
# NOTE(review): data_female was fetched with country = "all", which includes
# regional aggregates alongside countries, so this is a cross-entity average
# rather than a world total — confirm this is the intended series.
data_female %>%
  group_by(year) %>%
  summarise(
    world_female = mean(SE.PRM.UNER.FE, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = year, y = world_female)) +
  geom_line() +
  labs(
    title = "Children Out of School, Primary, Female — Mean Across Entities",
    x = "Year",
    y = "Children out of school (mean count)"
  )
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Kazakistan tablosu
# Line chart of the female primary out-of-school series for Kazakhstan,
# skipping years without a value.
# This section covers SE.PRM.UNER.FE, so the plot reads from data_female; the
# previous version re-plotted the forest area series here by mistake.
data_female %>%
  filter(country == "Kazakhstan", !is.na(SE.PRM.UNER.FE)) %>%
  ggplot(aes(x = year, y = SE.PRM.UNER.FE)) +
  geom_line(color = "red") +
  labs(
    title = "Children Out of School, Primary, Female — Kazakhstan",
    x = "Year",
    y = "Number of children"
  )

BASIT REGRESYON
# Regress the female out-of-school count on year using every non-missing row
# and print the regression summary.
# NOTE(review): no country filter is applied, so all entities returned by
# WDI(country = "all") are pooled into a single fit — confirm this is intended.
df_female <- filter(data_female, !is.na(SE.PRM.UNER.FE))
model_female <- lm(formula = SE.PRM.UNER.FE ~ year, data = df_female)
summary(model_female)
##
## Call:
## lm(formula = SE.PRM.UNER.FE ~ year, data = df_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -602109 -551184 -489480 -321893 40582442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6838530 6345333 -1.078 0.281
## year 3678 3165 1.162 0.245
##
## Residual standard error: 2598000 on 3486 degrees of freedom
## Multiple R-squared: 0.0003872, Adjusted R-squared: 0.0001004
## F-statistic: 1.35 on 1 and 3486 DF, p-value: 0.2453
Anlamlılık testi
# Coefficient t-tests for the female model using the HC1
# heteroskedasticity-consistent covariance estimate.
coeftest(
  model_female,
  vcov. = vcovHC(model_female, type = "HC1")
)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6838530.4 3850684.6 -1.7759 0.07583 .
## year 3678.0 1930.2 1.9056 0.05679 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Studentized Breusch-Pagan test for heteroskedasticity in the female model.
bptest(model_female, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: model_female
## BP = 2.3805, df = 1, p-value = 0.1229
# Durbin-Watson test for positive first-order autocorrelation in the female
# model's residuals.
# NOTE(review): the model is fitted on rows from many entities, so the statistic
# depends on the row order of df_female — confirm it is meaningful here.
dwtest(model_female, alternative = "greater")
##
## Durbin-Watson test
##
## data: model_female
## DW = 0.085501, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0