library(WDI)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)

1 Birth rate, crude (per 1,000 people)

# Download the crude birth rate indicator (SP.DYN.CBRT.IN) for every entity
# WDI returns under country = "all", covering 1960-2023, then preview it.
data <- WDI(
  country = "all",
  indicator = "SP.DYN.CBRT.IN",
  start = 1960,
  end = 2023
)
head(data, n = 6L)

##                       country iso2c iso3c year SP.DYN.CBRT.IN
## 1 Africa Eastern and Southern    ZH   AFE 2023       32.95529
## 2 Africa Eastern and Southern    ZH   AFE 2022       33.32043
## 3 Africa Eastern and Southern    ZH   AFE 2021       33.68134
## 4 Africa Eastern and Southern    ZH   AFE 2020       34.01047
## 5 Africa Eastern and Southern    ZH   AFE 2019       34.33588
## 6 Africa Eastern and Southern    ZH   AFE 2018       34.58361

Dünya çapında doğum oranı tablosu

# Line chart of the crude birth rate for the "World" aggregate; rows with a
# missing indicator value are dropped before plotting.
ggplot(
  data = filter(data, country == "World", !is.na(SP.DYN.CBRT.IN)),
  mapping = aes(x = year, y = SP.DYN.CBRT.IN)
) +
  geom_line(colour = "blue") +
  labs(
    title = "Crude Birth Rate (World)",
    x = "Year",
    y = "Births per 1000 people"
  )

Kazakistan doğum oranı tablosu

# Line chart of the crude birth rate for Kazakhstan; rows with a missing
# indicator value are dropped before plotting.
ggplot(
  data = filter(data, country == "Kazakhstan", !is.na(SP.DYN.CBRT.IN)),
  mapping = aes(x = year, y = SP.DYN.CBRT.IN)
) +
  geom_line(colour = "red") +
  labs(
    title = "Crude Birth Rate — Kazakhstan",
    x = "Year",
    y = "Births per 1000"
  )

Basit ekonometrik model

# Keep only the non-missing "World" observations and fit a linear time trend
# of the crude birth rate on the calendar year.
df <- filter(data, country == "World", !is.na(SP.DYN.CBRT.IN))

# The lm() call is written with literal arguments so the "Call:" line printed
# by summary() stays readable.
model <- lm(SP.DYN.CBRT.IN ~ year, data = df)
summary(model)

## 
## Call:
## lm(formula = SP.DYN.CBRT.IN ~ year, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.95355 -0.65342  0.05802  0.71839  2.73795 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 601.152048  13.259593   45.34   <2e-16 ***
## year         -0.289120   0.006658  -43.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9839 on 62 degrees of freedom
## Multiple R-squared:  0.9682, Adjusted R-squared:  0.9677 
## F-statistic:  1886 on 1 and 62 DF,  p-value: < 2.2e-16

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(sandwich)

Anlamlılık testi

# Coefficient t-tests for the birth-rate trend model using an HC1
# heteroskedasticity-consistent covariance matrix from sandwich::vcovHC().
# coeftest()'s argument is named `vcov.`; it is spelled out in full so the
# call does not depend on partial argument matching.
coeftest(model, vcov. = vcovHC(model, type = "HC1"))

## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept) 601.1520478  17.1324141  35.089 < 2.2e-16 ***
## year         -0.2891200   0.0085613 -33.771 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2 Children out of school, primary, male

# Download the "children out of school, primary, male" indicator
# (SE.PRM.UNER.MA) for every entity WDI returns under country = "all",
# covering 1960-2023, then preview it.
data_male <- WDI(
  country = "all",
  indicator = "SE.PRM.UNER.MA",
  start = 1960,
  end = 2023
)
head(data_male, n = 6L)

##                       country iso2c iso3c year SE.PRM.UNER.MA
## 1 Africa Eastern and Southern    ZH   AFE 2023             NA
## 2 Africa Eastern and Southern    ZH   AFE 2022             NA
## 3 Africa Eastern and Southern    ZH   AFE 2021             NA
## 4 Africa Eastern and Southern    ZH   AFE 2020             NA
## 5 Africa Eastern and Southern    ZH   AFE 2019             NA
## 6 Africa Eastern and Southern    ZH   AFE 2018             NA

# Five-number summary, mean and NA count of the male out-of-school indicator.
summary(data_male[["SE.PRM.UNER.MA"]])

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##        1     4478    25693   491035   150487 38300000    13537

DÜNYA PROGRAMI

# Average the male out-of-school indicator over every row returned by WDI for
# each year and plot the yearly means. No country filter is applied, so the
# series is a mean across all returned entities rather than the "World"
# aggregate, and the indicator is a head count, not a percentage; the labels
# state this explicitly. Years without any observation produce NaN means,
# which geom_line() drops with a warning.
data_male %>%
  group_by(year) %>%
  summarise(
    mean_out_of_school = mean(SE.PRM.UNER.MA, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = year, y = mean_out_of_school)) +
  geom_line() +
  labs(
    title = "Children Out of School, Primary, Male — Cross-Entity Mean",
    x = "Year",
    y = "Children out of school (mean count)"
  )

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Kazakistan tablosu

# Line chart of the male out-of-school indicator for Kazakhstan; rows with a
# missing value are dropped first. The indicator is a head count of children,
# so the title and y-axis are labelled as a count rather than a percentage.
data_male %>%
  filter(country == "Kazakhstan", !is.na(SE.PRM.UNER.MA)) %>%
  ggplot(aes(x = year, y = SE.PRM.UNER.MA)) +
  geom_line(color = "blue") +
  labs(
    title = "Children Out of School, Primary, Male — Kazakhstan",
    x = "Year",
    y = "Number of children"
  )

BASIT REGRESYON

# Drop rows with a missing indicator value and fit a linear time trend of the
# male out-of-school count on the calendar year.
# NOTE(review): no country filter is applied here, so the regression pools
# every entity in data_male, whereas the birth-rate and forest models use
# only the "World" rows — confirm that pooling is intended.
df_male <- filter(data_male, !is.na(SE.PRM.UNER.MA))

model_male <- lm(SE.PRM.UNER.MA ~ year, data = df_male)
summary(model_male)

## 
## Call:
## lm(formula = SE.PRM.UNER.MA ~ year, data = df_male)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
##  -619152  -533240  -449138  -260305 37841148 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -13485282    6151727  -2.192   0.0284 *
## year             6972       3069   2.272   0.0231 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2518000 on 3485 degrees of freedom
## Multiple R-squared:  0.001479,   Adjusted R-squared:  0.001192 
## F-statistic: 5.162 on 1 and 3485 DF,  p-value: 0.02315

Sağlam standart hatalar

# Coefficient t-tests for the male out-of-school trend model using an HC1
# heteroskedasticity-consistent covariance matrix from sandwich::vcovHC().
# coeftest()'s argument is named `vcov.`; it is spelled out in full so the
# call does not depend on partial argument matching.
coeftest(model_male, vcov. = vcovHC(model_male, type = "HC1"))

## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept) -13485281.5   3732615.8 -3.6128 0.0003072 ***
## year             6972.1      1872.7  3.7229 0.0002001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Breusch-Pagan testi

# Studentized Breusch-Pagan test for heteroskedasticity in model_male.
lmtest::bptest(model_male)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_male
## BP = 3.2626, df = 1, p-value = 0.07088

Durbin-Watson

# Durbin-Watson test for positive first-order autocorrelation in the
# residuals of model_male (rows are taken in the order they appear in df_male).
lmtest::dwtest(model_male)

## 
##  Durbin-Watson test
## 
## data:  model_male
## DW = 0.070347, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

3 Forest area (sq.km)

# Download the forest area indicator in square kilometres (AG.LND.FRST.K2)
# for every entity WDI returns under country = "all", covering 1960-2023,
# then preview it.
data_forest <- WDI(
  country = "all",
  indicator = "AG.LND.FRST.K2",
  start = 1960,
  end = 2023
)
head(data_forest, n = 6L)

##                       country iso2c iso3c year AG.LND.FRST.K2
## 1 Africa Eastern and Southern    ZH   AFE 2023             NA
## 2 Africa Eastern and Southern    ZH   AFE 2022        4414515
## 3 Africa Eastern and Southern    ZH   AFE 2021        4446876
## 4 Africa Eastern and Southern    ZH   AFE 2020        4479395
## 5 Africa Eastern and Southern    ZH   AFE 2019        4511676
## 6 Africa Eastern and Southern    ZH   AFE 2018        4544315

# Five-number summary, mean and NA count of the forest area indicator.
summary(data_forest[["AG.LND.FRST.K2"]])

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##        0     3337    36646  1503664   266100 41699049     8399

# DÜNYA PROGRAMI

# Line chart of forest area for the "World" aggregate; rows with a missing
# indicator value are dropped before plotting.
ggplot(
  data = filter(data_forest, !is.na(AG.LND.FRST.K2), country == "World"),
  mapping = aes(x = year, y = AG.LND.FRST.K2)
) +
  geom_line(colour = "forestgreen") +
  labs(
    title = "Forest Area (sq. km) — World",
    x = "Year",
    y = "sq. km"
  )

Kazakistan forest area tablosu

# Line chart of forest area for Kazakhstan; rows with a missing indicator
# value are dropped before plotting.
ggplot(
  data = filter(data_forest, !is.na(AG.LND.FRST.K2), country == "Kazakhstan"),
  mapping = aes(x = year, y = AG.LND.FRST.K2)
) +
  geom_line(colour = "darkgreen") +
  labs(title = "Forest Area — Kazakhstan", x = "Year", y = "Square kilometers")

BASIT REGRESYON

# Keep only the non-missing "World" observations and fit a linear time trend
# of forest area on the calendar year.
df_forest <- filter(data_forest, country == "World", !is.na(AG.LND.FRST.K2))

# The lm() call is written with literal arguments so the "Call:" line printed
# by summary() stays readable.
model_forest <- lm(AG.LND.FRST.K2 ~ year, data = df_forest)
summary(model_forest)

## 
## Call:
## lm(formula = AG.LND.FRST.K2 ~ year, data = df_forest)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -151771  -99662   11216   88164  198376 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 118025100    4162495   28.35   <2e-16 ***
## year           -38397       2074  -18.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 103300 on 29 degrees of freedom
## Multiple R-squared:  0.922,  Adjusted R-squared:  0.9193 
## F-statistic: 342.8 on 1 and 29 DF,  p-value: < 2.2e-16

Anlamlılık testi

# Coefficient t-tests for the forest-area trend model using an HC1
# heteroskedasticity-consistent covariance matrix from sandwich::vcovHC().
# coeftest()'s argument is named `vcov.`; it is spelled out in full so the
# call does not depend on partial argument matching.
coeftest(model_forest, vcov. = vcovHC(model_forest, type = "HC1"))

## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept) 118025099.9   3837390.5  30.757 < 2.2e-16 ***
## year           -38396.6      1909.5 -20.108 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Studentized Breusch-Pagan test for heteroskedasticity in model_forest.
lmtest::bptest(model_forest)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_forest
## BP = 2.4134, df = 1, p-value = 0.1203

# Durbin-Watson test for positive first-order autocorrelation in the
# residuals of model_forest.
lmtest::dwtest(model_forest)

## 
##  Durbin-Watson test
## 
## data:  model_forest
## DW = 0.41032, p-value = 1.813e-09
## alternative hypothesis: true autocorrelation is greater than 0

4 Children out of school, primary, female

# Download the "children out of school, primary, female" indicator
# (SE.PRM.UNER.FE) for every entity WDI returns under country = "all",
# covering 1960-2023, then preview it.
data_female <- WDI(
  country = "all",
  indicator = "SE.PRM.UNER.FE",
  start = 1960,
  end = 2023
)
head(data_female, n = 6L)

##                       country iso2c iso3c year SE.PRM.UNER.FE
## 1 Africa Eastern and Southern    ZH   AFE 2023             NA
## 2 Africa Eastern and Southern    ZH   AFE 2022             NA
## 3 Africa Eastern and Southern    ZH   AFE 2021             NA
## 4 Africa Eastern and Southern    ZH   AFE 2020             NA
## 5 Africa Eastern and Southern    ZH   AFE 2019             NA
## 6 Africa Eastern and Southern    ZH   AFE 2018             NA

# Five-number summary, mean and NA count of the female out-of-school indicator.
summary(data_female[["SE.PRM.UNER.FE"]])

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##       10     3892    25780   534514   188644 41100000    13536

# Average the female out-of-school indicator over every row returned by WDI
# for each year and plot the yearly means. No country filter is applied, so
# the series is a mean across all returned entities rather than the "World"
# aggregate, and the indicator is a head count, not a percentage; the labels
# state this explicitly. Years without any observation produce NaN means,
# which geom_line() drops with a warning.
data_female %>%
  group_by(year) %>%
  summarise(
    mean_out_of_school = mean(SE.PRM.UNER.FE, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = year, y = mean_out_of_school)) +
  geom_line() +
  labs(
    title = "Children Out of School, Primary, Female — Cross-Entity Mean",
    x = "Year",
    y = "Children out of school (mean count)"
  )

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Kazakistan tablosu

# Line chart of the female out-of-school indicator for Kazakhstan; rows with
# a missing value are dropped first. This section covers SE.PRM.UNER.FE, so
# the chart reads from data_female (the previous version re-plotted the
# forest-area series from data_forest by mistake).
data_female %>%
  filter(country == "Kazakhstan", !is.na(SE.PRM.UNER.FE)) %>%
  ggplot(aes(x = year, y = SE.PRM.UNER.FE)) +
  geom_line(color = "blue") +
  labs(
    title = "Children Out of School, Primary, Female — Kazakhstan",
    x = "Year",
    y = "Number of children"
  )

BASIT REGRESYON

# Drop rows with a missing indicator value and fit a linear time trend of the
# female out-of-school count on the calendar year.
# NOTE(review): no country filter is applied here, so the regression pools
# every entity in data_female, whereas the birth-rate and forest models use
# only the "World" rows — confirm that pooling is intended.
df_female <- filter(data_female, !is.na(SE.PRM.UNER.FE))

model_female <- lm(SE.PRM.UNER.FE ~ year, data = df_female)
summary(model_female)

## 
## Call:
## lm(formula = SE.PRM.UNER.FE ~ year, data = df_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
##  -602109  -551184  -489480  -321893 40582442 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6838530    6345333  -1.078    0.281
## year            3678       3165   1.162    0.245
## 
## Residual standard error: 2598000 on 3486 degrees of freedom
## Multiple R-squared:  0.0003872,  Adjusted R-squared:  0.0001004 
## F-statistic:  1.35 on 1 and 3486 DF,  p-value: 0.2453

Anlamlılık testi

# Coefficient t-tests for the female out-of-school trend model using an HC1
# heteroskedasticity-consistent covariance matrix from sandwich::vcovHC().
# coeftest()'s argument is named `vcov.`; it is spelled out in full so the
# call does not depend on partial argument matching.
coeftest(model_female, vcov. = vcovHC(model_female, type = "HC1"))

## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -6838530.4  3850684.6 -1.7759  0.07583 .
## year            3678.0     1930.2  1.9056  0.05679 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Studentized Breusch-Pagan test for heteroskedasticity in model_female.
lmtest::bptest(model_female)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_female
## BP = 2.3805, df = 1, p-value = 0.1229

# Durbin-Watson test for positive first-order autocorrelation in the
# residuals of model_female (rows are taken in the order they appear in
# df_female).
lmtest::dwtest(model_female)

## 
##  Durbin-Watson test
## 
## data:  model_female
## DW = 0.085501, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

final projesi

Assel Ramazan

2025-12-19

1 Birth rate, crude (per 1,000 people)

Dünya çapında doğum oranı tablosu

Kazakistan doğum oranı tablosu

Basit ekonometrik model

Anlamlılık testi

2 Children out of school, primary, male

DÜNYA PROGRAMI

Kazakistan tablosu

BASIT REGRESYON

Sağlam standart hatalar

Breusch-Pagan testi

Durbin-Watson

3 Forest area (sq.km)

# DÜNYA PROGRAMI

Kazakistan forest area tablosu

BASIT REGRESYON

Anlamlılık testi

4 Children out of school, primary, female

Kazakistan tablosu

BASIT REGRESYON

Anlamlılık testi