WDI Paketi ve Veri Yükleme

library(WDI)
library(tidyverse)
# Download GDP (gsyh), population (nüfus), and female/male unemployment
# (kadın/erkek) for every country over 2005-2020 via the WDI package.
db <- WDI(
  country = "all",
  indicator = c(
    "gsyh" = "NY.GDP.MKTP.CD",
    "nüfus" = "SP.POP.TOTL",
    "kadın" = "SL.UEM.TOTL.FE.ZS",
    "erkek" = "SL.UEM.TOTL.MA.ZS"
  ),
  start = 2005,
  end = 2020
)
library(explore)
# Per-column overview: type, NA count/percentage, unique values, min/mean/max.
db %>% describe_all()
## # A tibble: 8 × 8
##   variable type     na na_pct unique         min     mean      max
##   <chr>    <chr> <int>  <dbl>  <int>       <dbl>    <dbl>    <dbl>
## 1 country  chr       0    0      266       NA    NA       NA      
## 2 iso2c    chr       0    0      266       NA    NA       NA      
## 3 iso3c    chr       0    0      262       NA    NA       NA      
## 4 year     int       0    0       16     2005     2.01e 3  2.02e 3
## 5 gsyh     dbl     139    3.3   4087 22909980.    2.27e12  8.78e13
## 6 nüfus    dbl      16    0.4   4207     9912     2.91e 8  7.82e 9
## 7 kadın    dbl     496   11.7   3472        0.15  9.04e 0  4.25e 1
## 8 erkek    dbl     496   11.7   3425        0.04  7.18e 0  3.68e 1
# Country metadata table shipped with the WDI package.
db1 <- WDI_data$country
# Attach the metadata (join keys are inferred from the shared column names),
# then discard the rows whose income group is "Aggregates".
db <- db %>%
  left_join(db1) %>%
  filter(income != "Aggregates")
## Joining with `by = join_by(country, iso2c, iso3c)`
db %>% describe_all()
## # A tibble: 14 × 8
##    variable  type     na na_pct unique         min     mean      max
##    <chr>     <chr> <int>  <dbl>  <int>       <dbl>    <dbl>    <dbl>
##  1 country   chr       0    0      215       NA    NA       NA      
##  2 iso2c     chr       0    0      215       NA    NA       NA      
##  3 iso3c     chr       0    0      215       NA    NA       NA      
##  4 year      int       0    0       16     2005     2.01e 3  2.02e 3
##  5 gsyh      dbl     123    3.6   3318 22909980.    3.42e11  2.14e13
##  6 nüfus     dbl       0    0     3438     9912     3.29e 7  1.41e 9
##  7 kadın     dbl     480   14     2720        0.15  9.42e 0  4.25e 1
##  8 erkek     dbl     480   14     2675        0.04  7.46e 0  3.68e 1
##  9 region    chr       0    0        7       NA    NA       NA      
## 10 capital   chr       0    0      210       NA    NA       NA      
## 11 longitude chr       0    0      210       NA    NA       NA      
## 12 latitude  chr       0    0      210       NA    NA       NA      
## 13 income    chr       0    0        5       NA    NA       NA      
## 14 lending   chr       0    0        4       NA    NA       NA
# Count, per country, how many yearly observations of `kadın` are missing.
sondb <- db %>%
  group_by(country) %>%
  summarise(sayi = sum(is.na(kadın)))
# Attach the count to every row and keep only countries with no missing value.
db <- db %>%
  left_join(sondb) %>%
  filter(sayi == 0)
## Joining with `by = join_by(country)`
db %>% describe_all()
## # A tibble: 15 × 8
##    variable  type     na na_pct unique          min     mean      max
##    <chr>     <chr> <int>  <dbl>  <int>        <dbl>    <dbl>    <dbl>
##  1 country   chr       0    0      185        NA    NA       NA      
##  2 iso2c     chr       0    0      185        NA    NA       NA      
##  3 iso3c     chr       0    0      185        NA    NA       NA      
##  4 year      int       0    0       16      2005     2.01e 3  2.02e 3
##  5 gsyh      dbl      57    1.9   2904 136450662.    3.90e11  2.14e13
##  6 nüfus     dbl       0    0     2960    104632     3.82e 7  1.41e 9
##  7 kadın     dbl       0    0     2719         0.15  9.42e 0  4.25e 1
##  8 erkek     dbl       0    0     2674         0.04  7.46e 0  3.68e 1
##  9 region    chr       0    0        7        NA    NA       NA      
## 10 capital   chr       0    0      181        NA    NA       NA      
## 11 longitude chr       0    0      184        NA    NA       NA      
## 12 latitude  chr       0    0      184        NA    NA       NA      
## 13 income    chr       0    0        5        NA    NA       NA      
## 14 lending   chr       0    0        4        NA    NA       NA      
## 15 sayi      int       0    0        1         0     0        0
# Count, per country, how many yearly observations of `gsyh` are missing.
sondb2 <- db %>%
  group_by(country) %>%
  summarise(sayi2 = sum(is.na(gsyh)))
# Attach the count to every row and keep only countries with no missing value.
db <- db %>%
  left_join(sondb2) %>%
  filter(sayi2 == 0)
## Joining with `by = join_by(country)`
db %>% describe_all()
## # A tibble: 16 × 8
##    variable  type     na na_pct unique          min     mean      max
##    <chr>     <chr> <int>  <dbl>  <int>        <dbl>    <dbl>    <dbl>
##  1 country   chr       0      0    178        NA    NA       NA      
##  2 iso2c     chr       0      0    178        NA    NA       NA      
##  3 iso3c     chr       0      0    178        NA    NA       NA      
##  4 year      int       0      0     16      2005     2.01e 3  2.02e 3
##  5 gsyh      dbl       0      0   2848 136450662.    3.96e11  2.14e13
##  6 nüfus     dbl       0      0   2848    104632     3.91e 7  1.41e 9
##  7 kadın     dbl       0      0   2622         0.15  9.31e 0  4.25e 1
##  8 erkek     dbl       0      0   2582         0.04  7.4 e 0  3.68e 1
##  9 region    chr       0      0      7        NA    NA       NA      
## 10 capital   chr       0      0    175        NA    NA       NA      
## 11 longitude chr       0      0    178        NA    NA       NA      
## 12 latitude  chr       0      0    178        NA    NA       NA      
## 13 income    chr       0      0      4        NA    NA       NA      
## 14 lending   chr       0      0      4        NA    NA       NA      
## 15 sayi      int       0      0      1         0     0        0      
## 16 sayi2     int       0      0      1         0     0        0

Ülke Analizi

Australia, China, Russian Federation

Australia

# Line chart of the `erkek` series for Australia across the years.
db_AU <- filter(db, country == "Australia")
library(ggplot2)
ggplot(db_AU, aes(year, erkek)) +
  geom_line(colour = "blue") +
  theme_grey()

China

# Smoothed trend of the `kadın` series for China across the years.
db_CN <- filter(db, country == "China")
ggplot(db_CN, aes(year, kadın)) +
  geom_smooth(colour = "red") +
  theme_dark()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Russian Federation

# Russian Federation rows plus `kbgsyh`, the ratio of `gsyh` to `nüfus`.
db_RU <- db %>%
  filter(country == "Russian Federation") %>%
  mutate(kbgsyh = gsyh / nüfus)
# Scatter plot of that ratio by year.
ggplot(db_RU, aes(year, kbgsyh)) +
  geom_point(colour = "orange") +
  theme_linedraw()

2020 Yılı Analizi

# Distribution of `nüfus` across countries in 2020, in bins 39 million wide.
db_2020 <- filter(db, year == 2020)
ggplot(db_2020, aes(nüfus)) +
  geom_histogram(
    binwidth = 39000000,
    colour = "purple",
    fill = "gray"
  ) +
  theme_classic()

## Dünya Analizi

# Re-inspect the cleaned table before building the yearly world totals.
db %>% describe_all()
## # A tibble: 16 × 8
##    variable  type     na na_pct unique          min     mean      max
##    <chr>     <chr> <int>  <dbl>  <int>        <dbl>    <dbl>    <dbl>
##  1 country   chr       0      0    178        NA    NA       NA      
##  2 iso2c     chr       0      0    178        NA    NA       NA      
##  3 iso3c     chr       0      0    178        NA    NA       NA      
##  4 year      int       0      0     16      2005     2.01e 3  2.02e 3
##  5 gsyh      dbl       0      0   2848 136450662.    3.96e11  2.14e13
##  6 nüfus     dbl       0      0   2848    104632     3.91e 7  1.41e 9
##  7 kadın     dbl       0      0   2622         0.15  9.31e 0  4.25e 1
##  8 erkek     dbl       0      0   2582         0.04  7.4 e 0  3.68e 1
##  9 region    chr       0      0      7        NA    NA       NA      
## 10 capital   chr       0      0    175        NA    NA       NA      
## 11 longitude chr       0      0    178        NA    NA       NA      
## 12 latitude  chr       0      0    178        NA    NA       NA      
## 13 income    chr       0      0      4        NA    NA       NA      
## 14 lending   chr       0      0      4        NA    NA       NA      
## 15 sayi      int       0      0      1         0     0        0      
## 16 sayi2     int       0      0      1         0     0        0
# Yearly totals over all remaining countries, plus `gsyh` per person.
dunya <- db %>%
  group_by(year) %>%
  summarise(
    dunya_gsyh = sum(gsyh),
    dunya_nufusu = sum(nüfus)
  ) %>%
  mutate(dunya_kisi_uretim = dunya_gsyh / dunya_nufusu)
# Line chart of the per-person figure by year.
ggplot(dunya, aes(year, dunya_kisi_uretim)) +
  geom_line(colour = "green") +
  theme_dark()

# Attach the yearly world totals to each country-year row, then derive:
#   ulke_oranı  - the country's share of the world `gsyh` total, in percent
#   nufus_oranı - the country's share of the world `nüfus` total, in percent
#   verim       - ratio of the two shares
db <- db %>%
  left_join(dunya, by = "year") %>%
  mutate(
    ulke_oranı = gsyh / dunya_gsyh * 100,
    nufus_oranı = nüfus / dunya_nufusu * 100,
    verim = ulke_oranı / nufus_oranı
  )
# Cumulative percentage change of `verim` relative to each country's own
# first year. Rows are sorted by country and year so that `first(verim)` is
# the earliest observation, and the calculation is scoped per country with
# `.by`; without that scoping every country would be compared against the
# first row of the whole table rather than its own baseline.
db <- db %>%
  arrange(country, year) %>%
  mutate(
    kümülatif_değişim = (verim / first(verim) - 1) * 100,
    .by = country
  )
# Line chart of `verim` for France by year.
db_FR <- filter(db, country == "France")
ggplot(db_FR, aes(year, verim)) +
  geom_line() +
  labs(title = "verim in France after 2005", x = "year", y = "verim")

# Line chart of `verim` for Saudi Arabia by year.
db_SA <- filter(db, country == "Saudi Arabia")
ggplot(db_SA, aes(year, verim)) +
  geom_line() +
  labs(
    title = " verim in Saudi Arabia after 2005",
    x = "year",
    y = "verim"
  )

# Compare `gsyh` over time for Luxembourg and Singapore, one line per country.
db %>%
  filter(country %in% c("Luxembourg", "Singapore")) %>%
  ggplot(aes(year, gsyh, colour = country)) +
  geom_line()