library(WDI)
# Download two World Bank indicators for Azerbaijan:
#   AG.LND.FRST.ZS - forest area (% of land area)
#   NY.GDP.MKTP.CD - GDP (current US$)
veri <- WDI(
  country = "AZ",
  indicator = c("AG.LND.FRST.ZS", "NY.GDP.MKTP.CD")
)
# Inspect column types and the indicator labels attached by WDI().
str(veri)
## 'data.frame': 63 obs. of 6 variables:
## $ country : chr "Azerbaijan" "Azerbaijan" "Azerbaijan" "Azerbaijan" ...
## $ iso2c : chr "AZ" "AZ" "AZ" "AZ" ...
## $ iso3c : chr "AZE" "AZE" "AZE" "AZE" ...
## $ year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ AG.LND.FRST.ZS: num NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "Forest area (% of land area)"
## $ NY.GDP.MKTP.CD: num NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "label")= chr "GDP (current US$)"
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(dplyr)
library(explore)
# Distinct country names present in the downloaded data.
countries <- unique(veri[["country"]])
# Per-column overview: type, NA count and share, unique values, min/mean/max.
describe_all(veri)
## # A tibble: 6 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 country chr 0 0 1 NA NA NA
## 2 iso2c chr 0 0 1 NA NA NA
## 3 iso3c chr 0 0 1 NA NA NA
## 4 year int 0 0 63 1960 1991 2.02e 3
## 5 AG.LND.FRST.ZS dbl 33 52.4 31 11.4 12.4 1.38e 1
## 6 NY.GDP.MKTP.CD dbl 30 47.6 34 444658672. 29420997153. 7.87e10
# Country metadata bundled with the WDI package (region, capital, longitude,
# latitude, income and lending groups).
veri2 <- WDI_data$country
# Attach the metadata to the indicator data. The keys are spelled out so the
# join cannot silently change if the two tables ever share another column name.
veri <- left_join(veri, veri2, by = c("country", "iso2c", "iso3c"))
# Drop World Bank aggregate rows (regional / income-group totals). The label is
# capitalised in WDI_data ("Aggregates"), so compare case-insensitively; an
# exact match against "aggregates" would never remove anything.
# Note: rows whose region is NA are dropped by filter() as well.
veri3 <- veri %>%
  filter(tolower(region) != "aggregates")
veri3 %>% describe_all()
## # A tibble: 12 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 country chr 0 0 1 NA NA NA
## 2 iso2c chr 0 0 1 NA NA NA
## 3 iso3c chr 0 0 1 NA NA NA
## 4 year int 0 0 63 1960 1991 2.02e 3
## 5 AG.LND.FRST.ZS dbl 33 52.4 31 11.4 12.4 1.38e 1
## 6 NY.GDP.MKTP.CD dbl 30 47.6 34 444658672. 29420997153. 7.87e10
## 7 region chr 0 0 1 NA NA NA
## 8 capital chr 0 0 1 NA NA NA
## 9 longitude chr 0 0 1 NA NA NA
## 10 latitude chr 0 0 1 NA NA NA
## 11 income chr 0 0 1 NA NA NA
## 12 lending chr 0 0 1 NA NA NA
# Keep observations from 1992 onward only.
veri3 <- filter(veri3, year >= 1992)
# Re-check the missing-value counts on the reduced sample.
describe_all(veri3)
## # A tibble: 12 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 country chr 0 0 1 NA NA NA
## 2 iso2c chr 0 0 1 NA NA NA
## 3 iso3c chr 0 0 1 NA NA NA
## 4 year int 0 0 31 1992 2007 2.02e 3
## 5 AG.LND.FRST.ZS dbl 1 3.2 31 11.4 12.4 1.38e 1
## 6 NY.GDP.MKTP.CD dbl 0 0 31 444658672. 30860130889. 7.87e10
## 7 region chr 0 0 1 NA NA NA
## 8 capital chr 0 0 1 NA NA NA
## 9 longitude chr 0 0 1 NA NA NA
## 10 latitude chr 0 0 1 NA NA NA
## 11 income chr 0 0 1 NA NA NA
## 12 lending chr 0 0 1 NA NA NA
# Count the missing GDP observations per country (sayi = number of NAs).
# The column must be referenced by its bare name: is.na("NY.GDP.MKTP.CD")
# tests the string literal itself, which is never NA, so the count would
# always be 0 and the filter below would keep every country.
bos_sayi <- veri3 %>%
  group_by(country) %>%
  summarise(sayi = sum(is.na(NY.GDP.MKTP.CD)))
# Attach the per-country NA count to every row of that country.
veri3 <- left_join(veri3, bos_sayi, by = "country")
# Keep only countries whose GDP series has no missing values.
veri3 <- veri3 %>%
  filter(sayi < 1)
veri3 %>% describe_all()
## # A tibble: 13 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 country chr 0 0 1 NA NA NA
## 2 iso2c chr 0 0 1 NA NA NA
## 3 iso3c chr 0 0 1 NA NA NA
## 4 year int 0 0 31 1992 2007 2.02e 3
## 5 AG.LND.FRST.ZS dbl 1 3.2 31 11.4 12.4 1.38e 1
## 6 NY.GDP.MKTP.CD dbl 0 0 31 444658672. 30860130889. 7.87e10
## 7 region chr 0 0 1 NA NA NA
## 8 capital chr 0 0 1 NA NA NA
## 9 longitude chr 0 0 1 NA NA NA
## 10 latitude chr 0 0 1 NA NA NA
## 11 income chr 0 0 1 NA NA NA
## 12 lending chr 0 0 1 NA NA NA
## 13 sayi int 0 0 1 0 0 0