Challenge 3

For this assignment, I use the Fiscal Decentralization and Federalism dataset. The fiscal decentralization database is a panel of indices of fiscal decentralization, recorded every ten years (1965, 1975, 1985, and 1995) for about 100 countries.

Set up the correct working directory.

# Make the course folder the working directory for the rest of the session.
# NOTE(review): this hard-codes a machine-specific path; the read.csv() call
# further down already spells out the full file path, so confirm whether this
# call is still needed.
setwd("~/NYU/R Programming")

Make sure that the required packages are loaded.

library(readr)
## Warning: package 'readr' was built under R version 4.0.5
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(knitr)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Getting to know the data:

Import the following data set (https://datacatalog.worldbank.org/search/dataset/0041166/Fiscal-Decentralization-and-Federalism)

# Read the fiscal decentralization panel from CSV. The table is kept under both
# names: decme.clean_0 (named after the file) and data (used below).
decme.clean_0 <- read.csv("~/NYU/R Programming/decme-clean_0.csv")
data <- decme.clean_0

What is the average urban population, grouped by country?

# Average urban population per country: one output row per country.
# mean() is called without na.rm, so a country with a missing popurb value
# gets an NA average.
popurb <- summarise(
  group_by(data, country),
  average_urban_population = mean(popurb)
)

popurb
## # A tibble: 130 x 2
##    country  average_urban_population
##    <chr>                       <dbl>
##  1 Afghanis                    2384 
##  2 Albania                      910.
##  3 Algeria                     9329.
##  4 Angola                      1768.
##  5 Argentin                   23605.
##  6 Australi                   12490 
##  7 Austria                     4930.
##  8 Banglade                   13458 
##  9 Belgium                     9346.
## 10 Benin                       1064 
## # ... with 120 more rows

What are the top 5 countries with the highest average urban population?

# Rank the countries from the largest to the smallest average urban population.
# The sort column is referenced explicitly through popurb$ instead of calling
# attach(popurb): attach() leaves copies of the columns on the search path,
# where they can silently mask (or be masked by) other objects.
newdata <- popurb[order(-popurb$average_urban_population), ]
newdata
## # A tibble: 130 x 2
##    country  average_urban_population
##    <chr>                       <dbl>
##  1 China                     224505.
##  2 United S                  172318.
##  3 India                     165557.
##  4 USSR                      161679.
##  5 Japan                      85314 
##  6 Brazil                     82266.
##  7 United K                   49900.
##  8 Mexico                     45053.
##  9 Indonesi                   39305.
## 10 France                     38794.
## # ... with 120 more rows
# Keep only the first five rows of the ranking, i.e. the top 5 countries.
newdata <- newdata[seq_len(5L), ]
newdata
## # A tibble: 5 x 2
##   country  average_urban_population
##   <chr>                       <dbl>
## 1 China                     224505.
## 2 United S                  172318.
## 3 India                     165557.
## 4 USSR                      161679.
## 5 Japan                      85314

Let's compute the percentage of urban population compared to total population.

# Urban population as a percentage of total population, one value per row.
# A row where either input is missing yields NA.
data[["pct"]] <- data[["popurb"]] / data[["poptot"]] * 100

data[["pct"]]
##   [1]   9.352995  13.265704  16.908878  19.895235  12.509653  17.801731
##   [7]  24.184884  30.960627  31.229947  32.755776  34.773801  38.715770
##  [13]  48.611111  65.346535  76.868557  83.846154  76.111834  80.728627
##  [19]  84.784689  88.403129  82.963223  85.920863  85.461288  84.687396
##  [25]  65.162976  65.193297  64.726118  64.329459   2.209773   3.206522
##  [31]   5.230964   7.521118  93.353762  94.916292  96.012986  96.996431
##  [37]  12.510288  21.897571  30.803683  38.362069   5.195072   6.352325
##  [43]  11.397385  15.880941   6.223419   9.793424  16.990531  21.766878
##  [49]  45.872455  57.521211  64.520089  68.043299  39.994664  41.500315
##  [55]  50.517388  59.455085  50.372847  61.153587  70.655357  78.372849
##  [61]   3.825137  11.989460  25.161887  47.693351  26.714371  33.689840
##  [67]  36.248562  39.111922  72.893587  75.608600  76.351091  76.702569
##  [73]  52.808605  55.750118  58.338433  67.702646  71.698553  78.388314
##  [79]  82.626380  84.377199  17.566454  17.400691  22.995211  29.691868
##  [85]  23.105810  32.079941  37.477222  43.287995  16.405984  26.876578
##  [91]  35.727182  44.735245  32.313231  34.830684  47.502601  58.414682
##  [97]  53.514369  60.706828  67.023595  71.763271  38.124157  41.361789
## [103]  44.700984  46.792347  57.544493  64.227380  71.062778  74.580445
## [109]  45.194971  58.094075  68.067336  68.545918  77.950003  81.169998
## [115]  83.979999         NA  77.049180  81.818182  84.317560  85.071770
## [121]  35.076195  45.344691  54.611041  61.804934  37.624759  40.329629
## [127]  47.969114  56.561409  37.188958  42.362820  51.214419  60.270506
## [133]  40.680544  43.451507  43.887191  44.356315   9.763682  12.254667
## [139]  14.809330  17.100722  61.280260  69.572424  74.213755  76.455722
## [145]   7.587503   9.500016  11.674362  15.415327  43.930762  58.267884
## [151]  59.812322  64.389193  67.143853  73.020361  73.650535  74.669080
## [157]  23.434343  39.966273  59.153176  75.951718  87.083717  88.741863
## [163]  88.927550  89.222748  26.069741  30.053922  32.297224  35.888719
## [169]  11.725917  16.293083  22.277923  29.204530  13.613861  16.970803
## [175]  22.550336  28.982898  14.312977  15.948963  18.472064  21.731123
## [181]  47.526605  55.300099  58.445742  59.243016  33.968463  36.706547
## [187]  37.774619  38.602646  86.430119  89.695177  92.925220 100.000000
## [193]  25.701559  32.117998  37.744864  47.506190  17.571808  21.727642
## [199]  26.297747  32.592593  45.476941  52.791493  59.542490  62.999902
## [205]  15.790211  19.355623  26.149810  35.601933  18.798886  21.310099
## [211]  24.321019  26.789480  48.713491  53.635505  56.278153  57.910779
## [217]  38.012407  45.819338  53.382890  58.990116  50.651956  61.379310
## [223]  68.766730  74.521025  80.920796  86.657019  89.794472  90.711462
## [229]  61.843721  65.644559  66.826373  66.625972  37.613636  44.113264
## [235]  49.194602  53.699960  46.279307  55.307692  64.055300  71.363795
## [241]  66.994670  75.686848  76.675191  78.065226   8.606011  12.917546
## [247]  19.772533  28.593474  10.828855  10.298676  12.593094  14.155480
## [253]  32.351910  48.034353  64.887517  78.239783  77.707006  83.813307
## [259]  93.837209  96.923077   8.305921  11.375661  15.609349  20.699770
## [265]  49.465365  67.003975  79.422789  87.504154  22.092050  30.329397
## [271]  39.261286  42.009569  27.356747  60.874898  76.650819  85.343266
## [277]  19.858474  22.039256  21.101832  22.070312   6.334372  10.783488
## [283]  16.448231  23.987539  31.869699  37.711644  44.634360  52.106601
## [289]  12.379079  16.089014  20.804109  26.455180  54.934180  62.760791
## [295]  69.552773  73.433540  12.596505  16.210083  21.010739  26.830249
## [301]  20.970745  23.921685  24.022480  25.848357  42.138940  48.652384
## [307]  55.055003  60.832313   4.569441   8.620690  19.438493  33.845181
## [313]   9.032847  20.277170  34.994337  51.223701  37.051793  43.497758
## [319]  41.338583  40.484740   4.880503   7.665904  10.396245  18.417787
## [325]  29.909493  37.657040  45.908018  53.739805  16.690442  20.666667
## [331]  24.702886  28.580687   6.803279  10.626703  14.255448  18.196721
## [337]  17.009554  23.400112  30.730163  39.548468  42.730790  48.879103
## [343]  51.674501  54.450972  85.559714  88.405479  88.531604  88.996701
## [349]  57.561107  68.180684  71.297857  73.718096   3.506023   4.993358
## [355]   7.756166  10.309327  78.881279  82.776516  83.646443  85.344593
## [361]   6.339144  19.659091  46.526316  75.591647  23.543617  26.404314
## [367]  29.805735  34.297290  44.435917  49.042368  52.099677  54.998100
## [373]  51.870585  61.460326  66.868459  70.865205  31.604746  35.559172
## [379]  43.048950  54.042192   5.167598  11.945768  14.003486  16.042781
## [385]  50.012700  55.405326  59.979034  63.654494  45.092025  56.697743
## [391]  57.645817  59.121363  23.923759  27.658639  37.166801  56.412338
## [397]  36.232580  38.962016  44.970906  52.423364  37.699664  46.170864
## [403]  51.348735  54.898597  52.755466  59.693481  64.448556  66.578827
## [409]   2.764687   3.991788   5.004955   5.666477  38.764865  58.350572
## [415]  72.651803  82.775434  13.043126  18.942043  22.368237  31.348386
## [421]  32.680640  34.186434  37.474510  43.757503 100.000000 100.000000
## [427] 100.000000 100.000000  13.956361  21.392016  26.932738  33.261700
## [433]  38.911023  40.412621  42.713357  45.052037  19.445320  21.262700
## [439]  23.201466  25.631021  77.062322  82.729159  83.101796  83.102273
## [445]  40.000000  45.065878  48.437049  52.218310   8.998200  15.558313
## [451]  19.917905  22.230505  11.309158  16.323851  26.503635  30.714286
## [457]  12.861852  15.097077  17.866500  19.950520  63.727679  62.944664
## [463]  66.213922  71.790808  39.503240  49.841214  53.845105  61.947892
## [469]  34.114475  41.601499  52.452081  69.244076   5.254223  10.075472
## [475]  17.598163  26.857143   6.474463   8.343021   9.914495  12.537629
## [481]  81.136279  83.032874  87.171818  90.055935  71.879317  73.652488
## [487]  74.494635  76.148978  66.670332  75.749961  81.934882  85.529207
## [493]  16.421064  18.782011  19.561588  19.443316  11.021735  16.607066
## [499]  22.097340  23.572094  32.467806  41.856384  47.990971  51.301944
## [505]  47.246874  47.973957  48.333225  49.332800  26.102062  29.504107
## [511]  27.945309  28.715792  23.325955  34.827515  39.656786  39.155377
## [517]  14.420063  19.566987  25.214592  31.781805

How many countries have an average urban population of more than 50% of their total population?

# Average urban share (in percent) per country across the survey years.
# pct contains at least one NA (see the printed vector above), so na.rm = TRUE
# averages the years that are available instead of returning NA for that
# country.
popurb_pct <- data %>%
  group_by(country) %>%
  summarise(average_pct = mean(pct, na.rm = TRUE))

popurb_pct
## # A tibble: 130 x 2
##    country  average_pct
##    <chr>          <dbl>
##  1 Afghanis        14.9
##  2 Albania         34.4
##  3 Algeria         45.6
##  4 Angola          21.4
##  5 Argentin        82.5
##  6 Australi        84.8
##  7 Austria         64.9
##  8 Banglade        13.7
##  9 Belgium         95.3
## 10 Benin           25.9
## # ... with 120 more rows
# Count the countries whose average urban share exceeds 50%.
# sum() adds up the TRUE values of the comparison. The earlier length() call
# only reported the size of the vector (130, one entry per country), whatever
# the threshold. na.rm = TRUE skips any country whose average is NA.
sum(popurb_pct$average_pct > 50, na.rm = TRUE)

Is there any country in the data set that does not have complete data for all 4 years (1965, 1975, 1985, and 1995)?

# Count the rows recorded for each country; a complete panel has one row for
# each of the four survey years.
country_data <- data %>%
  count(country)

# any() collapses the per-country comparison into a single yes/no answer
# instead of printing one logical per country.
# NOTE(review): this checks row counts only; a missing value inside a row
# (such as the NA in pct) is not detected here.
any(country_data$n != 4)
## [1] FALSE

Draw a graph of the top 5 countries with the highest average urban population.

# Bar chart of the five countries with the largest average urban population.
# geom_col() takes the bar heights directly from the y values, which is what
# geom_bar(stat = "identity") does. The x-axis labels are tilted by 35 degrees.
ggplot(data = newdata, aes(x = country, y = average_urban_population)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 35, vjust = 0.5, hjust = 0.5)) +
  xlab("Countries") +
  ggtitle("Top 5 countries that has highest average for urban population") +
  ylab("Average urban population")