For this assignment, I use the Fiscal Decentralization and Federalism dataset. The fiscal decentralization database is a panel of indices of fiscal decentralization recorded every ten years (1965, 1975, 1985, and 1995) for about 100 countries.
Set up the correct working directory:
# NOTE(review): this path is specific to the author's machine; setwd() raises
# an error on any system where the directory does not exist.
setwd("~/NYU/R Programming")
Make sure that the required packages are loaded:
library(readr)
## Warning: package 'readr' was built under R version 4.0.5
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(knitr)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
Getting to know the data:
Import the following data set (https://datacatalog.worldbank.org/search/dataset/0041166/Fiscal-Decentralization-and-Federalism):
# Read the decme-clean_0 CSV into a data frame.
# The same data frame is bound to both `decme.clean_0` and `data`.
decme.clean_0 <- read.csv("~/NYU/R Programming/decme-clean_0.csv")
data <- decme.clean_0
What is the average urban population, grouped by country?
# Mean of the `popurb` column per country; the result has one row per country.
# mean() is called without na.rm, so a missing value in any of a country's
# rows makes that country's average NA.
# Note: the result is stored under the name `popurb`, the same name as the
# source column in `data`.
popurb <- summarise(
  group_by(data, country),
  average_urban_population = mean(popurb)
)
popurb
## # A tibble: 130 x 2
## country average_urban_population
## <chr> <dbl>
## 1 Afghanis 2384
## 2 Albania 910.
## 3 Algeria 9329.
## 4 Angola 1768.
## 5 Argentin 23605.
## 6 Australi 12490
## 7 Austria 4930.
## 8 Banglade 13458
## 9 Belgium 9346.
## 10 Benin 1064
## # ... with 120 more rows
What are the top 5 countries with the highest average urban population?
# Sort the per-country averages from largest to smallest.
# The column is referenced explicitly through `popurb$` instead of attach():
# attach() leaves the tibble on the search path for the rest of the session,
# where its columns can silently mask, or be masked by, other objects.
newdata <- popurb[order(-popurb$average_urban_population), ]
newdata
## # A tibble: 130 x 2
## country average_urban_population
## <chr> <dbl>
## 1 China 224505.
## 2 United S 172318.
## 3 India 165557.
## 4 USSR 161679.
## 5 Japan 85314
## 6 Brazil 82266.
## 7 United K 49900.
## 8 Mexico 45053.
## 9 Indonesi 39305.
## 10 France 38794.
## # ... with 120 more rows
# Keep only the first five rows of `newdata`.
newdata <- newdata[seq_len(5), ]
newdata
## # A tibble: 5 x 2
## country average_urban_population
## <chr> <dbl>
## 1 China 224505.
## 2 United S 172318.
## 3 India 165557.
## 4 USSR 161679.
## 5 Japan 85314
Let's compute the percentage of urban population relative to total population:
# Urban population as a percentage of total population, computed row by row
# and stored in a new `pct` column. Rows where either input is missing yield NA.
data[["pct"]] <- (data[["popurb"]] / data[["poptot"]]) * 100
data[["pct"]]
## [1] 9.352995 13.265704 16.908878 19.895235 12.509653 17.801731
## [7] 24.184884 30.960627 31.229947 32.755776 34.773801 38.715770
## [13] 48.611111 65.346535 76.868557 83.846154 76.111834 80.728627
## [19] 84.784689 88.403129 82.963223 85.920863 85.461288 84.687396
## [25] 65.162976 65.193297 64.726118 64.329459 2.209773 3.206522
## [31] 5.230964 7.521118 93.353762 94.916292 96.012986 96.996431
## [37] 12.510288 21.897571 30.803683 38.362069 5.195072 6.352325
## [43] 11.397385 15.880941 6.223419 9.793424 16.990531 21.766878
## [49] 45.872455 57.521211 64.520089 68.043299 39.994664 41.500315
## [55] 50.517388 59.455085 50.372847 61.153587 70.655357 78.372849
## [61] 3.825137 11.989460 25.161887 47.693351 26.714371 33.689840
## [67] 36.248562 39.111922 72.893587 75.608600 76.351091 76.702569
## [73] 52.808605 55.750118 58.338433 67.702646 71.698553 78.388314
## [79] 82.626380 84.377199 17.566454 17.400691 22.995211 29.691868
## [85] 23.105810 32.079941 37.477222 43.287995 16.405984 26.876578
## [91] 35.727182 44.735245 32.313231 34.830684 47.502601 58.414682
## [97] 53.514369 60.706828 67.023595 71.763271 38.124157 41.361789
## [103] 44.700984 46.792347 57.544493 64.227380 71.062778 74.580445
## [109] 45.194971 58.094075 68.067336 68.545918 77.950003 81.169998
## [115] 83.979999 NA 77.049180 81.818182 84.317560 85.071770
## [121] 35.076195 45.344691 54.611041 61.804934 37.624759 40.329629
## [127] 47.969114 56.561409 37.188958 42.362820 51.214419 60.270506
## [133] 40.680544 43.451507 43.887191 44.356315 9.763682 12.254667
## [139] 14.809330 17.100722 61.280260 69.572424 74.213755 76.455722
## [145] 7.587503 9.500016 11.674362 15.415327 43.930762 58.267884
## [151] 59.812322 64.389193 67.143853 73.020361 73.650535 74.669080
## [157] 23.434343 39.966273 59.153176 75.951718 87.083717 88.741863
## [163] 88.927550 89.222748 26.069741 30.053922 32.297224 35.888719
## [169] 11.725917 16.293083 22.277923 29.204530 13.613861 16.970803
## [175] 22.550336 28.982898 14.312977 15.948963 18.472064 21.731123
## [181] 47.526605 55.300099 58.445742 59.243016 33.968463 36.706547
## [187] 37.774619 38.602646 86.430119 89.695177 92.925220 100.000000
## [193] 25.701559 32.117998 37.744864 47.506190 17.571808 21.727642
## [199] 26.297747 32.592593 45.476941 52.791493 59.542490 62.999902
## [205] 15.790211 19.355623 26.149810 35.601933 18.798886 21.310099
## [211] 24.321019 26.789480 48.713491 53.635505 56.278153 57.910779
## [217] 38.012407 45.819338 53.382890 58.990116 50.651956 61.379310
## [223] 68.766730 74.521025 80.920796 86.657019 89.794472 90.711462
## [229] 61.843721 65.644559 66.826373 66.625972 37.613636 44.113264
## [235] 49.194602 53.699960 46.279307 55.307692 64.055300 71.363795
## [241] 66.994670 75.686848 76.675191 78.065226 8.606011 12.917546
## [247] 19.772533 28.593474 10.828855 10.298676 12.593094 14.155480
## [253] 32.351910 48.034353 64.887517 78.239783 77.707006 83.813307
## [259] 93.837209 96.923077 8.305921 11.375661 15.609349 20.699770
## [265] 49.465365 67.003975 79.422789 87.504154 22.092050 30.329397
## [271] 39.261286 42.009569 27.356747 60.874898 76.650819 85.343266
## [277] 19.858474 22.039256 21.101832 22.070312 6.334372 10.783488
## [283] 16.448231 23.987539 31.869699 37.711644 44.634360 52.106601
## [289] 12.379079 16.089014 20.804109 26.455180 54.934180 62.760791
## [295] 69.552773 73.433540 12.596505 16.210083 21.010739 26.830249
## [301] 20.970745 23.921685 24.022480 25.848357 42.138940 48.652384
## [307] 55.055003 60.832313 4.569441 8.620690 19.438493 33.845181
## [313] 9.032847 20.277170 34.994337 51.223701 37.051793 43.497758
## [319] 41.338583 40.484740 4.880503 7.665904 10.396245 18.417787
## [325] 29.909493 37.657040 45.908018 53.739805 16.690442 20.666667
## [331] 24.702886 28.580687 6.803279 10.626703 14.255448 18.196721
## [337] 17.009554 23.400112 30.730163 39.548468 42.730790 48.879103
## [343] 51.674501 54.450972 85.559714 88.405479 88.531604 88.996701
## [349] 57.561107 68.180684 71.297857 73.718096 3.506023 4.993358
## [355] 7.756166 10.309327 78.881279 82.776516 83.646443 85.344593
## [361] 6.339144 19.659091 46.526316 75.591647 23.543617 26.404314
## [367] 29.805735 34.297290 44.435917 49.042368 52.099677 54.998100
## [373] 51.870585 61.460326 66.868459 70.865205 31.604746 35.559172
## [379] 43.048950 54.042192 5.167598 11.945768 14.003486 16.042781
## [385] 50.012700 55.405326 59.979034 63.654494 45.092025 56.697743
## [391] 57.645817 59.121363 23.923759 27.658639 37.166801 56.412338
## [397] 36.232580 38.962016 44.970906 52.423364 37.699664 46.170864
## [403] 51.348735 54.898597 52.755466 59.693481 64.448556 66.578827
## [409] 2.764687 3.991788 5.004955 5.666477 38.764865 58.350572
## [415] 72.651803 82.775434 13.043126 18.942043 22.368237 31.348386
## [421] 32.680640 34.186434 37.474510 43.757503 100.000000 100.000000
## [427] 100.000000 100.000000 13.956361 21.392016 26.932738 33.261700
## [433] 38.911023 40.412621 42.713357 45.052037 19.445320 21.262700
## [439] 23.201466 25.631021 77.062322 82.729159 83.101796 83.102273
## [445] 40.000000 45.065878 48.437049 52.218310 8.998200 15.558313
## [451] 19.917905 22.230505 11.309158 16.323851 26.503635 30.714286
## [457] 12.861852 15.097077 17.866500 19.950520 63.727679 62.944664
## [463] 66.213922 71.790808 39.503240 49.841214 53.845105 61.947892
## [469] 34.114475 41.601499 52.452081 69.244076 5.254223 10.075472
## [475] 17.598163 26.857143 6.474463 8.343021 9.914495 12.537629
## [481] 81.136279 83.032874 87.171818 90.055935 71.879317 73.652488
## [487] 74.494635 76.148978 66.670332 75.749961 81.934882 85.529207
## [493] 16.421064 18.782011 19.561588 19.443316 11.021735 16.607066
## [499] 22.097340 23.572094 32.467806 41.856384 47.990971 51.301944
## [505] 47.246874 47.973957 48.333225 49.332800 26.102062 29.504107
## [511] 27.945309 28.715792 23.325955 34.827515 39.656786 39.155377
## [517] 14.420063 19.566987 25.214592 31.781805
How many countries have an average urban population of more than 50% of their total population?
# Average of the `pct` column per country; the result has one row per country.
# na.rm = TRUE: `pct` contains at least one NA. Without it, a single missing
# value would turn that country's average into NA and the country would drop
# out of any later comparison against a threshold.
popurb_pct <- data %>%
  group_by(country) %>%
  summarise(average_pct = mean(pct, na.rm = TRUE))
popurb_pct
## # A tibble: 130 x 2
## country average_pct
## <chr> <dbl>
## 1 Afghanis 14.9
## 2 Albania 34.4
## 3 Algeria 45.6
## 4 Angola 21.4
## 5 Argentin 82.5
## 6 Australi 84.8
## 7 Austria 64.9
## 8 Banglade 13.7
## 9 Belgium 95.3
## 10 Benin 25.9
## # ... with 120 more rows
# Count the countries whose average urban share exceeds 50%.
# sum() over the logical vector counts its TRUE entries; length() would return
# the number of countries regardless of the comparison result.
# na.rm = TRUE skips countries whose average is NA.
sum(popurb_pct$average_pct > 50, na.rm = TRUE)
## [1] 130
Is there any country in the data set that does not have complete data for all four years (1965, 1975, 1985, and 1995)?
# Number of rows recorded for each country (stored in column `n`).
country_data <- count(data, country)
# TRUE marks a country whose row count differs from 4.
country_data[["n"]] != 4
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Draw a graph of the top 5 countries with the highest average urban population.
# Bar chart of `newdata`: one blue bar per country, with bar height equal to
# average_urban_population. The x-axis labels are rotated 35 degrees.
ggplot(newdata, aes(country, average_urban_population)) +
  geom_bar(stat = "identity", fill = "blue") +
  xlab("Countries") +
  ylab("Average urban population") +
  ggtitle("Top 5 countries that has highest average for urban population") +
  theme(axis.text.x = element_text(angle = 35, vjust = 0.5, hjust = 0.5))