act_start <- us %>% 
  select(subagency_name, `_id`, activity_description, fiscal_year, activity_start_date) %>%  
  group_by(subagency_name, `_id`,activity_description,fiscal_year, activity_start_date) %>% 
  filter(is.na(activity_start_date)) %>% 
  summarise(count = n()) %>% 
  arrange(desc(count))
## `summarise()` has grouped output by 'subagency_name', '_id',
## 'activity_description', 'fiscal_year'. You can override using the `.groups`
## argument.
mytable <- xtabs(~fiscal_year + aid_type_group_name, data = us)

#to drop a column
us$subagency_acronym <- NULL
dim(us)
## [1] 8297   21
library(readr)
library(tidyverse)
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.2.3
library(Kendall)
## Warning: package 'Kendall' was built under R version 4.2.3
library(vroom)
## Warning: package 'vroom' was built under R version 4.2.3
## 
## Attaching package: 'vroom'
## 
## The following objects are masked from 'package:readr':
## 
##     as.col_spec, col_character, col_date, col_datetime, col_double,
##     col_factor, col_guess, col_integer, col_logical, col_number,
##     col_skip, col_time, cols, cols_condense, cols_only, date_names,
##     date_names_lang, date_names_langs, default_locale, fwf_cols,
##     fwf_empty, fwf_positions, fwf_widths, locale, output_column,
##     problems, spec
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
us2 <- clean_names(us)
introduce(us)
## # A tibble: 1 × 9
##    rows columns discrete_columns continuous_columns all_missing_columns
##   <int>   <int>            <int>              <int>               <int>
## 1  8297      21               18                  3                   0
## # ℹ 4 more variables: total_missing_values <int>, complete_rows <int>,
## #   total_observations <int>, memory_usage <dbl>
plot_missing(data = us)

plot_bar(data = us)
## 7 columns ignored with more than 50 categories.
## channel_name: 365 categories
## dac_purpose_name: 92 categories
## funding_account_name: 58 categories
## activity_name: 1166 categories
## activity_start_date: 303 categories
## activity_end_date: 245 categories
## activity_description: 1378 categories

colSums(us == "")
##                         _id implementing_agency_acronym 
##                           0                           0 
##    implementing_agency_name              subagency_name 
##                           0                           0 
##       channel_category_name    channel_subcategory_name 
##                           0                           0 
##                channel_name           dac_category_name 
##                           0                           0 
##             dac_sector_name            dac_purpose_name 
##                           0                           0 
##        funding_account_name    assistance_category_name 
##                           0                           0 
##               activity_name         activity_start_date 
##                           0                          NA 
##           activity_end_date                 fiscal_year 
##                          NA                           0 
##              current_amount         funding_agency_name 
##                           0                           0 
##      funding_agency_acronym         aid_type_group_name 
##                           0                           0 
##        activity_description 
##                           0
missing_vars <- function(x) {
  var <- 0
  missing <- 0
  missing_prop <- 0
  for (i in 1:length(names(x))) {
    var[i] <- names(x)[i]
    missing[i] <- sum(is.na(x[, i]))
    missing_prop[i] <- missing[i] / nrow(x)
  }
  (missing_data <- data.frame(var = var, missing = missing, missing_prop = missing_prop) %>% 
      arrange(desc(missing_prop)))
}

missing_vars(us2)
##                            var missing missing_prop
## 1          activity_start_date    5966    0.7190551
## 2            activity_end_date    5874    0.7079667
## 3                           id       0    0.0000000
## 4  implementing_agency_acronym       0    0.0000000
## 5     implementing_agency_name       0    0.0000000
## 6               subagency_name       0    0.0000000
## 7        channel_category_name       0    0.0000000
## 8     channel_subcategory_name       0    0.0000000
## 9                 channel_name       0    0.0000000
## 10           dac_category_name       0    0.0000000
## 11             dac_sector_name       0    0.0000000
## 12            dac_purpose_name       0    0.0000000
## 13        funding_account_name       0    0.0000000
## 14    assistance_category_name       0    0.0000000
## 15               activity_name       0    0.0000000
## 16                 fiscal_year       0    0.0000000
## 17              current_amount       0    0.0000000
## 18         funding_agency_name       0    0.0000000
## 19      funding_agency_acronym       0    0.0000000
## 20         aid_type_group_name       0    0.0000000
## 21        activity_description       0    0.0000000
#two variables have 71 and 70% missing data

prop.table(table(us2$subagency_name))
## 
##                              Agricultural Research Service 
##                                               0.0007231529 
##                Animal, Plant and Health Inspection Service 
##                                               0.0016873569 
##                                     Army Corp of Engineers 
##                                               0.0003615765 
##                                            Bureau for Asia 
##                                               0.0442328552 
##                              Bureau for Asia and Near East 
##                                               0.0007231529 
## Bureau for Democracy, Conflict and Humanitarian Assistance 
##                                               0.1447511149 
##          Bureau for Economic Growth, Agriculture and Trade 
##                                               0.0010847294 
##                              Bureau for Europe and Eurasia 
##                                               0.0001205255 
##                                   Bureau for Global Health 
##                                               0.0042183922 
##                                     Bureau for Middle East 
##                                               0.0015668314 
##                  Bureau for Policy, Planning, and Learning 
##                                               0.0009642039 
##                      Bureau of International Labor Affairs 
##                                               0.0008436784 
##                 Centers for Disease Control and Prevention 
##                                               0.0138604315 
##                               Counterterrorism Coordinator 
##                                               0.0036157647 
##                        Defense Security Cooperation Agency 
##                                               0.0138604315 
##                            Defense Threat Reduction Agency 
##                                               0.0006026275 
##                         Democracy, Human Rights, and Labor 
##                                               0.0040978667 
##                                        Diplomatic Security 
##                                               0.0021694588 
##                            Drug Enforcement Administration 
##                                               0.0003615765 
##                           Educational and Cultural Affairs 
##                                               0.0021694588 
##                            Federal Bureau of Investigation 
##                                               0.0010847294 
##                             Food Safety Inspection Service 
##                                               0.0001205255 
##                               Foreign Agricultural Service 
##                                               0.0081957334 
##                                     Global Development Lab 
##                                               0.0001205255 
##        International Narcotics and Law Enforcement Affairs 
##                                               0.0048210196 
##                International Security and Nonproliferation 
##                                               0.0047004942 
##                         International Trade Administration 
##                                               0.0002410510 
##                           National Endowment for Democracy 
##                                               0.0451970592 
##                   National Nuclear Security Administration 
##                                               0.0038568157 
##            National Oceanic and Atmospheric Administration 
##                                               0.0001205255 
##                                             not applicable 
##                                               0.0128962276 
##                 Office of Afghanistan and Pakistan Affairs 
##                                               0.5723755574 
##                                 Office of Counternarcotics 
##                                               0.0020489333 
##                              Office of Development Partner 
##                                               0.0001205255 
##                                  Office of General Counsel 
##                                               0.0006026275 
##                             Office of Technical Assistance 
##                                               0.0013257804 
##                         Office of the Secretary of Defense 
##                                               0.0007231529 
##                                 Political-Military Affairs 
##                                               0.0001205255 
##                        Population, Refugees, and Migration 
##                                               0.0597806436 
##                            South and Central Asian Affairs 
##                                               0.0100036158 
##                                     Trafficking in Persons 
##                                               0.0010847294 
##                         U.S. Customs and Border Protection 
##                                               0.0004821020 
##                             U.S. Fish and Wildlife Service 
##                                               0.0008436784 
##                   U.S. Immigration and Customs Enforcement 
##                                               0.0003615765 
##                                       USAID Bureau - Other 
##                                               0.0267566590
clean_names(us)
## # A tibble: 8,297 × 21
##       id implementing_agency_acronym implementing_agency_name     subagency_name
##    <dbl> <chr>                       <chr>                        <chr>         
##  1     1 USAID                       U.S. Agency for Internation… Bureau for As…
##  2     2 DOD                         Department of Defense        Office of the…
##  3     3 DOD                         Department of Defense        Office of the…
##  4     4 DOD                         Department of Defense        Defense Secur…
##  5     5 DOD                         Department of Defense        Defense Secur…
##  6     6 DOD                         Department of Defense        Defense Secur…
##  7     7 DOD                         Department of Defense        Defense Secur…
##  8     8 DOD                         Department of Defense        Defense Secur…
##  9     9 DOD                         Department of Defense        Defense Secur…
## 10    10 DOD                         Department of Defense        Defense Secur…
## # ℹ 8,287 more rows
## # ℹ 17 more variables: channel_category_name <chr>,
## #   channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, activity_start_date <date>, activity_end_date <date>,
## #   fiscal_year <dbl>, current_amount <dbl>, funding_agency_name <chr>, …
head(us)
## # A tibble: 6 × 21
##   `_id` implementing_agency_acronym implementing_agency_name      subagency_name
##   <dbl> <chr>                       <chr>                         <chr>         
## 1     1 USAID                       U.S. Agency for Internationa… Bureau for As…
## 2     2 DOD                         Department of Defense         Office of the…
## 3     3 DOD                         Department of Defense         Office of the…
## 4     4 DOD                         Department of Defense         Defense Secur…
## 5     5 DOD                         Department of Defense         Defense Secur…
## 6     6 DOD                         Department of Defense         Defense Secur…
## # ℹ 17 more variables: channel_category_name <chr>,
## #   channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, activity_start_date <date>, activity_end_date <date>,
## #   fiscal_year <dbl>, current_amount <dbl>, funding_agency_name <chr>,
## #   funding_agency_acronym <chr>, aid_type_group_name <chr>, …
#we will split the data into having activity dates and without dates

#dataset without dates
usw <- us

#remove vairables form data set which are duplicate
usw$activity_end_date <- NULL
usw$activity_start_date <- NULL
usw$implementing_agency_acronym <- NULL
usw$implementing_agency_acronym <- NULL
usw$funding_agency_acronym <- NULL

dim(usw)
## [1] 8297   17
#we have two data sets us with dates usw without dates variables
#usw has no missing data
missmap(usw)
## Warning: Unknown or uninitialised column: `arguments`.
## Warning: Unknown or uninitialised column: `arguments`.
## Warning: Unknown or uninitialised column: `imputations`.

psych::describe(usw)
##                           vars    n       mean          sd median   trimmed
## _id                          1 8297    4149.00     2395.28   4149   4149.00
## implementing_agency_name*    2 8297      14.44        3.38     16     15.21
## subagency_name*              3 8297      26.53       11.52     32     27.44
## channel_category_name*       4 8297       3.24        1.36      3      3.13
## channel_subcategory_name*    5 8297       6.93        3.87      6      6.60
## channel_name*                6 8297     198.46      103.87    215    200.01
## dac_category_name*           7 8297       5.19        2.76      6      5.25
## dac_sector_name*             8 8297      14.10        7.55     14     14.00
## dac_purpose_name*            9 8297      46.13       24.58     55     47.34
## funding_account_name*       10 8297      25.81       14.41     35     26.08
## assistance_category_name*   11 8297       1.02        0.13      1      1.00
## activity_name*              12 8297     624.21      399.15    597    635.35
## fiscal_year                 13 8297    2012.77        4.53   2013   2012.91
## current_amount              14 8297 2027410.46 16522468.05  85000 357284.73
## funding_agency_name*        15 8297      12.88        4.00     15     13.77
## aid_type_group_name*        16 8297       4.11        1.75      5      4.28
## activity_description*       17 8297     556.09      421.19    526    538.16
##                                 mad       min       max     range  skew
## _id                         3074.91         1      8297      8296  0.00
## implementing_agency_name*      0.00         1        16        15 -2.08
## subagency_name*                0.00         1        45        44 -0.98
## channel_category_name*         1.48         1         7         6  0.71
## channel_subcategory_name*      4.45         1        16        15  0.68
## channel_name*                152.71         1       365       364 -0.05
## dac_category_name*             2.97         1        10         9 -0.39
## dac_sector_name*              10.38         1        30        29  0.05
## dac_purpose_name*             20.76         1        92        91 -0.43
## funding_account_name*          8.90         1        58        57 -0.25
## assistance_category_name*      0.00         1         2         1  7.28
## activity_name*               557.46         1      1166      1165 -0.07
## fiscal_year                    4.45      2001      2021        20 -0.24
## current_amount            125223.36 -84169377 600000000 684169377 19.99
## funding_agency_name*           0.00         1        15        14 -1.69
## aid_type_group_name*           0.00         1         6         5 -1.10
## activity_description*        518.91         1      1378      1377  0.38
##                           kurtosis        se
## _id                          -1.20     26.30
## implementing_agency_name*     3.46      0.04
## subagency_name*              -0.41      0.13
## channel_category_name*       -0.43      0.01
## channel_subcategory_name*    -1.00      0.04
## channel_name*                -1.32      1.14
## dac_category_name*           -1.13      0.03
## dac_sector_name*             -0.91      0.08
## dac_purpose_name*            -0.99      0.27
## funding_account_name*        -1.02      0.16
## assistance_category_name*    51.07      0.00
## activity_name*               -1.41      4.38
## fiscal_year                  -0.56      0.05
## current_amount              495.47 181390.43
## funding_agency_name*          1.63      0.04
## aid_type_group_name*         -0.58      0.02
## activity_description*        -1.11      4.62
head(usw)
## # A tibble: 6 × 17
##   `_id` implementing_agency_name            subagency_name channel_category_name
##   <dbl> <chr>                               <chr>          <chr>                
## 1     1 U.S. Agency for International Deve… Bureau for As… Government           
## 2     2 Department of Defense               Office of the… Government           
## 3     3 Department of Defense               Office of the… Government           
## 4     4 Department of Defense               Defense Secur… Government           
## 5     5 Department of Defense               Defense Secur… Government           
## 6     6 Department of Defense               Defense Secur… Government           
## # ℹ 13 more variables: channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, fiscal_year <dbl>, current_amount <dbl>,
## #   funding_agency_name <chr>, aid_type_group_name <chr>,
## #   activity_description <chr>
us[c('channel_nature', 'channel_source')] <- str_split_fixed(us$channel_subcategory_name,  '-', 2)

usw[c('channel_nature', 'channel_source')] <- str_split_fixed(us$channel_subcategory_name,  '-', 2)

head(usw)
## # A tibble: 6 × 19
##   `_id` implementing_agency_name            subagency_name channel_category_name
##   <dbl> <chr>                               <chr>          <chr>                
## 1     1 U.S. Agency for International Deve… Bureau for As… Government           
## 2     2 Department of Defense               Office of the… Government           
## 3     3 Department of Defense               Office of the… Government           
## 4     4 Department of Defense               Defense Secur… Government           
## 5     5 Department of Defense               Defense Secur… Government           
## 6     6 Department of Defense               Defense Secur… Government           
## # ℹ 15 more variables: channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, fiscal_year <dbl>, current_amount <dbl>,
## #   funding_agency_name <chr>, aid_type_group_name <chr>,
## #   activity_description <chr>, channel_nature <chr>, channel_source <chr>
range(usw$current_amount)
## [1] -84169377 600000000

#EDA of data with start and end activity dates Since dataset is analyzed in two sets one with dates information (us) and another without dates (usw)

head(us)
## # A tibble: 6 × 23
##   `_id` implementing_agency_acronym implementing_agency_name      subagency_name
##   <dbl> <chr>                       <chr>                         <chr>         
## 1     1 USAID                       U.S. Agency for Internationa… Bureau for As…
## 2     2 DOD                         Department of Defense         Office of the…
## 3     3 DOD                         Department of Defense         Office of the…
## 4     4 DOD                         Department of Defense         Defense Secur…
## 5     5 DOD                         Department of Defense         Defense Secur…
## 6     6 DOD                         Department of Defense         Defense Secur…
## # ℹ 19 more variables: channel_category_name <chr>,
## #   channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, activity_start_date <date>, activity_end_date <date>,
## #   fiscal_year <dbl>, current_amount <dbl>, funding_agency_name <chr>,
## #   funding_agency_acronym <chr>, aid_type_group_name <chr>, …
colnames(us)
##  [1] "_id"                         "implementing_agency_acronym"
##  [3] "implementing_agency_name"    "subagency_name"             
##  [5] "channel_category_name"       "channel_subcategory_name"   
##  [7] "channel_name"                "dac_category_name"          
##  [9] "dac_sector_name"             "dac_purpose_name"           
## [11] "funding_account_name"        "assistance_category_name"   
## [13] "activity_name"               "activity_start_date"        
## [15] "activity_end_date"           "fiscal_year"                
## [17] "current_amount"              "funding_agency_name"        
## [19] "funding_agency_acronym"      "aid_type_group_name"        
## [21] "activity_description"        "channel_nature"             
## [23] "channel_source"
us <- clean_names(us)
colSums(is.na(us))
##                          id implementing_agency_acronym 
##                           0                           0 
##    implementing_agency_name              subagency_name 
##                           0                           0 
##       channel_category_name    channel_subcategory_name 
##                           0                           0 
##                channel_name           dac_category_name 
##                           0                           0 
##             dac_sector_name            dac_purpose_name 
##                           0                           0 
##        funding_account_name    assistance_category_name 
##                           0                           0 
##               activity_name         activity_start_date 
##                           0                        5966 
##           activity_end_date                 fiscal_year 
##                        5874                           0 
##              current_amount         funding_agency_name 
##                           0                           0 
##      funding_agency_acronym         aid_type_group_name 
##                           0                           0 
##        activity_description              channel_nature 
##                           0                           0 
##              channel_source 
##                           0
us$funding_agency_acronym <- NULL
us$implementing_agency_acronym <- NULL

us$fiscal_year <- as.factor(us$fiscal_year)

#Information of number of projects with missing date information in both assistance categories

nostart_noend_numbers <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(is.na(activity_start_date) & is.na(activity_end_date)) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(fct_reorder(fiscal_year, count), count, label = count)) +
  geom_bar(stat = "identity", fill = "blue")+ geom_text(hjust = 0.4)+
  facet_grid(~assistance_category_name)+
  theme(legend.position = "none")+
     labs(
       title = "Number of projects with no start date information",
      subtitle = "Projects in two assistance groups", x = "Fiscal Years",
      y = "Number of Projects"
     )+coord_flip()
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
nostart_noend_numbers

start_noend_numbers <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(!is.na(activity_start_date) & is.na(activity_end_date)) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(fct_reorder(fiscal_year, count), count, label = count)) + geom_text(hjust = -40) +
  geom_bar(stat = "identity", fill = "yellow")+
  facet_grid(~assistance_category_name)+
  theme(legend.position = "none")+
     labs(
       title = "Number of projects with no end date information",
      subtitle = "Projects in two assistance groups", x = "Fiscal Years",
      y = "Number of Projects"
     )+
  coord_flip()
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
start_noend_numbers

start_end_numbers <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(!is.na(activity_start_date) & !is.na(activity_end_date)) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(fct_reorder(fiscal_year, count), count, label = count)) + geom_text(hjust = -10) + 
  geom_bar(stat = "identity", fill = "green")+
  facet_grid(~assistance_category_name)+
  theme(legend.position = "none")+
     labs(
       title = "Number of projects with completion information",
      subtitle = "Projects in two assistance groups", x = "Fiscal Years",
      y = "Number of Projects"
     )+
  coord_flip()
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
start_end_numbers

Variables with acronymns are deleted since they add no new information to dataset.

start <-  us %>% select(id, implementing_agency_name,subagency_name,channel_category_name,channel_subcategory_name, channel_name, dac_category_name, dac_sector_name, dac_purpose_name,funding_account_name, assistance_category_name,activity_name, activity_start_date, activity_end_date, fiscal_year, current_amount, funding_agency_name, aid_type_group_name, activity_description) %>% group_by(activity_end_date, activity_start_date) %>% filter(is.na(activity_start_date)) %>% 
  summarise(count =n()) %>% 
  arrange(desc(count))
## `summarise()` has grouped output by 'activity_end_date'. You can override using
## the `.groups` argument.
start
## # A tibble: 39 × 3
## # Groups:   activity_end_date [39]
##    activity_end_date activity_start_date count
##    <date>            <date>              <int>
##  1 NA                NA                   5846
##  2 2023-09-30        NA                     18
##  3 2020-09-30        NA                     14
##  4 2023-06-30        NA                      8
##  5 2018-12-31        NA                      5
##  6 2021-06-20        NA                      5
##  7 2022-04-24        NA                      5
##  8 2023-04-30        NA                      5
##  9 2020-03-15        NA                      4
## 10 2023-07-31        NA                      4
## # ℹ 29 more rows
end <- us %>% select(id, implementing_agency_name,subagency_name,channel_category_name,channel_subcategory_name, channel_name, dac_category_name, dac_sector_name, dac_purpose_name,funding_account_name, assistance_category_name,activity_name, activity_start_date, activity_end_date, fiscal_year, current_amount, funding_agency_name, aid_type_group_name, activity_description) %>% group_by(activity_end_date, activity_start_date) %>% filter(is.na(activity_end_date)) %>% 
  summarise(count =n()) %>% 
  arrange(desc(count))
## `summarise()` has grouped output by 'activity_end_date'. You can override using
## the `.groups` argument.
end
## # A tibble: 19 × 3
## # Groups:   activity_end_date [1]
##    activity_end_date activity_start_date count
##    <date>            <date>              <int>
##  1 NA                NA                   5846
##  2 NA                2009-10-01              4
##  3 NA                2012-09-27              4
##  4 NA                2010-03-01              2
##  5 NA                2010-07-08              2
##  6 NA                2010-07-23              2
##  7 NA                2012-08-30              2
##  8 NA                2007-09-05              1
##  9 NA                2009-07-13              1
## 10 NA                2010-08-01              1
## 11 NA                2010-08-26              1
## 12 NA                2010-09-14              1
## 13 NA                2010-09-23              1
## 14 NA                2011-10-01              1
## 15 NA                2012-05-24              1
## 16 NA                2012-06-12              1
## 17 NA                2012-06-28              1
## 18 NA                2014-04-17              1
## 19 NA                2015-10-28              1
both <- us %>% select(id, implementing_agency_name,subagency_name,channel_category_name,channel_subcategory_name, channel_name, dac_category_name, dac_sector_name, dac_purpose_name,funding_account_name, assistance_category_name,activity_name, activity_start_date, activity_end_date, fiscal_year, current_amount, funding_agency_name, aid_type_group_name, activity_description) %>% group_by(activity_end_date, activity_start_date) %>% filter(is.na(activity_end_date) | is.na(activity_start_date)) %>% 
  summarise(count =n()) %>% 
  arrange(desc(count))
## `summarise()` has grouped output by 'activity_end_date'. You can override using
## the `.groups` argument.
both
## # A tibble: 57 × 3
## # Groups:   activity_end_date [39]
##    activity_end_date activity_start_date count
##    <date>            <date>              <int>
##  1 NA                NA                   5846
##  2 2023-09-30        NA                     18
##  3 2020-09-30        NA                     14
##  4 2023-06-30        NA                      8
##  5 2018-12-31        NA                      5
##  6 2021-06-20        NA                      5
##  7 2022-04-24        NA                      5
##  8 2023-04-30        NA                      5
##  9 2020-03-15        NA                      4
## 10 2023-07-31        NA                      4
## # ℹ 47 more rows
amount_nostart_butend <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(is.na(activity_start_date) & !is.na(activity_end_date)) %>% 
  summarise(max_amount = max(current_amount), min_amount = min(current_amount), avg_amount = mean(current_amount)) %>% ggplot(aes(fiscal_year, y = avg_amount))+geom_bar(stat = "identity", fill = "yellow")+
     theme(legend.position = "none")+
     labs(
       title = "An average current amount per fiscal year",
      subtitle = "Average amunt as per assistance category name", x = "Fiscal Years",
      y = "Avg Current Amount"
     )+
  facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
amount_nostart_butend

amount_start_noend <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(!is.na(activity_start_date) & is.na(activity_end_date)) %>% 
  summarise(max_amount = max(current_amount), min_amount = min(current_amount), avg_amount = mean(current_amount)) %>% ggplot(aes(fiscal_year, y = avg_amount))+geom_bar(stat = "identity", fill = "green")+
     theme(legend.position = "none")+
     labs(
       title = "An average current amount per fiscal year",
      subtitle = "Average amunt as per assistance category name", x = "Fiscal Years",
      y = "Avg Current Amount"
     )+
  facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
amount_start_noend

amount_start_end <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(!is.na(activity_start_date) & !is.na(activity_end_date)) %>% 
  summarise(max_amount = max(current_amount), min_amount = min(current_amount), avg_amount = mean(current_amount)) %>% ggplot(aes(fiscal_year, y = avg_amount))+geom_bar(stat = "identity", fill = "blue")+
     theme(legend.position = "none")+
     labs(
       title = "An average current amount per fiscal year",
      subtitle = "Average amunt as per assistance category name", x = "Fiscal Years",
      y = "Avg Current Amount"
     )+
  facet_grid(~assistance_category_name)+ coord_flip()
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
amount_start_end

amount_nostart_noend <- us %>% 
  select(implementing_agency_name, subagency_name, channel_category_name, channel_subcategory_name, current_amount, fiscal_year,dac_sector_name,dac_purpose_name,funding_account_name,assistance_category_name, activity_name, funding_agency_name, aid_type_group_name, aid_type_group_name, channel_nature,  channel_source, activity_start_date, activity_end_date ) %>% 
  group_by(fiscal_year,assistance_category_name,activity_start_date,activity_end_date) %>% filter(is.na(activity_start_date) & is.na(activity_end_date)) %>% 
  summarise(max_amount = max(current_amount), min_amount = min(current_amount), avg_amount = mean(current_amount)) %>% ggplot(aes(fiscal_year, y = avg_amount))+geom_bar(stat = "identity", fill = "red")+
     theme(legend.position = "none")+
     labs(
       title = "An average current amount per fiscal year",
      subtitle = "Average amunt as per assistance category name", x = "Fiscal Years",
      y = "Avg Current Amount"
     )+
  facet_grid(~assistance_category_name)+coord_flip()
## `summarise()` has grouped output by 'fiscal_year', 'assistance_category_name',
## 'activity_start_date'. You can override using the `.groups` argument.
amount_nostart_noend

#EDA of data with dates

colnames(us)
##  [1] "id"                       "implementing_agency_name"
##  [3] "subagency_name"           "channel_category_name"   
##  [5] "channel_subcategory_name" "channel_name"            
##  [7] "dac_category_name"        "dac_sector_name"         
##  [9] "dac_purpose_name"         "funding_account_name"    
## [11] "assistance_category_name" "activity_name"           
## [13] "activity_start_date"      "activity_end_date"       
## [15] "fiscal_year"              "current_amount"          
## [17] "funding_agency_name"      "aid_type_group_name"     
## [19] "activity_description"     "channel_nature"          
## [21] "channel_source"
clean_names(us)
## # A tibble: 8,297 × 21
##       id implementing_agency_name           subagency_name channel_category_name
##    <dbl> <chr>                              <chr>          <chr>                
##  1     1 U.S. Agency for International Dev… Bureau for As… Government           
##  2     2 Department of Defense              Office of the… Government           
##  3     3 Department of Defense              Office of the… Government           
##  4     4 Department of Defense              Defense Secur… Government           
##  5     5 Department of Defense              Defense Secur… Government           
##  6     6 Department of Defense              Defense Secur… Government           
##  7     7 Department of Defense              Defense Secur… Government           
##  8     8 Department of Defense              Defense Secur… Government           
##  9     9 Department of Defense              Defense Secur… Government           
## 10    10 Department of Defense              Defense Secur… Government           
## # ℹ 8,287 more rows
## # ℹ 17 more variables: channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, activity_start_date <date>, activity_end_date <date>,
## #   fiscal_year <fct>, current_amount <dbl>, funding_agency_name <chr>,
## #   aid_type_group_name <chr>, activity_description <chr>, …
us$implementing_agency_acronym <- NULL
us$subagency_acronym <- NULL
us$funding_agency_acronym <- NULL

us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date) %>% filter(!is.na(activity_start_date)) %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date'. You can override using the
## `.groups` argument.
## # A tibble: 400 × 5
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date [339]
##    implementing_agency_name           assistance_category_…¹ activity_start_date
##    <chr>                              <chr>                  <date>             
##  1 U.S. Agency for International Dev… Economic               2001-10-01         
##  2 U.S. Agency for International Dev… Economic               2011-05-27         
##  3 U.S. Agency for International Dev… Economic               2014-01-01         
##  4 U.S. Agency for International Dev… Economic               2010-08-18         
##  5 U.S. Agency for International Dev… Economic               2006-09-29         
##  6 U.S. Agency for International Dev… Economic               2010-08-31         
##  7 U.S. Agency for International Dev… Economic               2017-09-28         
##  8 U.S. Agency for International Dev… Economic               2007-03-01         
##  9 U.S. Agency for International Dev… Economic               2007-11-12         
## 10 U.S. Agency for International Dev… Economic               2013-05-24         
## # ℹ 390 more rows
## # ℹ abbreviated name: ¹​assistance_category_name
## # ℹ 2 more variables: activity_end_date <date>, count <int>
us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date) %>% filter(!is.na(activity_end_date) ) %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date'. You can override using the
## `.groups` argument.
## # A tibble: 427 × 5
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date [339]
##    implementing_agency_name           assistance_category_…¹ activity_start_date
##    <chr>                              <chr>                  <date>             
##  1 U.S. Agency for International Dev… Economic               2001-10-01         
##  2 U.S. Agency for International Dev… Economic               2011-05-27         
##  3 U.S. Agency for International Dev… Economic               2014-01-01         
##  4 U.S. Agency for International Dev… Economic               2010-08-18         
##  5 U.S. Agency for International Dev… Economic               2006-09-29         
##  6 U.S. Agency for International Dev… Economic               2010-08-31         
##  7 U.S. Agency for International Dev… Economic               2017-09-28         
##  8 U.S. Agency for International Dev… Economic               2007-03-01         
##  9 U.S. Agency for International Dev… Economic               2007-11-12         
## 10 U.S. Agency for International Dev… Economic               2013-05-24         
## # ℹ 417 more rows
## # ℹ abbreviated name: ¹​assistance_category_name
## # ℹ 2 more variables: activity_end_date <date>, count <int>
us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date) %>% filter(!is.na(activity_start_date) & !is.na(activity_end_date) ) %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date'. You can override using the
## `.groups` argument.
## # A tibble: 382 × 5
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date [329]
##    implementing_agency_name           assistance_category_…¹ activity_start_date
##    <chr>                              <chr>                  <date>             
##  1 U.S. Agency for International Dev… Economic               2001-10-01         
##  2 U.S. Agency for International Dev… Economic               2011-05-27         
##  3 U.S. Agency for International Dev… Economic               2014-01-01         
##  4 U.S. Agency for International Dev… Economic               2010-08-18         
##  5 U.S. Agency for International Dev… Economic               2006-09-29         
##  6 U.S. Agency for International Dev… Economic               2010-08-31         
##  7 U.S. Agency for International Dev… Economic               2017-09-28         
##  8 U.S. Agency for International Dev… Economic               2007-03-01         
##  9 U.S. Agency for International Dev… Economic               2007-11-12         
## 10 U.S. Agency for International Dev… Economic               2013-05-24         
## # ℹ 372 more rows
## # ℹ abbreviated name: ¹​assistance_category_name
## # ℹ 2 more variables: activity_end_date <date>, count <int>
us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date) %>% filter(!is.na(activity_start_date) | !is.na(activity_end_date) ) %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date'. You can override using the
## `.groups` argument.
## # A tibble: 445 × 5
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date [349]
##    implementing_agency_name           assistance_category_…¹ activity_start_date
##    <chr>                              <chr>                  <date>             
##  1 U.S. Agency for International Dev… Economic               2001-10-01         
##  2 U.S. Agency for International Dev… Economic               2011-05-27         
##  3 U.S. Agency for International Dev… Economic               2014-01-01         
##  4 U.S. Agency for International Dev… Economic               2010-08-18         
##  5 U.S. Agency for International Dev… Economic               2006-09-29         
##  6 U.S. Agency for International Dev… Economic               2010-08-31         
##  7 U.S. Agency for International Dev… Economic               2017-09-28         
##  8 U.S. Agency for International Dev… Economic               2007-03-01         
##  9 U.S. Agency for International Dev… Economic               2007-11-12         
## 10 U.S. Agency for International Dev… Economic               2013-05-24         
## # ℹ 435 more rows
## # ℹ abbreviated name: ¹​assistance_category_name
## # ℹ 2 more variables: activity_end_date <date>, count <int>
agency_dates_economic <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(!is.na(activity_start_date) & !is.na(activity_end_date) & assistance_category_name == "Economic") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
agency_dates_economic
## # A tibble: 386 × 6
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date, activity_end_date [374]
##    implementing_agency_name           assistance_category_…¹ activity_start_date
##    <chr>                              <chr>                  <date>             
##  1 U.S. Agency for International Dev… Economic               2011-05-27         
##  2 U.S. Agency for International Dev… Economic               2014-01-01         
##  3 U.S. Agency for International Dev… Economic               2001-10-01         
##  4 U.S. Agency for International Dev… Economic               2010-08-18         
##  5 U.S. Agency for International Dev… Economic               2006-09-29         
##  6 U.S. Agency for International Dev… Economic               2010-08-31         
##  7 U.S. Agency for International Dev… Economic               2017-09-28         
##  8 U.S. Agency for International Dev… Economic               2007-03-01         
##  9 U.S. Agency for International Dev… Economic               2007-11-12         
## 10 U.S. Agency for International Dev… Economic               2013-05-24         
## # ℹ 376 more rows
## # ℹ abbreviated name: ¹​assistance_category_name
## # ℹ 3 more variables: activity_end_date <date>, channel_name <chr>, count <int>
agency_dates_military <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(!is.na(activity_start_date) & !is.na(activity_end_date) & assistance_category_name == "Military") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
agency_dates_military
## # A tibble: 9 × 6
## # Groups:   implementing_agency_name, assistance_category_name,
## #   activity_start_date, activity_end_date [8]
##   implementing_agency_name    assistance_category_name activity_start_date
##   <chr>                       <chr>                    <date>             
## 1 Department of Defense       Military                 2014-01-15         
## 2 Department of Defense       Military                 2009-01-01         
## 3 Department of Defense       Military                 2014-01-15         
## 4 Department of Defense       Military                 2017-10-01         
## 5 Department of the Air Force Military                 2015-10-01         
## 6 Department of the Air Force Military                 2016-10-01         
## 7 Department of the Army      Military                 2015-10-01         
## 8 Department of the Army      Military                 2017-10-01         
## 9 Department of the Navy      Military                 2015-10-01         
## # ℹ 3 more variables: activity_end_date <date>, channel_name <chr>, count <int>
agency_noend_economic <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date) %>% filter(!is.na(activity_start_date) & is.na(activity_end_date) & assistance_category_name == "Economic") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date'. You can override using the
## `.groups` argument.
noend_eco <- ggplot(agency_noend_economic, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "green") +
  #geom_text(hjust = 0.2) +
    coord_flip() +
    theme(legend.position = "none") +
    labs(
      title = "The Number of Implementing agencies",
      subtitle = "Implementing agencies in Economic sector with No end date",
      #y = "Number of projects per year" 
      x = "Name of agencies"
      #caption = "US aid program 2001-2023"
  )

agency_nostart_economic <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(is.na(activity_start_date) & !is.na(activity_end_date) & assistance_category_name == "Economic") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
nostart_eco <- ggplot(agency_nostart_economic, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "blue") +
  #geom_text(hjust = 0.2)+
    coord_flip()+
    theme(legend.position = "none")+
     labs(
       subtitle = "Implementing agencies in Economic sector with No start date",x = "Name of agencies"
      #caption = "US aid program 2001-2023",
     )

agency_noclue_economic <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(is.na(activity_start_date) & is.na(activity_end_date) & assistance_category_name == "Economic") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
nodate_eco <- ggplot(agency_noclue_economic, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "red") +
    coord_flip()+
    theme(legend.position = "none")+
     labs(
      subtitle = "Implementing agencies in Economic sector with No date info",
      caption = "US aid program 2001-2023",y = "Number of projects per year", x = "Name of agencies"
     )

plot_grid(noend_eco, nostart_eco,nodate_eco, nrow=3)

agency_noend_military <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(!is.na(activity_start_date) & is.na(activity_end_date) & assistance_category_name == "Military") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
noend_military <- ggplot(agency_noend_military, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "green") +
    coord_flip()+
    theme(legend.position = "none")+
     labs(
       title = "The Number of Implementing agencies",
      subtitle = "Implementing agencies in Economic sector with No end date", x = "Name of agencies"
      #y = "Number of projects per year"
     )



agency_nostart_military <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(is.na(activity_start_date) & !is.na(activity_end_date) & assistance_category_name == "Military") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
nostart_military <- ggplot(agency_nostart_military, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "blue") +
    coord_flip()+
    theme(legend.position = "none")+
     labs(
      subtitle = "Implementing agencies in Economic sector with No start date",
      #caption = "US aid program 2001-2023",
      x = "Name of agencies"
     )



agency_noclue_military <- us %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name, activity_start_date, activity_end_date) %>% group_by(implementing_agency_name,assistance_category_name,activity_start_date, activity_end_date,channel_name) %>% filter(is.na(activity_start_date) & is.na(activity_end_date) & assistance_category_name == "Military") %>% 
  summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name',
## 'assistance_category_name', 'activity_start_date', 'activity_end_date'. You can
## override using the `.groups` argument.
nodate_military <- ggplot(agency_noclue_military, aes(
    x = fct_reorder(implementing_agency_name,count), count))+
    geom_bar(stat = "identity", fill = "red") +
    coord_flip()+
    theme(legend.position = "none")+
     labs(
      subtitle = "Implementing agencies in Economic sector with No date info",
      caption = "US aid program 2001-2023",y = "Number of projects per year", x = "Name of agencies"
     )

plot_grid(noend_military, nostart_military,nodate_military, nrow=3)

#EDA of Data without dates

imple_proj_num <- usw %>% select(channel_name, channel_source, channel_nature, implementing_agency_name,aid_type_group_name,assistance_category_name) %>% group_by(implementing_agency_name,assistance_category_name) %>% summarise(count = n()) %>% arrange(desc(count))
## `summarise()` has grouped output by 'implementing_agency_name'. You can
## override using the `.groups` argument.
#number of implementing agencies
  ggplot(imple_proj_num, aes(
    x = fct_reorder(implementing_agency_name,count), count, label = count, fill = implementing_agency_name))+
    geom_col() +
  geom_text(hjust = 0.2)+
    coord_flip()+
    theme(legend.position = "none")+
    labs(
      title = "The Number of Implementing agencies",
      subtitle = "Details about implementing agencies",
      caption = "US aid program 2001-2023",
      y = "Number of projects per year", x = "Name of agencies"
    )+facet_wrap(~assistance_category_name)

agency_year <- xtabs(~ implementing_agency_name + fiscal_year , data = usw)
addmargins(agency_year)
##                                            fiscal_year
## implementing_agency_name                    2001 2002 2003 2004 2005 2006 2007
##   Department of Agriculture                    0    1   14    7    4    8    2
##   Department of Commerce                       0    0    0    0    0    1    1
##   Department of Defense                        1    3    3    3    3    5    4
##   Department of Energy                         0    0    0    0    0    0    4
##   Department of Health and Human Services      3    3    3    3    3    5    3
##   Department of Homeland Security              0    0    0    0    0    0    2
##   Department of Justice                        0    0    0    0    0    0    0
##   Department of Labor                          0    1    1    0    1    1    1
##   Department of State                         33   24   32   25   41   44   74
##   Department of the Air Force                  0    0   19    2    4    0    1
##   Department of the Army                       0    0    1    0    0    0    0
##   Department of the Interior                   0    0    0    0    0    0    0
##   Department of the Navy                       0    0    0    1    0    1    0
##   Department of the Treasury                   0    1    1    1    0    0    1
##   Trade and Development Agency                 0    3    6    3    5    3    5
##   U.S. Agency for International Development   13   25   47   82   88  188  215
##   Sum                                         50   61  127  127  149  256  313
##                                            fiscal_year
## implementing_agency_name                    2008 2009 2010 2011 2012 2013 2014
##   Department of Agriculture                    1    4   10   11   10    2    1
##   Department of Commerce                       0    0    1    1    2    0    1
##   Department of Defense                        4    6    5    8    6    5   10
##   Department of Energy                         4    3    4    5    3    2    2
##   Department of Health and Human Services      3    4    4    2    1    3    2
##   Department of Homeland Security              0    0    0    1    0    0    1
##   Department of Justice                        2    2    4    0    3    1    0
##   Department of Labor                          0    2    0    0    0    0    0
##   Department of State                         58   64   80   77   67   67   86
##   Department of the Air Force                  1    0    2    0    0    0    1
##   Department of the Army                       0    0    4    0    0    1    6
##   Department of the Interior                   0    0    0    1    2    2    0
##   Department of the Navy                       0    1   13    0    0    0    0
##   Department of the Treasury                   1    2    2    4    0    0    0
##   Trade and Development Agency                 0    1    9    9   16   11    7
##   U.S. Agency for International Development  290  441  511  557  492  589  478
##   Sum                                        364  530  649  676  602  683  595
##                                            fiscal_year
## implementing_agency_name                    2015 2016 2017 2018 2019 2020 2021
##   Department of Agriculture                    1    8    2    1    1    1    0
##   Department of Commerce                       0    0    0    1    0    0    0
##   Department of Defense                        5    4    3    2    4    1    0
##   Department of Energy                         1    1    1    1    1    1    0
##   Department of Health and Human Services      0    8   29   24   10    2    0
##   Department of Homeland Security              1    0    2    0    0    0    0
##   Department of Justice                        0    0    0    0    0    0    0
##   Department of Labor                          0    0    0    0    0    0    0
##   Department of State                         71   70  101   72   48    9    0
##   Department of the Air Force                  0    1    1    2    4    1    0
##   Department of the Army                       2    2    0    1    1    1    0
##   Department of the Interior                   0    0    0    2    0    0    0
##   Department of the Navy                       2    1    0    0    0    0    0
##   Department of the Treasury                   0    0    0    0    0    0    0
##   Trade and Development Agency                 4    3    1    0    1    1    0
##   U.S. Agency for International Development  507  435  423  365  342  337  188
##   Sum                                        594  533  563  471  412  354  188
##                                            fiscal_year
## implementing_agency_name                     Sum
##   Department of Agriculture                   89
##   Department of Commerce                       8
##   Department of Defense                       85
##   Department of Energy                        33
##   Department of Health and Human Services    115
##   Department of Homeland Security              7
##   Department of Justice                       12
##   Department of Labor                          7
##   Department of State                       1143
##   Department of the Air Force                 39
##   Department of the Army                      19
##   Department of the Interior                   7
##   Department of the Navy                      19
##   Department of the Treasury                  13
##   Trade and Development Agency                88
##   U.S. Agency for International Development 6613
##   Sum                                       8297
usw %>% select(fiscal_year, implementing_agency_name,assistance_category_name) %>% group_by(fiscal_year,assistance_category_name) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
ggplot(aes(x = fiscal_year, y = count, fill = assistance_category_name))+
  geom_col()+
  theme_classic()+
  labs(
    title = "Details about number of projects in each Fiscal Year",
    subtitle = "The count of projects in every fiscal year",
    caption = "US aid program 2001-2023",
    x = "Fiscal year", y = "Number of projects"
  )+scale_colour_continuous()
## `summarise()` has grouped output by 'fiscal_year'. You can override using the
## `.groups` argument.

assitance_year <- xtabs(~ fiscal_year + assistance_category_name , data = usw)
addmargins(assitance_year)
##            assistance_category_name
## fiscal_year Economic Military  Sum
##        2001       49        1   50
##        2002       58        3   61
##        2003      104       23  127
##        2004      121        6  127
##        2005      142        7  149
##        2006      251        5  256
##        2007      309        4  313
##        2008      360        4  364
##        2009      525        5  530
##        2010      628       21  649
##        2011      670        6  676
##        2012      597        5  602
##        2013      678        5  683
##        2014      579       16  595
##        2015      585        9  594
##        2016      525        8  533
##        2017      559        4  563
##        2018      466        5  471
##        2019      403        9  412
##        2020      352        2  354
##        2021      188        0  188
##        Sum      8149      148 8297
usw %>% select(assistance_category_name, fiscal_year) %>% group_by(assistance_category_name) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(
    x = fct_reorder(assistance_category_name,count), count, label = count, fill = assistance_category_name))+
    geom_col() +
  geom_text(hjust = -0.2)+
    coord_flip()+
    theme(legend.position = "none") +
    labs(
      title = "The Number of Assistance categoies",
      subtitle = "Details about Assistance categoies",
      caption = "US aid program 2001-2023",
      y = "Number of projects per year", x = "Assistance cateroies")+scale_fill_discrete()

economic <- usw %>% select(fiscal_year, assistance_category_name) %>% group_by(fiscal_year)  %>% filter (assistance_category_name == "Economic")%>%
  summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fiscal_year, y = count))+
  geom_bar(stat = "identity",fill = "blue")+
  theme_classic()+
  labs(
    title = "Number of Economic projects in each Fiscal Year",
    subtitle = "Economic projects",
    #caption = "US aid program 2001-2023",
    x = "Fiscal year", y = "Economic projects"
  )+
  scale_fill_brewer()


military <- usw %>% select(fiscal_year, assistance_category_name) %>% group_by(fiscal_year) %>% filter (assistance_category_name == "Military") %>% summarise(count = n()) %>% arrange(desc(count)) %>%
  ggplot(aes(x = fiscal_year, y = count))+
  geom_bar(stat = "identity" , fill = "red")+
  theme_classic()+
  labs(
    title = "Number of Military projects in each Fiscal Year",
    subtitle = "Military projects",
    caption = "US aid program 2001-2023",
    x = "Fiscal year", y = "Military projects"
  )+
  scale_fill_brewer()

plot_grid(economic, military, ncol = 1)

aid_type_year <- xtabs(~ fiscal_year + aid_type_group_name , data = usw)
addmargins(aid_type_year)
##            aid_type_group_name
## fiscal_year Administrative Costs Budget support Core contributions Debt relief
##        2001                    1              0                  3           0
##        2002                    5              1                  4           0
##        2003                   16              0                  7           1
##        2004                   17              0                  5           1
##        2005                   20              0                  9           0
##        2006                   20              0                  9           0
##        2007                   28              7                 17           0
##        2008                   48              0                 19           0
##        2009                   79              0                 12           0
##        2010                   95              3                 18           0
##        2011                  118              5                 19           0
##        2012                  102             12                 13           0
##        2013                  202              9                  4           0
##        2014                  194              6                  7           0
##        2015                  173              9                 10           0
##        2016                  153              2                  6           0
##        2017                  165              0                  2           0
##        2018                  134              0                  0           0
##        2019                  121              0                  1           0
##        2020                  125              1                  2           0
##        2021                   76              0                  0           0
##        Sum                  1892             55                167           2
##            aid_type_group_name
## fiscal_year Project-Type Technical Assistance  Sum
##        2001           45                    1   50
##        2002           43                    8   61
##        2003           90                   13  127
##        2004           92                   12  127
##        2005          104                   16  149
##        2006          202                   25  256
##        2007          228                   33  313
##        2008          254                   43  364
##        2009          376                   63  530
##        2010          442                   91  649
##        2011          461                   73  676
##        2012          412                   63  602
##        2013          419                   49  683
##        2014          359                   29  595
##        2015          375                   27  594
##        2016          345                   27  533
##        2017          368                   28  563
##        2018          317                   20  471
##        2019          279                   11  412
##        2020          210                   16  354
##        2021          110                    2  188
##        Sum          5531                  650 8297
usw %>% select(fiscal_year, aid_type_group_name,assistance_category_name) %>% group_by(aid_type_group_name,assistance_category_name) %>% summarise(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(aid_type_group_name, count), y = count, label = count))+
  geom_bar(stat = "identity", fill = "skyblue")+
  scale_fill_brewer()+
  theme_classic()+
  labs(
    title = "Number of Military projects in each Fiscal Year",
    subtitle = "Military projects",
    caption = "US aid program 2001-2023",
    x = "Fiscal year", y = "Military projects"
  )+ coord_flip()+ geom_text(hjust = 0.2)+ facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'aid_type_group_name'. You can override
## using the `.groups` argument.

source_year <- xtabs(~ channel_source + fiscal_year , data = usw)
addmargins(source_year)
##                               fiscal_year
## channel_source                 2001 2002 2003 2004 2005 2006 2007 2008 2009
##                                   0    0    0    0    0    0    0    0    0
##    International                 23   12   14   14   11   17   32   21   22
##    International Monetary Fund    0    0    0    0    0    0    0    0    0
##    Non United States              1    4   13   17   28   58   70   84  113
##    Other                          0    1    0    0    2    4    2    3    5
##    United Nations                 3    9   19    7    9   35   17   24   45
##    United States                 23   35   81   89   99  142  192  232  344
##    World Bank Group               0    0    0    0    0    0    0    0    1
##   Sum                            50   61  127  127  149  256  313  364  530
##                               fiscal_year
## channel_source                 2010 2011 2012 2013 2014 2015 2016 2017 2018
##                                   0    0    0    0    0    1    0    0    0
##    International                 17   24   16   14   13    6    2    2    2
##    International Monetary Fund    0    0    0    0    0    0    0    2    1
##    Non United States            164  194  213  263  236  247  213  211  173
##    Other                          6   12   12   12    8   13    8    3    5
##    United Nations                66   67   48   23   15   25   26   24   25
##    United States                396  373  311  369  321  299  282  319  263
##    World Bank Group               0    6    2    2    2    3    2    2    2
##   Sum                           649  676  602  683  595  594  533  563  471
##                               fiscal_year
## channel_source                 2019 2020 2021  Sum
##                                   0    0    0    1
##    International                  5    7    3  277
##    International Monetary Fund    0    0    0    3
##    Non United States            163  130   81 2676
##    Other                          6    6    2  110
##    United Nations                19   14    7  527
##    United States                218  195   95 4678
##    World Bank Group               1    2    0   25
##   Sum                           412  354  188 8297
usw %>% select(channel_source, fiscal_year,assistance_category_name) %>% group_by(channel_source,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(channel_source, count), count,label = count))+ geom_bar(stat= "identity", fill = "purple")+
  coord_flip()+theme(legend.position = "none")+geom_text(hjust = 0.25)+
  labs(
    title = "Number of projects per channel source",
    subtitle = "Channel sources of Projects",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Channel sources"
  )+facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'channel_source'. You can override using
## the `.groups` argument.

dac_cat_year <- xtabs(~ dac_category_name + fiscal_year , data = usw)

usw %>% select(dac_category_name, fiscal_year,assistance_category_name) %>% group_by(dac_category_name,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(dac_category_name, count), count,label = count))+ geom_bar(stat= "identity", fill = "orange")+
  coord_flip()+theme(legend.position = "none")+geom_text(hjust = 0.6)+
  labs(
    title = "Number of projects per Categories",
    subtitle = "Category sources of Projects",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Category Names"
  )+facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'dac_category_name'. You can override using
## the `.groups` argument.

activ_year <- xtabs(~ funding_agency_name + fiscal_year , data = usw)
addmargins(activ_year)
##                                            fiscal_year
## funding_agency_name                         2001 2002 2003 2004 2005 2006 2007
##   Department of Agriculture                    4   11   22   18    4   24    2
##   Department of Commerce                       0    0    0    0    0    0    0
##   Department of Defense                        1    3   23    6    7    6    5
##   Department of Energy                         0    0    0    0    0    0    4
##   Department of Health and Human Services      3    3    3    3    3    5    3
##   Department of Justice                        0    0    0    0    0    0    0
##   Department of Labor                          0    1    1    0    1    1    1
##   Department of State                         33   23   32   23   33   42   71
##   Department of the Air Force                  0    0    0    0    0    0    0
##   Department of the Army                       0    0    0    0    0    0    0
##   Department of the Interior                   0    0    0    0    0    0    0
##   Department of the Navy                       0    0    0    0    0    0    0
##   Department of the Treasury                   0    1    0    0    0    0    1
##   Trade and Development Agency                 0    3    6    3    5    3    5
##   U.S. Agency for International Development    9   16   40   74   96  175  221
##   Sum                                         50   61  127  127  149  256  313
##                                            fiscal_year
## funding_agency_name                         2008 2009 2010 2011 2012 2013 2014
##   Department of Agriculture                   11   23   37   42   39   13   11
##   Department of Commerce                       0    0    0    0    1    0    0
##   Department of Defense                        5    6   21    7    5    5   13
##   Department of Energy                         4    2    4    3    3    2    2
##   Department of Health and Human Services      3    4    4    2    1    3    2
##   Department of Justice                        1    0    0    0    3    1    0
##   Department of Labor                          0    2    0    0    0    0    0
##   Department of State                         58   76  102   95   85   97  120
##   Department of the Air Force                  0    0    0    0    0    0    0
##   Department of the Army                       0    1    3    1    1    1    4
##   Department of the Interior                   0    0    0    1    2    2    0
##   Department of the Navy                       0    0    0    0    0    0    0
##   Department of the Treasury                   1    2    2    4    0    0    0
##   Trade and Development Agency                 0    1    9    9    4    1    1
##   U.S. Agency for International Development  281  413  467  512  458  558  442
##   Sum                                        364  530  649  676  602  683  595
##                                            fiscal_year
## funding_agency_name                         2015 2016 2017 2018 2019 2020 2021
##   Department of Agriculture                   10   22   18   16   12    7    3
##   Department of Commerce                       0    0    0    1    0    0    0
##   Department of Defense                        9    5    3    2    4    1    0
##   Department of Energy                         1    1    1    1    1    1    0
##   Department of Health and Human Services      0    8   29   24   10    2    0
##   Department of Justice                        0    0    0    0    0    0    0
##   Department of Labor                          0    0    0    0    0    0    0
##   Department of State                         98   94   65   65   56   33   15
##   Department of the Air Force                  0    1    1    2    4    1    0
##   Department of the Army                       0    1    0    1    1    1    0
##   Department of the Interior                   0    0    0    1    0    0    0
##   Department of the Navy                       0    1    0    0    0    0    0
##   Department of the Treasury                   0    0    0    0    0    0    0
##   Trade and Development Agency                 0    0    0    0    0    1    0
##   U.S. Agency for International Development  476  400  446  358  324  307  170
##   Sum                                        594  533  563  471  412  354  188
##                                            fiscal_year
## funding_agency_name                          Sum
##   Department of Agriculture                  349
##   Department of Commerce                       2
##   Department of Defense                      137
##   Department of Energy                        30
##   Department of Health and Human Services    115
##   Department of Justice                        5
##   Department of Labor                          7
##   Department of State                       1316
##   Department of the Air Force                  9
##   Department of the Army                      15
##   Department of the Interior                   6
##   Department of the Navy                       1
##   Department of the Treasury                  11
##   Trade and Development Agency                51
##   U.S. Agency for International Development 6243
##   Sum                                       8297
usw %>% select(funding_agency_name, fiscal_year,assistance_category_name) %>% group_by(funding_agency_name,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(funding_agency_name, count), count,label = count))+ geom_bar(stat= "identity", fill = "pink")+
  coord_flip()+theme(legend.position = "none")+geom_text(hjust = 0.4)+
  labs(
    title = "Funding agencies for Projects",
    subtitle = "Details of funding agencies",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Funding agency"
  )+facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'funding_agency_name'. You can override
## using the `.groups` argument.

aid_gp_year <- xtabs(~ aid_type_group_name + fiscal_year , data = usw)
addmargins(aid_gp_year)
##                       fiscal_year
## aid_type_group_name    2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
##   Administrative Costs    1    5   16   17   20   20   28   48   79   95  118
##   Budget support          0    1    0    0    0    0    7    0    0    3    5
##   Core contributions      3    4    7    5    9    9   17   19   12   18   19
##   Debt relief             0    0    1    1    0    0    0    0    0    0    0
##   Project-Type           45   43   90   92  104  202  228  254  376  442  461
##   Technical Assistance    1    8   13   12   16   25   33   43   63   91   73
##   Sum                    50   61  127  127  149  256  313  364  530  649  676
##                       fiscal_year
## aid_type_group_name    2012 2013 2014 2015 2016 2017 2018 2019 2020 2021  Sum
##   Administrative Costs  102  202  194  173  153  165  134  121  125   76 1892
##   Budget support         12    9    6    9    2    0    0    0    1    0   55
##   Core contributions     13    4    7   10    6    2    0    1    2    0  167
##   Debt relief             0    0    0    0    0    0    0    0    0    0    2
##   Project-Type          412  419  359  375  345  368  317  279  210  110 5531
##   Technical Assistance   63   49   29   27   27   28   20   11   16    2  650
##   Sum                   602  683  595  594  533  563  471  412  354  188 8297
usw %>% select(aid_type_group_name, fiscal_year,assistance_category_name) %>% group_by(aid_type_group_name,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(aid_type_group_name, count), count,label = count))+ 
  geom_bar(stat= "identity", fill = "magenta")+
  coord_flip()+theme(legend.position = "none")+
  geom_text(hjust = 0.7)+
  labs(title = "Funding agencies for Projects",
    subtitle = "Details of funding agencies",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Funding agency")+
facet_grid(~assistance_category_name)
## `summarise()` has grouped output by 'aid_type_group_name'. You can override
## using the `.groups` argument.

chan_src_year <- xtabs(~ channel_source + fiscal_year , data = usw)
addmargins(chan_src_year)
##                               fiscal_year
## channel_source                 2001 2002 2003 2004 2005 2006 2007 2008 2009
##                                   0    0    0    0    0    0    0    0    0
##    International                 23   12   14   14   11   17   32   21   22
##    International Monetary Fund    0    0    0    0    0    0    0    0    0
##    Non United States              1    4   13   17   28   58   70   84  113
##    Other                          0    1    0    0    2    4    2    3    5
##    United Nations                 3    9   19    7    9   35   17   24   45
##    United States                 23   35   81   89   99  142  192  232  344
##    World Bank Group               0    0    0    0    0    0    0    0    1
##   Sum                            50   61  127  127  149  256  313  364  530
##                               fiscal_year
## channel_source                 2010 2011 2012 2013 2014 2015 2016 2017 2018
##                                   0    0    0    0    0    1    0    0    0
##    International                 17   24   16   14   13    6    2    2    2
##    International Monetary Fund    0    0    0    0    0    0    0    2    1
##    Non United States            164  194  213  263  236  247  213  211  173
##    Other                          6   12   12   12    8   13    8    3    5
##    United Nations                66   67   48   23   15   25   26   24   25
##    United States                396  373  311  369  321  299  282  319  263
##    World Bank Group               0    6    2    2    2    3    2    2    2
##   Sum                           649  676  602  683  595  594  533  563  471
##                               fiscal_year
## channel_source                 2019 2020 2021  Sum
##                                   0    0    0    1
##    International                  5    7    3  277
##    International Monetary Fund    0    0    0    3
##    Non United States            163  130   81 2676
##    Other                          6    6    2  110
##    United Nations                19   14    7  527
##    United States                218  195   95 4678
##    World Bank Group               1    2    0   25
##   Sum                           412  354  188 8297
usw %>% select(channel_source, fiscal_year,assistance_category_name) %>% group_by(channel_source,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% 
  ggplot(aes(x = fct_reorder(channel_source, count),y=count, label = count))+
  geom_bar(stat = "identity", fill = "blue")+
  coord_flip()+theme(legend.position = "none")+
  geom_text(vjust = -0.4)+
  labs(title = "Funding agencies for Projects",
    subtitle = "Details of funding agencies",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Funding agency")+
facet_wrap(~assistance_category_name)
## `summarise()` has grouped output by 'channel_source'. You can override using
## the `.groups` argument.

library(forcats)
chan_nat_year <- xtabs(~ channel_nature + fiscal_year , data = usw)
addmargins(chan_nat_year)
##                                        fiscal_year
## channel_nature                          2001 2002 2003 2004 2005 2006 2007 2008
##   Church and Faith Based                   1    0    4    3    3    4    9    7
##   Enterprises                              2    7   21   47   51   90  114  154
##   Government                               4   15   41   19   24   41   62   80
##   Multilateral                             3   10   19    7   11   39   19   27
##   NGO                                     40   29   40   48   56   75  102   87
##   Public and Private Partnerships          0    0    0    0    0    0    0    0
##   Universities and Research Institutes     0    0    2    3    4    7    7    9
##   Sum                                     50   61  127  127  149  256  313  364
##                                        fiscal_year
## channel_nature                          2009 2010 2011 2012 2013 2014 2015 2016
##   Church and Faith Based                   9   10   10    8    8    5   12   15
##   Enterprises                            225  269  264  211  309  244  256  202
##   Government                             109  153  138  130  146  164  145  149
##   Multilateral                            51   72   85   62   37   25   41   36
##   NGO                                    128  135  169  175  173  137  114  113
##   Public and Private Partnerships          0    0    0    0    0    0    1    0
##   Universities and Research Institutes     8   10   10   16   10   20   25   18
##   Sum                                    530  649  676  602  683  595  594  533
##                                        fiscal_year
## channel_nature                          2017 2018 2019 2020 2021  Sum
##   Church and Faith Based                   9    5    5    1    0  128
##   Enterprises                            214  178  174  156   98 3286
##   Government                             181  138  112  110   51 2012
##   Multilateral                            31   33   26   22    9  665
##   NGO                                    109   98   80   54   28 1990
##   Public and Private Partnerships          0    0    0    0    0    1
##   Universities and Research Institutes    19   19   15   11    2  215
##   Sum                                    563  471  412  354  188 8297
usw %>% select(channel_nature, fiscal_year,assistance_category_name) %>% group_by(channel_nature,assistance_category_name) %>% summarize(count = n()) %>% arrange(desc(count)) %>% mutate(channel_nature = as.factor(channel_nature)) %>%
  ggplot(aes(x = fct_reorder(channel_nature, count),y=count, label = count))+
  geom_bar(stat = "identity", fill = "blue")+
  coord_flip()+theme(legend.position = "none")+
  geom_text(vjust = -0.4)+
  labs(title = "Funding agencies for Projects",
    subtitle = "Details of funding agencies",
    caption = "US aid program 2001-2023",
    y = "Number of projects", x = "Funding agency")+
facet_wrap(~assistance_category_name)
## `summarise()` has grouped output by 'channel_nature'. You can override using
## the `.groups` argument.

head(usw)
## # A tibble: 6 × 19
##   `_id` implementing_agency_name            subagency_name channel_category_name
##   <dbl> <chr>                               <chr>          <chr>                
## 1     1 U.S. Agency for International Deve… Bureau for As… Government           
## 2     2 Department of Defense               Office of the… Government           
## 3     3 Department of Defense               Office of the… Government           
## 4     4 Department of Defense               Defense Secur… Government           
## 5     5 Department of Defense               Defense Secur… Government           
## 6     6 Department of Defense               Defense Secur… Government           
## # ℹ 15 more variables: channel_subcategory_name <chr>, channel_name <chr>,
## #   dac_category_name <chr>, dac_sector_name <chr>, dac_purpose_name <chr>,
## #   funding_account_name <chr>, assistance_category_name <chr>,
## #   activity_name <chr>, fiscal_year <dbl>, current_amount <dbl>,
## #   funding_agency_name <chr>, aid_type_group_name <chr>,
## #   activity_description <chr>, channel_nature <chr>, channel_source <chr>
colnames(usw)
##  [1] "_id"                      "implementing_agency_name"
##  [3] "subagency_name"           "channel_category_name"   
##  [5] "channel_subcategory_name" "channel_name"            
##  [7] "dac_category_name"        "dac_sector_name"         
##  [9] "dac_purpose_name"         "funding_account_name"    
## [11] "assistance_category_name" "activity_name"           
## [13] "fiscal_year"              "current_amount"          
## [15] "funding_agency_name"      "aid_type_group_name"     
## [17] "activity_description"     "channel_nature"          
## [19] "channel_source"

Dig down projects per year

usw <- usw %>% 
  mutate(NumChar = nchar(activity_description), 
         NumWords = str_count(activity_description, patter = "\\w+" ))
  
  
range(usw$fiscal_year)
## [1] 2001 2021
datatable(usw %>% select(fiscal_year, activity_description,assistance_category_name) %>% 
            filter(fiscal_year == 2001 & assistance_category_name == "Economic"), caption = NULL, options = list(dom="t"))
datatable(usw %>% select(fiscal_year, activity_description,assistance_category_name) %>% 
            filter(fiscal_year == 2001 & assistance_category_name == "Military"), caption = NULL, options = list(dom="t"))
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:xgboost':
## 
##     slice
## The following object is masked from 'package:lightgbm':
## 
##     slice
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
usw %>% select(NumChar, NumWords, fiscal_year) %>% 
  ggplot(aes(x = NumChar, y = NumWords,col = as.factor(fiscal_year))) +
  geom_point() +
  labs(title = "Number of character vs Number of words by year",
       x = "Number of Characters",
       col = "fiscal_year",
       y = "Number of words")

usw %>% select(NumChar) %>% 
  ggplot(aes(x = NumChar)) +
  geom_histogram(fill = "darkred", bins = 30) +
  labs(title = "Number of characters used",
       x = "Number of characters")

#Number of characters by Year(Boxplot)

usw %>% select(NumChar, fiscal_year) %>% 
  ggplot(aes(x = as.factor(fiscal_year), y = NumChar))+
  geom_boxplot(aes(fill = as.factor(fiscal_year)))+
  labs(title = "Number of character by year",
       x = "Year",
       fill = "year",
       y = "Number of Characters")+
  theme(axis.text.x = element_text(angle = 90,vjust=0.5, size=7), legend.position = "none")

library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.2.3
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.3
usw %>% select(activity_description) %>% unnest_tokens(word, activity_description) %>% anti_join(get_stopwords()) %>% 
  count(word, sort = TRUE) %>% 
  slice_max(n, n = 20) %>% 
  ggplot(aes(
    x = fct_reorder(word, n), y =n, fill = word))+
  geom_col()+ coord_flip()+
  theme(legend.position = "none") +
  labs(
    title = "The most word used",
    subtitle = "Bar plot, The most word used",
    caption = "Kaggle: All Trump's Twitter insults (2015-2021)",
    x = "word",
    y = "Count"
  )
## Joining with `by = join_by(word)`

#bigram

usw %>% select(activity_description, fiscal_year) %>% 
  unnest_tokens(Bigram, activity_description, token = "ngrams", n = 2) %>%
  separate(Bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word,!word2 %in% stop_words$word) %>%
  unite(Bigram, word1, word2, sep = " ") %>%
  filter(Bigram != "https t.co") %>%
  count(Bigram, fiscal_year, sort = TRUE) %>%
  mutate(Bigram = reorder_within(Bigram, n, fiscal_year)) %>% 
  slice_max(n, n = 30) %>% 
  ggplot(aes(
    x = fct_reorder(Bigram, n),
    y = n,
    fill = Bigram
  )) +
  geom_col() +
  coord_flip() +
  facet_wrap(vars(fiscal_year), scales = "free_y", ncol = 2) +
  scale_x_reordered() +
  scale_x_reordered() +
  theme(legend.position = "none") +
  labs(
    title = "The most words used (Bigram)",
    subtitle = "Bar plot and facet wrap, The most words used (Bigram)",
    caption = "US aid program 2001-2022",
    x = "Bigram",
    y = "Count"
  )
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.