Federal Research and Development Spending by Agency

GitHub Link

The NY Times has a recent article on federal Research and Development spending by agency.Data comes directly from the American Association for the Advancement of Science Historical Trends. In this Assignment we will break down the spending by political parties in the US for the periods supplied to establish patterns in Democrat and Republican Party spending for future Governments. Predictive Analytics will be run through the data and represented in Graphical Format.

EXPLORE DATA

CLIMATE SPEND

##Democrats and Republic Split in spending

## # A tibble: 2 x 2
##   party_in_power gcc_spending
##   <chr>                 <dbl>
## 1 Democrat             22773.
## 2 Republican           20882.

Build a Model For Climate Spend

## # Bootstrap sampling 
## # A tibble: 25 x 2
##    splits           id         
##    <list>           <chr>      
##  1 <split [126/43]> Bootstrap01
##  2 <split [126/46]> Bootstrap02
##  3 <split [126/43]> Bootstrap03
##  4 <split [126/47]> Bootstrap04
##  5 <split [126/41]> Bootstrap05
##  6 <split [126/41]> Bootstrap06
##  7 <split [126/41]> Bootstrap07
##  8 <split [126/44]> Bootstrap08
##  9 <split [126/45]> Bootstrap09
## 10 <split [126/48]> Bootstrap10
## # … with 15 more rows
## ══ Workflow ══════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
## 
## ── Preprocessor ──────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ─────────────────────────────────────────────────────────────────────────
## Random Forest Model Specification (classification)
## 
## Main Arguments:
##   trees = 1000
## 
## Computational engine: ranger
climate_res %>%
  collect_metrics()
## # A tibble: 2 x 5
##   .metric  .estimator  mean     n std_err
##   <chr>    <chr>      <dbl> <int>   <dbl>
## 1 accuracy binary     0.903    25 0.0117 
## 2 roc_auc  binary     0.980    25 0.00494
##             Truth
## Prediction   Democrat Republican
##   Democrat        512         55
##   Republican       56        515
climate_res %>%
  collect_predictions() %>%
  group_by(id) %>%
  ppv(party_in_power, .pred_class) %>% 
  ggplot(aes(.estimate)) +
  geom_histogram(bins=10)  +
  ggtitle("Climate Spend Data Model Histogram")

##set importance

## # A tibble: 1,138 x 10
##    id    .pred_Democrat .pred_Republican  .row .pred_class party_in_power
##    <chr>          <dbl>            <dbl> <int> <fct>       <fct>         
##  1 Boot…         0.541             0.459     1 Democrat    Democrat      
##  2 Boot…         0.163             0.837     2 Republican  Republican    
##  3 Boot…         0.0548            0.945     3 Republican  Republican    
##  4 Boot…         0.0458            0.954     6 Republican  Republican    
##  5 Boot…         0.0520            0.948    22 Republican  Republican    
##  6 Boot…         0.0517            0.948    23 Republican  Republican    
##  7 Boot…         0.0551            0.945    24 Republican  Republican    
##  8 Boot…         0.124             0.876    25 Republican  Republican    
##  9 Boot…         0.213             0.787    27 Republican  Republican    
## 10 Boot…         0.857             0.143    29 Democrat    Democrat      
## # … with 1,128 more rows, and 4 more variables: correct <lgl>,
## #   department <fct>, year <dbl>, gcc_spending <dbl>

ENERGY SPEND

Build a Model For Energy Spend

Data Frame, Recipe and Predictive Model for Climate Spend

## # Bootstrap sampling 
## # A tibble: 25 x 2
##    splits           id         
##    <list>           <chr>      
##  1 <split [231/83]> Bootstrap01
##  2 <split [231/88]> Bootstrap02
##  3 <split [231/78]> Bootstrap03
##  4 <split [231/82]> Bootstrap04
##  5 <split [231/84]> Bootstrap05
##  6 <split [231/83]> Bootstrap06
##  7 <split [231/84]> Bootstrap07
##  8 <split [231/86]> Bootstrap08
##  9 <split [231/80]> Bootstrap09
## 10 <split [231/76]> Bootstrap10
## # … with 15 more rows
## ══ Workflow ══════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
## 
## ── Preprocessor ──────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ─────────────────────────────────────────────────────────────────────────
## Random Forest Model Specification (classification)
## 
## Main Arguments:
##   trees = 1000
## 
## Computational engine: ranger
energy_res %>%
  collect_metrics()
## # A tibble: 2 x 5
##   .metric  .estimator  mean     n  std_err
##   <chr>    <chr>      <dbl> <int>    <dbl>
## 1 accuracy binary     0.968    25 0.00479 
## 2 roc_auc  binary     0.999    25 0.000361
energy_res %>%
  collect_predictions() %>%
  conf_mat(party_in_power1,.pred_class)
##             Truth
## Prediction   Democrat Republican
##   Democrat       1325         55
##   Republican       13        744
energy_res %>%
  collect_predictions() %>%
  group_by(id) %>%
  ppv(party_in_power1, .pred_class) %>% 
  ggplot(aes(.estimate)) +
  geom_histogram(bins=10)  +
  ggtitle("Energy Spend Data Model Histogram")

##set importance

energy_pred<- energy_res %>%
  collect_predictions() %>%
  mutate(correct = party_in_power1 == .pred_class) %>%
  #count(correct)
  left_join(energy_df %>%
               mutate(.row = row_number()))

energy_pred
## # A tibble: 2,137 x 10
##    id    .pred_Democrat .pred_Republican  .row .pred_class party_in_power1
##    <chr>          <dbl>            <dbl> <int> <fct>       <fct>          
##  1 Boot…          0.851           0.149      5 Democrat    Democrat       
##  2 Boot…          0.503           0.497      6 Democrat    Republican     
##  3 Boot…          0.229           0.771      8 Republican  Republican     
##  4 Boot…          0.228           0.772      9 Republican  Republican     
##  5 Boot…          0.225           0.775     11 Republican  Republican     
##  6 Boot…          0.864           0.136     13 Democrat    Democrat       
##  7 Boot…          0.970           0.0304    17 Democrat    Democrat       
##  8 Boot…          0.768           0.232     22 Democrat    Democrat       
##  9 Boot…          0.648           0.352     24 Democrat    Democrat       
## 10 Boot…          0.618           0.382     26 Democrat    Democrat       
## # … with 2,127 more rows, and 4 more variables: correct <lgl>,
## #   department <fct>, year <dbl>, energy_spending <dbl>
ggplot() +
  geom_line(data = energy_spend,
            aes(year, energy_spending, color = department)) +
  stat_summary(data = energy_pred,
               aes(year, energy_spending, z = as.integer(correct)),
               fun = "mean",
               alpha = 0.7) +
  ggtitle("Energy Spend Data Predictive Model by Energy Spend per Year")+
  xlab("Year") + # for the x axis label
  ylab("Spending Amount $millions (US)")

FED RD

Build a Model For FED RD Budget

## # Bootstrap sampling 
## # A tibble: 25 x 2
##    splits            id         
##    <list>            <chr>      
##  1 <split [588/220]> Bootstrap01
##  2 <split [588/223]> Bootstrap02
##  3 <split [588/225]> Bootstrap03
##  4 <split [588/206]> Bootstrap04
##  5 <split [588/220]> Bootstrap05
##  6 <split [588/214]> Bootstrap06
##  7 <split [588/220]> Bootstrap07
##  8 <split [588/223]> Bootstrap08
##  9 <split [588/213]> Bootstrap09
## 10 <split [588/224]> Bootstrap10
## # … with 15 more rows
## ══ Workflow ══════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: rand_forest()
## 
## ── Preprocessor ──────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ─────────────────────────────────────────────────────────────────────────
## Random Forest Model Specification (classification)
## 
## Main Arguments:
##   trees = 1000
## 
## Computational engine: ranger
## # A tibble: 2 x 5
##   .metric  .estimator  mean     n  std_err
##   <chr>    <chr>      <dbl> <int>    <dbl>
## 1 accuracy binary      1.00    25 0.000181
## 2 roc_auc  binary      1       25 0
##             Truth
## Prediction   Republican Democrat
##   Republican       2677        0
##   Democrat            1     2721

##set importance

## # A tibble: 7,743 x 12
##    id    .pred_Republican .pred_Democrat  .row .pred_class party_in_power2
##    <chr>            <dbl>          <dbl> <int> <fct>       <fct>          
##  1 Boot…            0.938         0.0617     3 Republican  Republican     
##  2 Boot…            0.956         0.0439     4 Republican  Republican     
##  3 Boot…            0.958         0.0424     6 Republican  Republican     
##  4 Boot…            0.911         0.0891     7 Republican  Republican     
##  5 Boot…            0.969         0.0313    10 Republican  Republican     
##  6 Boot…            0.824         0.176     12 Republican  Republican     
##  7 Boot…            0.672         0.328     13 Republican  Republican     
##  8 Boot…            0.885         0.115     14 Republican  Republican     
##  9 Boot…            0.824         0.176     15 Republican  Republican     
## 10 Boot…            0.879         0.121     17 Republican  Republican     
## # … with 7,733 more rows, and 6 more variables: correct <lgl>,
## #   department <chr>, year <dbl>, rd_budget <dbl>, gdp <dbl>, rd_gdp <dbl>
ggplot() +
  geom_line(data = fed_rd_df,
            aes(year, rd_gdp * 100 , color = department)) +
  stat_summary(data = fed_rd_pred,
               aes(year, rd_gdp * 100 , z = as.integer(correct)),
               fun = "mean",
               alpha = 0.7) +
ggtitle("Data Frame Prediction of Fed RD Budget by %GDP")+
  xlab("Year") + # for the x axis label
  ylab("% of GDP") 

```