Author: Suriya Manimaran - 836854117

Question 1

Graph 1
#Question 1 Graph 1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
library(scales)
library(ggrepel)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
# Import the NSF funding workbook into a tibble.
data_all <- read_xlsx(path = "NSF_Funding_by_Year.xlsx")
# Open the raw import in the data viewer for a quick visual check.
view(x = data_all)

Data Preparation for Bar, Scatter and Bubble plots

# Build the 2012 working subset: keep the five columns of interest and the
# organizations whose name contains one of the state/school keywords below.
# This is a plain substring match, so it also keeps organizations such as
# "George Washington University" that merely share a keyword.
# Note: "AvarageAwardDuration" is spelled this way in the select() call and
# must match the column name in the workbook.
data_subset2 <- data_all %>%
  dplyr::select(
    Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration
  ) %>%
  filter(
    # Compare the year against a number instead of relying on implicit
    # coercion to the string "2012".
    AwardYear == 2012,
    # A single alternation pattern replaces fourteen OR-ed str_detect() calls.
    str_detect(
      Organization,
      str_c(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )

# %>% droplevels()
# Open the filtered rows in the data viewer.
view(x = data_subset2)
# Store Organization as a factor, then list its levels to see exactly which
# organization names passed the keyword filter.
data_subset2 <- data_subset2 %>%
  mutate(Organization = as.factor(Organization))
levels(data_subset2[["Organization"]])
##  [1] "California Academy of Sciences"                                 
##  [2] "California Institute of Technology"                             
##  [3] "California Polytechnic State University Foundation"             
##  [4] "California State L A University Auxiliary Services Inc."        
##  [5] "California State University-Bakersfield"                        
##  [6] "California State University-Dominguez Hills"                    
##  [7] "California State University-Fresno Foundation"                  
##  [8] "California State University-Fullerton Foundation"               
##  [9] "California State University-Long Beach Foundation"              
## [10] "California State University-San Bernardino Foundation"          
## [11] "California State University-Stanislaus"                         
## [12] "California State University, Chico Research Fdtn"               
## [13] "Carnegie Institution of Washington"                             
## [14] "Central Michigan University"                                    
## [15] "Central Washington University"                                  
## [16] "College of William & Mary Virginia Institute of Marine Science" 
## [17] "Colorado School of Mines"                                       
## [18] "Colorado State University"                                      
## [19] "Eastern Illinois University"                                    
## [20] "Eastern Michigan University"                                    
## [21] "Friends of the North Carolina State Museum of Natural Sciences" 
## [22] "George Washington University"                                   
## [23] "Illinois Institute of Technology"                               
## [24] "Illinois State University"                                      
## [25] "Illinois Wesleyan University"                                   
## [26] "Iowa State University"                                          
## [27] "Kansas State University"                                        
## [28] "Michigan State University"                                      
## [29] "Michigan Technological University"                              
## [30] "Middle Tennessee State University"                              
## [31] "North Carolina Agricultural & Technical State University"       
## [32] "North Carolina Central University"                              
## [33] "North Carolina State University"                                
## [34] "Northeastern Illinois University"                               
## [35] "Northern Illinois University"                                   
## [36] "Oklahoma State University"                                      
## [37] "Oregon Health and Science University"                           
## [38] "Oregon Museum of Science and Industry"                          
## [39] "Oregon State University"                                        
## [40] "Purdue University"                                              
## [41] "Southern Illinois University at Carbondale"                     
## [42] "Southern Illinois University at Edwardsville"                   
## [43] "Tennessee State University"                                     
## [44] "Tennessee Technological University"                             
## [45] "Texas A&M International University"                             
## [46] "Texas A&M Research Foundation"                                  
## [47] "Texas A&M University-Commerce"                                  
## [48] "Texas A&M University Corpus Christi"                            
## [49] "Texas A&M University Main Campus"                               
## [50] "University of California-Berkeley"                              
## [51] "University of California-Davis"                                 
## [52] "University of California-Irvine"                                
## [53] "University of California-Los Angeles"                           
## [54] "University of California-Riverside"                             
## [55] "University of California-San Diego"                             
## [56] "University of California-San Diego Scripps Inst of Oceanography"
## [57] "University of California-San Francisco"                         
## [58] "University of California-Santa Barbara"                         
## [59] "University of California-Santa Cruz"                            
## [60] "University of California - Merced"                              
## [61] "University of Colorado at Boulder"                              
## [62] "University of Colorado at Colorado Springs"                     
## [63] "University of Colorado at Denver"                               
## [64] "University of Colorado at Denver and Health Sciences Center"    
## [65] "University of Illinois at Chicago"                              
## [66] "University of Illinois at Urbana-Champaign"                     
## [67] "University of Iowa"                                             
## [68] "University of Kansas Center for Research Inc"                   
## [69] "University of Michigan Ann Arbor"                               
## [70] "University of Missouri-Kansas City"                             
## [71] "University of North Carolina at Chapel Hill"                    
## [72] "University of North Carolina at Charlotte"                      
## [73] "University of North Carolina at Wilmington"                     
## [74] "University of North Carolina Greensboro"                        
## [75] "University of Northern Colorado"                                
## [76] "University of Oklahoma Norman Campus"                           
## [77] "University of Oregon Eugene"                                    
## [78] "University of Southern California"                              
## [79] "University of Tennessee Chattanooga"                            
## [80] "University of Tennessee Knoxville"                              
## [81] "University of Virginia Main Campus"                             
## [82] "University of Washington"                                       
## [83] "Virginia Commonwealth University"                               
## [84] "Virginia Polytechnic Institute and State University"            
## [85] "Virginia State University"                                      
## [86] "Washington State University"                                    
## [87] "Washington University"                                          
## [88] "West Virginia University Research Corporation"                  
## [89] "Western Michigan University"                                    
## [90] "Western Washington University"
# Map each organization of interest to a short label (Names) and drop every
# other row. The first matching rule wins; organizations that match no rule
# keep their full name and are removed by the filter that follows.
# NOTE(review): matching is by substring, so several campuses share one label.
# In the 2012 data "Texas A&M University" matches the Commerce, Corpus Christi
# and Main Campus rows (all become TAMU) and "University of Tennessee" matches
# both Chattanooga and Knoxville (both become UT) -- confirm this merging is
# intended.
data_alternative <- data_subset2 %>%
  mutate(
    Names = case_when(
      str_detect(Organization, "Washington State University") ~ "WSU",
      str_detect(
        Organization, "Virginia Polytechnic Institute and State University"
      ) ~ "VPISU",
      str_detect(Organization, "University of Tennessee") ~ "UT",
      str_detect(
        Organization, "University of Illinois at Urbana-Champaign"
      ) ~ "UIUC",
      str_detect(Organization, "University of California-Davis") ~ "UCD",
      str_detect(Organization, "Texas A&M University") ~ "TAMU",
      str_detect(Organization, "Purdue University") ~ "PU",
      str_detect(Organization, "Oregon State University") ~ "ORSU",
      str_detect(Organization, "Oklahoma State University") ~ "OKSU",
      str_detect(Organization, "North Carolina State University") ~ "NCSU",
      str_detect(Organization, "Michigan State University") ~ "MSU",
      str_detect(Organization, "Kansas State University") ~ "KSU",
      str_detect(Organization, "Iowa State University") ~ "ISU",
      str_detect(Organization, "Colorado State University") ~ "CSU",
      TRUE ~ as.character(Organization)
    )
  ) %>%
  # Set membership replaces the fourteen-way chain of `==` comparisons.
  filter(
    Names %in% c(
      "WSU", "VPISU", "UT", "UIUC", "UCD", "TAMU", "PU",
      "ORSU", "OKSU", "NCSU", "MSU", "KSU", "ISU", "CSU"
    )
  ) %>%
  # Remove factor levels of Organization that no longer occur.
  droplevels()
# List the labels that are present after filtering.
levels(as.factor(data_alternative$Names))
##  [1] "CSU"   "ISU"   "KSU"   "MSU"   "NCSU"  "OKSU"  "ORSU"  "PU"    "TAMU" 
## [10] "UCD"   "UIUC"  "UT"    "VPISU" "WSU"
# Collapse to one row per short organization label.
#   as = total award dollars, ac = total number of awards, ay = award year.
# AwardYear identifies the year and is not a quantity: summing it produced
# meaningless values (6036, 4024) for labels that cover several rows, so the
# first value of the group is reported instead.
data_alternative_summary <- data_alternative %>%
  group_by(Names) %>%
  summarise(
    as = sum(AWARD_SUM),
    ac = sum(AWARD_COUNT),
    ay = first(AwardYear)
  )
data_alternative_summary
## # A tibble: 14 × 4
##    Names       as    ac    ay
##    <chr>    <dbl> <dbl> <dbl>
##  1 CSU   11592315    41  2012
##  2 ISU   16738011    65  2012
##  3 KSU    8839116    23  2012
##  4 MSU   17351663    58  2012
##  5 NCSU  31440526    91  2012
##  6 OKSU   5331516    21  2012
##  7 ORSU  23180107    86  2012
##  8 PU    33991966   121  2012
##  9 TAMU  15471170    59  2012
## 10 UCD   24867252    87  2012
## 11 UIUC  43536459   153  2012
## 12 UT    13646485    51  2012
## 13 VPISU 19609620    78  2012
## 14 WSU   14509261    43  2012

Data Visualization

Initializing - Bar Plots

# Preview the first six summary rows before plotting.
head(data_alternative_summary, n = 6L)
## # A tibble: 6 × 4
##   Names       as    ac    ay
##   <chr>    <dbl> <dbl> <dbl>
## 1 CSU   11592315    41  2012
## 2 ISU   16738011    65  2012
## 3 KSU    8839116    23  2012
## 4 MSU   17351663    58  2012
## 5 NCSU  31440526    91  2012
## 6 OKSU   5331516    21  2012
# Horizontal bar chart of 2012 award counts, one bar per organization label,
# with bars ordered by count.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = reorder(Names, ac))
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  ggtitle(
    label = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar Plot",
    subtitle = "CSU vs. Other-Orgs"
  ) +
  xlab("AwardCount") +
  ylab("Organizations") +
  # Fix the panel shape so the plot is 2.5 times as wide as it is tall.
  theme(aspect.ratio = 1 / 2.5)

Add a column which indicates whether a category should be highlighted:

# Flag the CSU row so plots can single it out:
#   Highlight   - "Yes" for CSU, "No" for every other organization
#   Highlight_a - "CSU" for CSU, an empty string otherwise
data_alternative_summary <- data_alternative_summary %>%
  mutate(
    Highlight = ifelse(Names == "CSU", "Yes", "No"),
    Highlight_a = ifelse(Names == "CSU", "CSU", "")
  )

Bar Plots (CSU VS Other Orgs)

# Horizontal bars of 2012 award totals in millions of USD, ordered by total.
# The CSU bar is filled dark green and all others dark gray; every bar gets a
# red award-sum label and a blue award-count label.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1000000, y = reorder(Names, as), fill = Highlight)
) +
  geom_col() +
  # Award-sum label (red).
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Award-count label (blue).
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  ggtitle(
    label = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs"
  ) +
  xlab("Award Sum: (USD) - M$") +
  ylab("Organizations")

## Compare the effects of color=Highlight and fill=Highlight: ## Bar Plot - (CSU VS Other Orgs)

# Same bar chart as the fill version, but Highlight is mapped to `color`, which
# for geom_col() controls the bar outline rather than the interior.
ggplot(
  data_alternative_summary,
  aes(y = reorder(Names, as), x = as / 1000000, color = Highlight)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Award-sum label (red); the constant color overrides the mapped one.
  geom_text_repel(
    aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    size = 3,
    hjust = -2,
    nudge_y = 0.2,
    nudge_x = 0.2,
    direction = "x",
    color = "red",
    max.overlaps = Inf
  ) +
  # Award-count label (blue).
  geom_text_repel(
    aes(label = paste0("Awards Count: ", ac)),
    size = 3,
    hjust = -1,
    nudge_y = 0.5,
    nudge_x = 0.5,
    direction = "x",
    color = "blue",
    max.overlaps = Inf
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  ) +
  # The plot maps `color`, so the manual palette must be a color scale; the
  # previous scale_fill_manual() had no aesthetic to act on.
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )

Scatter Plot (CSU VS Other Orgs)

# Scatter plot of 2012 award count (x) against award total in millions of USD
# (y). CSU is drawn in dark green, the other organizations in dark gray.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1000000, color = Highlight)
) +
  geom_point(stroke = 1.5, alpha = 0.5) +
  # Organization label; inherits the Highlight color mapping.
  geom_text_repel(mapping = aes(label = Names), vjust = 0.5, hjust = 0.5) +
  # Award-sum label (red), nudged less than the count label so it sits lower.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    point.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = Inf
  ) +
  # Award-count label (blue).
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    point.padding = 0.2,
    box.padding = 0.2,
    max.overlaps = Inf
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  ggtitle(
    label = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Scatter Plot",
    subtitle = "CSU vs. Other-Orgs"
  ) +
  xlab("AwardCount") +
  ylab("Award Sum: (USD) - M$")

Bubble Chart (CSU VS Other Orgs)

# Bubble chart: same axes as the scatter plot, with point size additionally
# mapped to the award count. CSU is dark green, the others dark gray.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1000000, size = ac, color = Highlight)
) +
  geom_point(stroke = 1.5, alpha = 0.5) +
  # Organization label at a fixed text size (overrides the size mapping).
  geom_text_repel(mapping = aes(label = Names), size = 5) +
  # Award-sum label (red).
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    point.padding = 0.3,
    box.padding = 0.3,
    max.overlaps = Inf
  ) +
  # Award-count label (blue).
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    point.padding = 0.5,
    box.padding = 0.5,
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  ggtitle(
    label = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bubble plot",
    subtitle = "CSU vs. Other-Orgs"
  ) +
  xlab("AwardCount") +
  ylab("Award Sum: (USD) - M$")

## Question 2: Time series

Time Series - Data Preparation

# Build the all-years working subset for the time series: keep the five
# columns of interest and the organizations whose name contains one of the
# state/school keywords below. No year filter is applied here.
# This is a plain substring match, so it also keeps organizations that merely
# share a keyword (e.g. "George Washington University").
data_time_subset <- data_all %>%
  dplyr::select(
    Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration
  ) %>%
  filter(
    # A single alternation pattern replaces fourteen OR-ed str_detect() calls.
    str_detect(
      Organization,
      str_c(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )

# %>% droplevels()
# Open the filtered rows in the data viewer.
view(x = data_time_subset)
# Store Organization as a factor, then list its levels to see exactly which
# organization names passed the keyword filter across all years.
data_time_subset <- data_time_subset %>%
  mutate(Organization = as.factor(Organization))
levels(data_time_subset[["Organization"]])
##   [1] "Benaroya Research Institute at Virginia Mason"                   
##   [2] "California Academy of Sciences"                                  
##   [3] "California Institute of Technology"                              
##   [4] "California Lutheran University"                                  
##   [5] "California Polytechnic State University Foundation"              
##   [6] "California State L A University Auxiliary Services Inc."         
##   [7] "California State University-Bakersfield"                         
##   [8] "California State University-Dominguez Hills"                     
##   [9] "California State University-Fresno Foundation"                   
##  [10] "California State University-Fullerton Foundation"                
##  [11] "California State University-Long Beach"                          
##  [12] "California State University-Long Beach Foundation"               
##  [13] "California State University-Los Angeles"                         
##  [14] "California State University-San Bernardino Foundation"           
##  [15] "California State University-Stanislaus"                          
##  [16] "California State University Bakersfield Foundation"              
##  [17] "California State University Monterey Bay Foundation"             
##  [18] "California State University, Chico Research Fdtn"                
##  [19] "California State University, East Bay Foundation, Inc."          
##  [20] "California State University, Trustees"                           
##  [21] "Carnegie Institution of Washington"                              
##  [22] "Central Michigan University"                                     
##  [23] "Central Virginia Community College"                              
##  [24] "Central Washington University"                                   
##  [25] "City Colleges of Chicago Harold Washington College"              
##  [26] "College of William & Mary Virginia Institute of Marine Science"  
##  [27] "Colorado College"                                                
##  [28] "Colorado School of Mines"                                        
##  [29] "Colorado State University"                                       
##  [30] "Colorado State University-Pueblo"                                
##  [31] "East Tennessee State University"                                 
##  [32] "Eastern Illinois University"                                     
##  [33] "Eastern Iowa Community College"                                  
##  [34] "Eastern Michigan University"                                     
##  [35] "Eastern Washington University"                                   
##  [36] "Friends of the North Carolina State Museum of Natural Sciences"  
##  [37] "George Washington University"                                    
##  [38] "Illinois Institute of Technology"                                
##  [39] "Illinois State Museum Society"                                   
##  [40] "Illinois State University"                                       
##  [41] "Illinois Wesleyan University"                                    
##  [42] "Iowa State University"                                           
##  [43] "Kansas City University of Medicine and Biosciences"              
##  [44] "Kansas State University"                                         
##  [45] "Michigan State University"                                       
##  [46] "Michigan Technological University"                               
##  [47] "Middle Tennessee State University"                               
##  [48] "North Carolina Agricultural & Technical State University"        
##  [49] "North Carolina Central University"                               
##  [50] "North Carolina State University"                                 
##  [51] "Northeastern Illinois University"                                
##  [52] "Northern Illinois University"                                    
##  [53] "Oklahoma Medical Research Foundation"                            
##  [54] "Oklahoma State University"                                       
##  [55] "Oregon Health and Science University"                            
##  [56] "Oregon Museum of Science and Industry"                           
##  [57] "Oregon State University"                                         
##  [58] "Piedmont Virginia Community College"                             
##  [59] "Purdue Research Foundation"                                      
##  [60] "Purdue University"                                               
##  [61] "Southern Illinois University at Carbondale"                      
##  [62] "Southern Illinois University at Edwardsville"                    
##  [63] "Southern Oregon University"                                      
##  [64] "Tennessee State University"                                      
##  [65] "Tennessee Technological University"                              
##  [66] "Texas A&M International University"                              
##  [67] "Texas A&M Research Foundation"                                   
##  [68] "Texas A&M University-Commerce"                                   
##  [69] "Texas A&M University-Kingsville"                                 
##  [70] "Texas A&M University Corpus Christi"                             
##  [71] "Texas A&M University Main Campus"                                
##  [72] "The Texas A&M University System  HSC Research Foundation"        
##  [73] "The University of Tennessee, Memphis - The Health Science Center"
##  [74] "University of California-Berkeley"                               
##  [75] "University of California-Davis"                                  
##  [76] "University of California-Irvine"                                 
##  [77] "University of California-Los Angeles"                            
##  [78] "University of California-Riverside"                              
##  [79] "University of California-San Diego"                              
##  [80] "University of California-San Diego Scripps Inst of Oceanography" 
##  [81] "University of California-San Francisco"                          
##  [82] "University of California-Santa Barbara"                          
##  [83] "University of California-Santa Cruz"                             
##  [84] "University of California - Merced"                               
##  [85] "University of California, Office of the President, Oakland"      
##  [86] "University of Central Oklahoma"                                  
##  [87] "University of Colorado at Boulder"                               
##  [88] "University of Colorado at Colorado Springs"                      
##  [89] "University of Colorado at Denver"                                
##  [90] "University of Colorado at Denver and Health Sciences Center"     
##  [91] "University of Illinois at Chicago"                               
##  [92] "University of Illinois at Springfield"                           
##  [93] "University of Illinois at Urbana-Champaign"                      
##  [94] "University of Iowa"                                              
##  [95] "University of Kansas Center for Research Inc"                    
##  [96] "University of Kansas Medical Center"                             
##  [97] "University of Mary Washington"                                   
##  [98] "University of Michigan Ann Arbor"                                
##  [99] "University of Missouri-Kansas City"                              
## [100] "University of North Carolina at Asheville"                       
## [101] "University of North Carolina at Chapel Hill"                     
## [102] "University of North Carolina at Charlotte"                       
## [103] "University of North Carolina at Pembroke"                        
## [104] "University of North Carolina at Wilmington"                      
## [105] "University of North Carolina Greensboro"                         
## [106] "University of Northern Colorado"                                 
## [107] "University of Northern Iowa"                                     
## [108] "University of Oklahoma Health Sciences Center"                   
## [109] "University of Oklahoma Norman Campus"                            
## [110] "University of Oregon Eugene"                                     
## [111] "University of Southern California"                               
## [112] "University of Tennessee Chattanooga"                             
## [113] "University of Tennessee Institute of Agriculture"                
## [114] "University of Tennessee Knoxville"                               
## [115] "University of Tennessee Space Institute"                         
## [116] "University of Virginia Main Campus"                              
## [117] "University of Washington"                                        
## [118] "Virginia Commonwealth University"                                
## [119] "Virginia Military Institute Research Laboratories"               
## [120] "Virginia Museum of Natural History Foundation"                   
## [121] "Virginia Polytechnic Institute and State University"             
## [122] "Virginia State University"                                       
## [123] "Washington and Jefferson College"                                
## [124] "Washington and Lee University"                                   
## [125] "Washington State University"                                     
## [126] "Washington University"                                           
## [127] "Washington University School of Medicine"                        
## [128] "West Virginia High Technology Consortium Foundation"             
## [129] "West Virginia State University"                                  
## [130] "West Virginia University Research Corporation"                   
## [131] "Western Illinois University"                                     
## [132] "Western Michigan University"                                     
## [133] "Western Washington University"
# Map each organization of interest to a short label (Names) and drop every
# other row. The first matching rule wins; organizations that match no rule
# keep their full name and are removed by the filter that follows.
# NOTE(review): matching is by substring, so several organizations share one
# label. Across all years, "Texas A&M University" also matches the Commerce,
# Kingsville, Corpus Christi and System HSC Research Foundation rows,
# "University of Tennessee" matches five separate units, and
# "Colorado State University" also matches the Pueblo campus -- confirm this
# merging is intended for a CSU-vs-peers comparison.
data_time_alternative <- data_time_subset %>%
  mutate(
    Names = case_when(
      str_detect(Organization, "Washington State University") ~ "WSU",
      str_detect(
        Organization, "Virginia Polytechnic Institute and State University"
      ) ~ "VPISU",
      str_detect(Organization, "University of Tennessee") ~ "UT",
      str_detect(
        Organization, "University of Illinois at Urbana-Champaign"
      ) ~ "UIUC",
      str_detect(Organization, "University of California-Davis") ~ "UCD",
      str_detect(Organization, "Texas A&M University") ~ "TAMU",
      str_detect(Organization, "Purdue University") ~ "PU",
      str_detect(Organization, "Oregon State University") ~ "ORSU",
      str_detect(Organization, "Oklahoma State University") ~ "OKSU",
      str_detect(Organization, "North Carolina State University") ~ "NCSU",
      str_detect(Organization, "Michigan State University") ~ "MSU",
      str_detect(Organization, "Kansas State University") ~ "KSU",
      str_detect(Organization, "Iowa State University") ~ "ISU",
      str_detect(Organization, "Colorado State University") ~ "CSU",
      TRUE ~ as.character(Organization)
    )
  ) %>%
  # Set membership replaces the fourteen-way chain of `==` comparisons.
  filter(
    Names %in% c(
      "WSU", "VPISU", "UT", "UIUC", "UCD", "TAMU", "PU",
      "ORSU", "OKSU", "NCSU", "MSU", "KSU", "ISU", "CSU"
    )
  ) %>%
  # Remove factor levels of Organization that no longer occur.
  droplevels()
# List the labels that are present after filtering.
levels(as.factor(data_time_alternative$Names))
##  [1] "CSU"   "ISU"   "KSU"   "MSU"   "NCSU"  "OKSU"  "ORSU"  "PU"    "TAMU" 
## [10] "UCD"   "UIUC"  "UT"    "VPISU" "WSU"

Time Series - Data Summary

# Aggregate to one row per (AwardYear, Names) pair.
#   ttas = total award dollars, ttac = total number of awards.
# `.groups = "drop_last"` spells out summarise()'s default for this call: the
# result stays grouped by AwardYear, and the informational message about
# grouped output is no longer emitted.
data_time_alternative_summary <- data_time_alternative %>%
  group_by(AwardYear, Names) %>%
  summarise(
    ttas = sum(AWARD_SUM),
    ttac = sum(AWARD_COUNT),
    .groups = "drop_last"
  )
data_time_alternative_summary
## # A tibble: 168 × 4
## # Groups:   AwardYear [12]
##    AwardYear Names      ttas  ttac
##        <dbl> <chr>     <dbl> <dbl>
##  1      2001 CSU    24584345    52
##  2      2001 ISU    22841170    66
##  3      2001 KSU     7680499    29
##  4      2001 MSU   108207941    78
##  5      2001 NCSU   28185012    72
##  6      2001 OKSU    6109189    19
##  7      2001 ORSU   28522558    58
##  8      2001 PU     23564973    71
##  9      2001 TAMU     547365     4
## 10      2001 UCD    34268776    77
## # ℹ 158 more rows

Time Series - Calculate Award Sum

Create a time-series plot with all fourteen organizations in the same graph:

# Line chart of yearly award totals (millions of USD), one line per
# organization label. Each line is labelled once, at its most recent year.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names)
) +
  geom_line() +
  geom_text_repel(
    # Keep only each organization's latest-year row as the label anchor.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Sum in years (2001 -2012)  - CSU vs Other Organizations-Time Series")

## Time Series - Calculate Award Count

# Line chart of yearly award counts, one line per organization label. Each
# line is labelled once, at its most recent year.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names)
) +
  geom_line() +
  geom_text_repel(
    # Keep only each organization's latest-year row as the label anchor.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Count in years (2001 - 2012)  - CSU vs Other Organizations-Time Series")

# Add a focus flag ("Yes" for CSU rows, "No" otherwise) that the highlighted
# time-series plots map to line color.
data_time_alternative_summary <- mutate(
  data_time_alternative_summary,
  focus = ifelse(Names == "CSU", "Yes", "No")
)

Time Series - Calculate Award Count (CSU VS Other Orgs)

# Yearly award counts with CSU highlighted: the CSU line is dark green, all
# other organizations dark gray. Each line is labelled at its latest year.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names, color = focus)
) +
  # `linewidth` replaces `size` for lines, which ggplot2 3.4.0 deprecated.
  geom_line(linewidth = 1) +
  geom_text_repel(
    # Keep only each organization's latest-year row as the label anchor.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Count in years (2001 - 2012)  - CSU vs Other Organizations-Time Series") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Time Series - Calculate Award Sum (CSU VS Other Orgs)

# Yearly award totals (millions of USD) with CSU highlighted: the CSU line is
# dark green, all other organizations dark gray. Each line is labelled at its
# latest year.
ggplot(
  data_time_alternative_summary,
  aes(
    x = as.factor(AwardYear), y = ttas / 1000000,
    group = Names, color = focus
  )
) +
  # `linewidth` replaces `size` for lines, which ggplot2 3.4.0 deprecated.
  geom_line(linewidth = 1) +
  geom_text_repel(
    # Keep only each organization's latest-year row as the label anchor.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Sum in years (2001 - 2012)  - CSU vs Other Organizations-Time Series") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )

Time Series(Small Multiples)

# Small multiples: one panel per organization label showing yearly award
# totals (millions of USD). CSU's line is dark green, the others dark gray.
ggplot(
  data_time_alternative_summary,
  aes(
    x = as.factor(AwardYear), y = ttas / 1000000,
    group = Names, color = focus
  )
) +
  geom_line() +
  facet_wrap(~Names) +
  # Label the value at the last available award year in each panel.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = paste0("Awards Sum: ", round(ttas / 1000000, 1), "M")),
    nudge_x = 1,
    hjust = 0,
    size = 3,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Shrink the year labels so they fit in the narrow panels.
  theme(
    axis.text.x = element_text(size = 5),
    legend.position = "none"
  ) +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Sum in years (2001 - 2012)  - CSU vs Other Organizations-Time Series(Small Multiples)") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )

Appendix

#Question 1 Graph 1
library(tidyverse)
library(scales)
library(ggrepel)
library(readxl)

# Read the NSF funding workbook into `data_all`, then open it in the viewer
data_all <- readxl::read_xlsx(path = "NSF_Funding_by_Year.xlsx")
view(data_all)


# Build the 2012 working subset: keep the five columns used below and only the
# organizations whose name mentions one of the listed states / institutions.
# The keywords are joined into a single "a|b|c" alternation pattern.
data_subset2 <- data_all %>%
  dplyr::select(Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration) %>%
  filter(
    AwardYear == "2012",
    str_detect(
      Organization,
      paste(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )

# Inspect the filtered rows in the data viewer
view(data_subset2)

# Store Organization as a factor and list the organizations that were kept
data_subset2 <- data_subset2 %>%
  mutate(Organization = as.factor(Organization))
levels(data_subset2$Organization)
# Map every organization whose name contains one of the institution names
# below to a short abbreviation (stored in `Names`), then keep only the rows
# that received an abbreviation. The named vector is applied pattern by
# pattern, in the order written.
data_alternative <- data_subset2 %>%
  mutate(
    Names = str_replace_all(
      Organization,
      c(
        ".*Washington State University.*" = "WSU",
        ".*Virginia Polytechnic Institute and State University.*" = "VPISU",
        ".*University of Tennessee.*" = "UT",
        ".*University of Illinois at Urbana-Champaign.*" = "UIUC",
        ".*University of California-Davis.*" = "UCD",
        ".*Texas A&M University.*" = "TAMU",
        ".*Purdue University.*" = "PU",
        ".*Oregon State University.*" = "ORSU",
        ".*Oklahoma State University.*" = "OKSU",
        ".*North Carolina State University.*" = "NCSU",
        ".*Michigan State University.*" = "MSU",
        ".*Kansas State University.*" = "KSU",
        ".*Iowa State University.*" = "ISU",
        ".*Colorado State University.*" = "CSU"
      )
    )
  ) %>%
  filter(
    Names %in% c(
      "WSU", "VPISU", "UT", "UIUC", "UCD", "TAMU", "PU",
      "ORSU", "OKSU", "NCSU", "MSU", "KSU", "ISU", "CSU"
    )
  ) %>%
  droplevels()

# List the abbreviations that are actually present
levels(factor(data_alternative$Names))
# Collapse the 2012 rows to one row per abbreviated organization:
#   as - sum of AWARD_SUM, ac - sum of AWARD_COUNT, ay - award year.
data_alternative_summary <- data_alternative %>%
  group_by(Names) %>%
  summarise(
    as = sum(AWARD_SUM),
    ac = sum(AWARD_COUNT),
    # A year is a label, not a quantity: adding it up over the rows of a group
    # yields a meaningless number, so carry the group's (latest) year instead.
    ay = max(AwardYear)
  )

# Print the summary table, then its first rows
data_alternative_summary
head(data_alternative_summary)
# Horizontal bar chart of the 2012 award count, organizations ordered by count
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = reorder(Names, ac))
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Wide, flat panel
  theme(aspect.ratio = 1 / 2.5) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Organizations"
  )
# Flag CSU so it can be emphasised in the plots:
#   Highlight   - "Yes" for CSU, "No" otherwise
#   Highlight_a - "CSU" for CSU, empty string otherwise
data_alternative_summary <- data_alternative_summary %>%
  mutate(
    Highlight = ifelse(Names == "CSU", "Yes", "No"),
    Highlight_a = ifelse(Names == "CSU", "CSU", "")
  )
# Horizontal bar chart of the 2012 award sum (millions of USD), ordered by sum.
# Bars are filled by `Highlight`: CSU in dark green, all others in dark gray.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1000000, y = reorder(Names, as), fill = Highlight)
) +
  geom_col() +
  # Red annotation: award sum in millions
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Blue annotation: number of awards
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  )
# Horizontal bar chart of the 2012 award sum (millions of USD), ordered by sum.
# Here `Highlight` is mapped to the bar outline colour (not the fill).
ggplot(data_alternative_summary, aes(y = reorder(Names, as), x = as / 1000000, color = Highlight)) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Red annotation: award sum in millions
  geom_text_repel(aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
                  size = 3,
                  hjust = -2,
                  nudge_y = 0.2,
                  nudge_x = 0.2,
                  direction = "x",
                  color = "red",
                  max.overlaps = Inf) +
  # Blue annotation: number of awards
  geom_text_repel(aes(label = paste0("Awards Count: ", ac)),
                  size = 3,
                  hjust = -1,
                  nudge_y = 0.5,
                  nudge_x = 0.5,
                  direction = "x",
                  color = "blue",
                  max.overlaps = Inf) +
  labs(title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
       subtitle = "CSU vs. Other-Orgs",
       x = "Award Sum: (USD) - M$",
       y = "Organizations") +
  # The mapped aesthetic is `color`, so the manual palette must be a colour
  # scale. A fill scale has nothing to act on here: the green/gray palette was
  # ignored and the default colour legend was shown despite guide = NULL.
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL)
# Scatter plot for 2012: award count (x) against award sum in millions of USD
# (y). Points and name labels are coloured by `Highlight` (CSU = dark green).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1000000, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  # Organization abbreviation next to each point
  geom_text_repel(mapping = aes(label = Names), hjust = 0.5, vjust = 0.5) +
  # Red annotation: award sum (nudged less than the count label)
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  # Blue annotation: number of awards
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.2,
    point.padding = 0.2,
    max.overlaps = Inf
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Scatter Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )
# Bubble plot for 2012: same axes as the scatter plot, with the point size
# additionally mapped to the award count. Colour follows `Highlight`.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1000000, size = ac, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  # Organization abbreviation; fixed text size overrides the size mapping
  geom_text_repel(mapping = aes(label = Names), size = 5) +
  # Red annotation: award sum
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1000000, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.3,
    point.padding = 0.3,
    max.overlaps = Inf
  ) +
  # Blue annotation: number of awards
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bubble plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )

# Build the multi-year working subset: keep the five columns used below and
# only the organizations whose name mentions one of the listed states /
# institutions (no restriction on AwardYear). The keywords are joined into a
# single "a|b|c" alternation pattern.
data_time_subset <- data_all %>%
  dplyr::select(Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration) %>%
  filter(
    str_detect(
      Organization,
      paste(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )

# Inspect the filtered rows in the data viewer
view(data_time_subset)

# Store Organization as a factor and list the organizations that were kept
data_time_subset <- data_time_subset %>%
  mutate(Organization = as.factor(Organization))
levels(data_time_subset$Organization)
# Map every organization whose name contains one of the institution names
# below to a short abbreviation (stored in `Names`), then keep only the rows
# that received an abbreviation. The named vector is applied pattern by
# pattern, in the order written.
data_time_alternative <- data_time_subset %>%
  mutate(
    Names = str_replace_all(
      Organization,
      c(
        ".*Washington State University.*" = "WSU",
        ".*Virginia Polytechnic Institute and State University.*" = "VPISU",
        ".*University of Tennessee.*" = "UT",
        ".*University of Illinois at Urbana-Champaign.*" = "UIUC",
        ".*University of California-Davis.*" = "UCD",
        ".*Texas A&M University.*" = "TAMU",
        ".*Purdue University.*" = "PU",
        ".*Oregon State University.*" = "ORSU",
        ".*Oklahoma State University.*" = "OKSU",
        ".*North Carolina State University.*" = "NCSU",
        ".*Michigan State University.*" = "MSU",
        ".*Kansas State University.*" = "KSU",
        ".*Iowa State University.*" = "ISU",
        ".*Colorado State University.*" = "CSU"
      )
    )
  ) %>%
  filter(
    Names %in% c(
      "WSU", "VPISU", "UT", "UIUC", "UCD", "TAMU", "PU",
      "ORSU", "OKSU", "NCSU", "MSU", "KSU", "ISU", "CSU"
    )
  ) %>%
  droplevels()

# List the abbreviations that are actually present
levels(factor(data_time_alternative$Names))
# One row per (award year, abbreviated organization):
#   ttas - sum of AWARD_SUM, ttac - sum of AWARD_COUNT.
data_time_alternative_summary <- data_time_alternative %>%
  group_by(AwardYear, Names) %>%
  # .groups = "drop" returns an ungrouped table. Without it the result stays
  # grouped by AwardYear and summarise() prints a message about the leftover
  # grouping. Later steps that need groups call group_by(Names) themselves.
  summarise(
    ttas = sum(AWARD_SUM),
    ttac = sum(AWARD_COUNT),
    .groups = "drop"
  )

# Print the summary table
data_time_alternative_summary
# Time series of the yearly award sum (in millions of USD), one line per
# organization, without highlighting.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names)
) +
  geom_line() +
  # Label each line once, at the last year available for that organization.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") + ylab("Award Sum: (USD) - M$") +
  # Title spelling corrected ("Oganizations") and the year range spaced like
  # the titles of the other time-series plots.
  ggtitle("Organizations - Award Sum in years (2001 - 2012)  - CSU vs Other Organizations-Time Series")
# Time series of the yearly award count, one line per organization, without
# highlighting.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names)
) +
  geom_line() +
  # Label each line once, at the last year available for that organization.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") + ylab("Award Count") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Count in years (2001 - 2012)  - CSU vs Other Organizations-Time Series")
# Flag CSU rows ("Yes") versus every other organization ("No") for colouring
data_time_alternative_summary <- mutate(
  data_time_alternative_summary,
  focus = ifelse(Names == "CSU", "Yes", "No")
)
# Time series of the yearly award count, one line per organization. Lines and
# labels are coloured by `focus`: "Yes" (CSU) is dark green, "No" is dark gray.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names, color = focus)
) +
  # `linewidth` sets line thickness; using `size` for lines is deprecated
  # since ggplot2 3.4.0 and raises a warning.
  geom_line(linewidth = 1) +
  # Label each line once, at the last year available for that organization.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") + ylab("Award Count") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Count in years (2001 - 2012)  - CSU vs Other Organizations-Time Series") +
  # guide = NULL: the colour coding is explained by the line labels.
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL)
# Time series of the yearly award sum (in millions of USD), one line per
# organization. Lines and labels are coloured by `focus`: "Yes" (CSU) is dark
# green, "No" is dark gray.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names, color = focus)
) +
  # `linewidth` sets line thickness; using `size` for lines is deprecated
  # since ggplot2 3.4.0 and raises a warning.
  geom_line(linewidth = 1) +
  # Label each line once, at the last year available for that organization.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") + ylab("Award Sum: (USD) - M$") +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Sum in years (2001 - 2012)  - CSU vs Other Organizations-Time Series") +
  # guide = NULL: the colour coding is explained by the line labels.
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL)
# Small multiples: one panel per organization showing its yearly award sum
# (in millions of USD). Lines are coloured by `focus` ("Yes" = CSU).
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names, color = focus)
) +
  geom_line() +
  facet_wrap(~Names) +
  # Annotate each panel with the award sum of the last available year.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = paste0("Awards Sum: ", round(ttas / 1000000, 1), "M")),
    nudge_x = 1,
    hjust = 0,
    size = 3,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  xlab("AwardYear") + ylab("Award Sum: (USD) - M$") +
  # Small x tick labels so all years fit inside the narrow panels.
  theme(
    axis.text.x = element_text(size = 5),
    legend.position = "none"
  ) +
  # Title spelling corrected ("Oganizations" -> "Organizations").
  ggtitle("Organizations - Award Sum in years (2001 - 2012)  - CSU vs Other Organizations-Time Series(Small Multiples)") +
  scale_color_manual(values = c("Yes" = "darkgreen", "No" = "darkgray"), guide = NULL)