# Author: Suriya Manimaran - 836854117
#Question 1 Graph 1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
library(scales)
library(ggrepel)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
# Load the data
# Read the NSF funding workbook (relative path, so it is resolved against the
# current working directory) and open it in the data viewer.
data_all <- read_xlsx(path = "NSF_Funding_by_Year.xlsx")
data_all %>% view()
#Create a subset for visualization using the `filter()` and `select()` functions from the `dplyr` package.
# Build the 2012 working set: keep the five columns used below, restrict to
# award year 2012, and keep organizations whose name contains any of the
# listed keywords.
#
# AwardYear is a numeric column, so it is compared against a number rather
# than the string "2012". The keywords are joined into one alternation regex
# so a single str_detect() call replaces fourteen OR-ed calls; none of the
# keywords contains a regex metacharacter, so the match set is unchanged.
data_subset2 <- data_all %>%
  dplyr::select(
    Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration
  ) %>%
  filter(
    AwardYear == 2012,
    str_detect(
      Organization,
      paste(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )
# %>% droplevels()
# View Filtered Dataset
# Open the filtered rows in the viewer, then turn Organization into a factor
# so the distinct organization names can be listed with levels().
data_subset2 %>% view()
data_subset2[["Organization"]] <- as.factor(data_subset2[["Organization"]])
levels(data_subset2[["Organization"]])
## [1] "California Academy of Sciences"
## [2] "California Institute of Technology"
## [3] "California Polytechnic State University Foundation"
## [4] "California State L A University Auxiliary Services Inc."
## [5] "California State University-Bakersfield"
## [6] "California State University-Dominguez Hills"
## [7] "California State University-Fresno Foundation"
## [8] "California State University-Fullerton Foundation"
## [9] "California State University-Long Beach Foundation"
## [10] "California State University-San Bernardino Foundation"
## [11] "California State University-Stanislaus"
## [12] "California State University, Chico Research Fdtn"
## [13] "Carnegie Institution of Washington"
## [14] "Central Michigan University"
## [15] "Central Washington University"
## [16] "College of William & Mary Virginia Institute of Marine Science"
## [17] "Colorado School of Mines"
## [18] "Colorado State University"
## [19] "Eastern Illinois University"
## [20] "Eastern Michigan University"
## [21] "Friends of the North Carolina State Museum of Natural Sciences"
## [22] "George Washington University"
## [23] "Illinois Institute of Technology"
## [24] "Illinois State University"
## [25] "Illinois Wesleyan University"
## [26] "Iowa State University"
## [27] "Kansas State University"
## [28] "Michigan State University"
## [29] "Michigan Technological University"
## [30] "Middle Tennessee State University"
## [31] "North Carolina Agricultural & Technical State University"
## [32] "North Carolina Central University"
## [33] "North Carolina State University"
## [34] "Northeastern Illinois University"
## [35] "Northern Illinois University"
## [36] "Oklahoma State University"
## [37] "Oregon Health and Science University"
## [38] "Oregon Museum of Science and Industry"
## [39] "Oregon State University"
## [40] "Purdue University"
## [41] "Southern Illinois University at Carbondale"
## [42] "Southern Illinois University at Edwardsville"
## [43] "Tennessee State University"
## [44] "Tennessee Technological University"
## [45] "Texas A&M International University"
## [46] "Texas A&M Research Foundation"
## [47] "Texas A&M University-Commerce"
## [48] "Texas A&M University Corpus Christi"
## [49] "Texas A&M University Main Campus"
## [50] "University of California-Berkeley"
## [51] "University of California-Davis"
## [52] "University of California-Irvine"
## [53] "University of California-Los Angeles"
## [54] "University of California-Riverside"
## [55] "University of California-San Diego"
## [56] "University of California-San Diego Scripps Inst of Oceanography"
## [57] "University of California-San Francisco"
## [58] "University of California-Santa Barbara"
## [59] "University of California-Santa Cruz"
## [60] "University of California - Merced"
## [61] "University of Colorado at Boulder"
## [62] "University of Colorado at Colorado Springs"
## [63] "University of Colorado at Denver"
## [64] "University of Colorado at Denver and Health Sciences Center"
## [65] "University of Illinois at Chicago"
## [66] "University of Illinois at Urbana-Champaign"
## [67] "University of Iowa"
## [68] "University of Kansas Center for Research Inc"
## [69] "University of Michigan Ann Arbor"
## [70] "University of Missouri-Kansas City"
## [71] "University of North Carolina at Chapel Hill"
## [72] "University of North Carolina at Charlotte"
## [73] "University of North Carolina at Wilmington"
## [74] "University of North Carolina Greensboro"
## [75] "University of Northern Colorado"
## [76] "University of Oklahoma Norman Campus"
## [77] "University of Oregon Eugene"
## [78] "University of Southern California"
## [79] "University of Tennessee Chattanooga"
## [80] "University of Tennessee Knoxville"
## [81] "University of Virginia Main Campus"
## [82] "University of Washington"
## [83] "Virginia Commonwealth University"
## [84] "Virginia Polytechnic Institute and State University"
## [85] "Virginia State University"
## [86] "Washington State University"
## [87] "Washington University"
## [88] "West Virginia University Research Corporation"
## [89] "Western Michigan University"
## [90] "Western Washington University"
# Attach a short label (Names) to each organization of interest and drop every
# other row.
#
# case_when() tests the patterns top to bottom and uses the first hit, which
# matches the original chain of str_replace() calls (each one replaced the
# whole name, so later patterns could no longer match). Rows that hit no
# pattern get NA and are removed, replacing the 14-way `==` filter.
#
# NOTE(review): several patterns are substrings shared by more than one
# organization (e.g. "University of Tennessee" matches both the Chattanooga
# and Knoxville campuses, "Texas A&M University" matches several campuses),
# so those rows are pooled under one label. Confirm that is intended.
data_alternative <- data_subset2 %>%
  mutate(
    Names = case_when(
      str_detect(Organization, "Washington State University") ~ "WSU",
      str_detect(
        Organization,
        "Virginia Polytechnic Institute and State University"
      ) ~ "VPISU",
      str_detect(Organization, "University of Tennessee") ~ "UT",
      str_detect(
        Organization,
        "University of Illinois at Urbana-Champaign"
      ) ~ "UIUC",
      str_detect(Organization, "University of California-Davis") ~ "UCD",
      str_detect(Organization, "Texas A&M University") ~ "TAMU",
      str_detect(Organization, "Purdue University") ~ "PU",
      str_detect(Organization, "Oregon State University") ~ "ORSU",
      str_detect(Organization, "Oklahoma State University") ~ "OKSU",
      str_detect(Organization, "North Carolina State University") ~ "NCSU",
      str_detect(Organization, "Michigan State University") ~ "MSU",
      str_detect(Organization, "Kansas State University") ~ "KSU",
      str_detect(Organization, "Iowa State University") ~ "ISU",
      str_detect(Organization, "Colorado State University") ~ "CSU"
    )
  ) %>%
  filter(!is.na(Names)) %>%
  # Organization is a factor here; discard levels that no longer occur.
  droplevels()
levels(as.factor(data_alternative$Names))
## [1] "CSU" "ISU" "KSU" "MSU" "NCSU" "OKSU" "ORSU" "PU" "TAMU"
## [10] "UCD" "UIUC" "UT" "VPISU" "WSU"
# Collapse to one row per label:
#   as - total award dollars (sum of AWARD_SUM)
#   ac - total number of awards (sum of AWARD_COUNT)
#   ay - the award year of the group
#
# ay previously used sum(AwardYear), which adds the years of every pooled row
# together (e.g. three 2012 rows became 6036). The year is an identifier, not
# a quantity, so the first value in the group is carried through instead; the
# input was filtered to a single year upstream.
data_alternative_summary <- data_alternative %>%
  group_by(Names) %>%
  summarise(
    as = sum(AWARD_SUM),
    ac = sum(AWARD_COUNT),
    ay = first(AwardYear)
  )
data_alternative_summary
## # A tibble: 14 × 4
## Names as ac ay
## <chr> <dbl> <dbl> <dbl>
## 1 CSU 11592315 41 2012
## 2 ISU 16738011 65 2012
## 3 KSU 8839116 23 2012
## 4 MSU 17351663 58 2012
## 5 NCSU 31440526 91 2012
## 6 OKSU 5331516 21 2012
## 7 ORSU 23180107 86 2012
## 8 PU 33991966 121 2012
## 9 TAMU 15471170 59 6036
## 10 UCD 24867252 87 2012
## 11 UIUC 43536459 153 2012
## 12 UT 13646485 51 4024
## 13 VPISU 19609620 78 2012
## 14 WSU 14509261 43 2012
# Preview the first rows of the per-organization summary (shown via top-level
# auto-printing).
head(data_alternative_summary)
## # A tibble: 6 × 4
## Names as ac ay
## <chr> <dbl> <dbl> <dbl>
## 1 CSU 11592315 41 2012
## 2 ISU 16738011 65 2012
## 3 KSU 8839116 23 2012
## 4 MSU 17351663 58 2012
## 5 NCSU 31440526 91 2012
## 6 OKSU 5331516 21 2012
# Horizontal bar chart of award counts, one bar per organization label, with
# the bars ordered by count.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = reorder(Names, ac))
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  theme(aspect.ratio = 1 / 2.5) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Organizations"
  )
# Add two helper columns that single out CSU:
#   Highlight   - "Yes" for CSU, "No" otherwise (drives the manual colour scales)
#   Highlight_a - "CSU" for CSU, empty string otherwise
data_alternative_summary <- data_alternative_summary %>%
  mutate(
    Highlight = ifelse(Names == "CSU", "Yes", "No"),
    Highlight_a = ifelse(Names == "CSU", "CSU", "")
  )
# Horizontal bar chart of award dollars (in millions), bars ordered by total
# and filled by the Highlight flag so CSU stands out. Each bar carries two
# repelled text labels: the dollar total (red) and the award count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1e6, y = reorder(Names, as), fill = Highlight)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_fill_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  )
## Compare the effects of `color=Highlight` and `fill=Highlight`
## Bar Plot - (CSU VS Other Orgs)
# Same bar chart as the fill version, but Highlight is mapped to `color`
# (the bar outline) instead of `fill`, to show the difference between the two.
#
# NOTE(review): nothing in this plot maps a fill aesthetic, so the
# scale_fill_manual() call below appears to have no visible effect - confirm
# whether scale_color_manual() was intended.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1e6, y = reorder(Names, as), color = Highlight)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_fill_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  )
# Scatter plot of award count (x) against award dollars in millions (y), one
# point per organization label, coloured by the Highlight flag. Every point
# gets three repelled labels: its name, its dollar total (red) and its award
# count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1e6, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  geom_text_repel(mapping = aes(label = Names), hjust = 0.5, vjust = 0.5) +
  # Dollar total label; nudged less than the count label so it sits below it.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.2,
    point.padding = 0.2,
    max.overlaps = Inf
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Scatter Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )
# Bubble plot: same axes as the scatter plot, with point size also mapped to
# the award count. Labels are the organization name, dollar total (red) and
# award count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1e6, size = ac, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  geom_text_repel(mapping = aes(label = Names), size = 5) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.3,
    point.padding = 0.3,
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bubble plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )
## Question 2: Time series
#Create a subset for visualization using the `filter()` and `select()` functions from the `dplyr` package.
# Build the all-years working set for the time series: keep the five columns
# used below and keep organizations whose name contains any of the listed
# keywords.
#
# The keywords are joined into one alternation regex so a single str_detect()
# call replaces fourteen OR-ed calls; none of the keywords contains a regex
# metacharacter, so the match set is unchanged.
data_time_subset <- data_all %>%
  dplyr::select(
    Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration
  ) %>%
  filter(
    str_detect(
      Organization,
      paste(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )
# %>% droplevels()
# View Filtered Dataset
# Open the filtered rows in the viewer, then turn Organization into a factor
# so the distinct organization names can be listed with levels().
data_time_subset %>% view()
data_time_subset[["Organization"]] <-
  as.factor(data_time_subset[["Organization"]])
levels(data_time_subset[["Organization"]])
## [1] "Benaroya Research Institute at Virginia Mason"
## [2] "California Academy of Sciences"
## [3] "California Institute of Technology"
## [4] "California Lutheran University"
## [5] "California Polytechnic State University Foundation"
## [6] "California State L A University Auxiliary Services Inc."
## [7] "California State University-Bakersfield"
## [8] "California State University-Dominguez Hills"
## [9] "California State University-Fresno Foundation"
## [10] "California State University-Fullerton Foundation"
## [11] "California State University-Long Beach"
## [12] "California State University-Long Beach Foundation"
## [13] "California State University-Los Angeles"
## [14] "California State University-San Bernardino Foundation"
## [15] "California State University-Stanislaus"
## [16] "California State University Bakersfield Foundation"
## [17] "California State University Monterey Bay Foundation"
## [18] "California State University, Chico Research Fdtn"
## [19] "California State University, East Bay Foundation, Inc."
## [20] "California State University, Trustees"
## [21] "Carnegie Institution of Washington"
## [22] "Central Michigan University"
## [23] "Central Virginia Community College"
## [24] "Central Washington University"
## [25] "City Colleges of Chicago Harold Washington College"
## [26] "College of William & Mary Virginia Institute of Marine Science"
## [27] "Colorado College"
## [28] "Colorado School of Mines"
## [29] "Colorado State University"
## [30] "Colorado State University-Pueblo"
## [31] "East Tennessee State University"
## [32] "Eastern Illinois University"
## [33] "Eastern Iowa Community College"
## [34] "Eastern Michigan University"
## [35] "Eastern Washington University"
## [36] "Friends of the North Carolina State Museum of Natural Sciences"
## [37] "George Washington University"
## [38] "Illinois Institute of Technology"
## [39] "Illinois State Museum Society"
## [40] "Illinois State University"
## [41] "Illinois Wesleyan University"
## [42] "Iowa State University"
## [43] "Kansas City University of Medicine and Biosciences"
## [44] "Kansas State University"
## [45] "Michigan State University"
## [46] "Michigan Technological University"
## [47] "Middle Tennessee State University"
## [48] "North Carolina Agricultural & Technical State University"
## [49] "North Carolina Central University"
## [50] "North Carolina State University"
## [51] "Northeastern Illinois University"
## [52] "Northern Illinois University"
## [53] "Oklahoma Medical Research Foundation"
## [54] "Oklahoma State University"
## [55] "Oregon Health and Science University"
## [56] "Oregon Museum of Science and Industry"
## [57] "Oregon State University"
## [58] "Piedmont Virginia Community College"
## [59] "Purdue Research Foundation"
## [60] "Purdue University"
## [61] "Southern Illinois University at Carbondale"
## [62] "Southern Illinois University at Edwardsville"
## [63] "Southern Oregon University"
## [64] "Tennessee State University"
## [65] "Tennessee Technological University"
## [66] "Texas A&M International University"
## [67] "Texas A&M Research Foundation"
## [68] "Texas A&M University-Commerce"
## [69] "Texas A&M University-Kingsville"
## [70] "Texas A&M University Corpus Christi"
## [71] "Texas A&M University Main Campus"
## [72] "The Texas A&M University System HSC Research Foundation"
## [73] "The University of Tennessee, Memphis - The Health Science Center"
## [74] "University of California-Berkeley"
## [75] "University of California-Davis"
## [76] "University of California-Irvine"
## [77] "University of California-Los Angeles"
## [78] "University of California-Riverside"
## [79] "University of California-San Diego"
## [80] "University of California-San Diego Scripps Inst of Oceanography"
## [81] "University of California-San Francisco"
## [82] "University of California-Santa Barbara"
## [83] "University of California-Santa Cruz"
## [84] "University of California - Merced"
## [85] "University of California, Office of the President, Oakland"
## [86] "University of Central Oklahoma"
## [87] "University of Colorado at Boulder"
## [88] "University of Colorado at Colorado Springs"
## [89] "University of Colorado at Denver"
## [90] "University of Colorado at Denver and Health Sciences Center"
## [91] "University of Illinois at Chicago"
## [92] "University of Illinois at Springfield"
## [93] "University of Illinois at Urbana-Champaign"
## [94] "University of Iowa"
## [95] "University of Kansas Center for Research Inc"
## [96] "University of Kansas Medical Center"
## [97] "University of Mary Washington"
## [98] "University of Michigan Ann Arbor"
## [99] "University of Missouri-Kansas City"
## [100] "University of North Carolina at Asheville"
## [101] "University of North Carolina at Chapel Hill"
## [102] "University of North Carolina at Charlotte"
## [103] "University of North Carolina at Pembroke"
## [104] "University of North Carolina at Wilmington"
## [105] "University of North Carolina Greensboro"
## [106] "University of Northern Colorado"
## [107] "University of Northern Iowa"
## [108] "University of Oklahoma Health Sciences Center"
## [109] "University of Oklahoma Norman Campus"
## [110] "University of Oregon Eugene"
## [111] "University of Southern California"
## [112] "University of Tennessee Chattanooga"
## [113] "University of Tennessee Institute of Agriculture"
## [114] "University of Tennessee Knoxville"
## [115] "University of Tennessee Space Institute"
## [116] "University of Virginia Main Campus"
## [117] "University of Washington"
## [118] "Virginia Commonwealth University"
## [119] "Virginia Military Institute Research Laboratories"
## [120] "Virginia Museum of Natural History Foundation"
## [121] "Virginia Polytechnic Institute and State University"
## [122] "Virginia State University"
## [123] "Washington and Jefferson College"
## [124] "Washington and Lee University"
## [125] "Washington State University"
## [126] "Washington University"
## [127] "Washington University School of Medicine"
## [128] "West Virginia High Technology Consortium Foundation"
## [129] "West Virginia State University"
## [130] "West Virginia University Research Corporation"
## [131] "Western Illinois University"
## [132] "Western Michigan University"
## [133] "Western Washington University"
# Attach a short label (Names) to each organization of interest and drop every
# other row, across all award years.
#
# case_when() tests the patterns top to bottom and uses the first hit, which
# matches the original chain of str_replace() calls (each one replaced the
# whole name, so later patterns could no longer match). Rows that hit no
# pattern get NA and are removed, replacing the 14-way `==` filter.
#
# NOTE(review): several patterns are substrings shared by more than one
# organization (e.g. "Colorado State University" also matches
# "Colorado State University-Pueblo", and "University of Tennessee" matches
# every UT campus), so those rows are pooled under one label. Confirm that is
# intended.
data_time_alternative <- data_time_subset %>%
  mutate(
    Names = case_when(
      str_detect(Organization, "Washington State University") ~ "WSU",
      str_detect(
        Organization,
        "Virginia Polytechnic Institute and State University"
      ) ~ "VPISU",
      str_detect(Organization, "University of Tennessee") ~ "UT",
      str_detect(
        Organization,
        "University of Illinois at Urbana-Champaign"
      ) ~ "UIUC",
      str_detect(Organization, "University of California-Davis") ~ "UCD",
      str_detect(Organization, "Texas A&M University") ~ "TAMU",
      str_detect(Organization, "Purdue University") ~ "PU",
      str_detect(Organization, "Oregon State University") ~ "ORSU",
      str_detect(Organization, "Oklahoma State University") ~ "OKSU",
      str_detect(Organization, "North Carolina State University") ~ "NCSU",
      str_detect(Organization, "Michigan State University") ~ "MSU",
      str_detect(Organization, "Kansas State University") ~ "KSU",
      str_detect(Organization, "Iowa State University") ~ "ISU",
      str_detect(Organization, "Colorado State University") ~ "CSU"
    )
  ) %>%
  filter(!is.na(Names)) %>%
  # Organization is a factor here; discard levels that no longer occur.
  droplevels()
levels(as.factor(data_time_alternative$Names))
## [1] "CSU" "ISU" "KSU" "MSU" "NCSU" "OKSU" "ORSU" "PU" "TAMU"
## [10] "UCD" "UIUC" "UT" "VPISU" "WSU"
# Total award dollars (ttas) and award count (ttac) per award year and
# organization label.
#
# `.groups = "drop_last"` spells out what summarise() already did by default
# (the result stays grouped by AwardYear), so the result is unchanged but the
# "has grouped output by" message is no longer emitted.
data_time_alternative_summary <- data_time_alternative %>%
  group_by(AwardYear, Names) %>%
  summarise(
    ttas = sum(AWARD_SUM),
    ttac = sum(AWARD_COUNT),
    .groups = "drop_last"
  )
## `summarise()` has grouped output by 'AwardYear'. You can override using the
## `.groups` argument.
# Show the per-year, per-organization totals (top-level auto-printing).
data_time_alternative_summary
## # A tibble: 168 × 4
## # Groups: AwardYear [12]
## AwardYear Names ttas ttac
## <dbl> <chr> <dbl> <dbl>
## 1 2001 CSU 24584345 52
## 2 2001 ISU 22841170 66
## 3 2001 KSU 7680499 29
## 4 2001 MSU 108207941 78
## 5 2001 NCSU 28185012 72
## 6 2001 OKSU 6109189 19
## 7 2001 ORSU 28522558 58
## 8 2001 PU 23564973 71
## 9 2001 TAMU 547365 4
## 10 2001 UCD 34268776 77
## # ℹ 158 more rows
# Create a time-series plot with all organizations in the same graph:
# Time series of award dollars (in millions), one line per organization label.
# AwardYear is converted to a factor so every year gets its own axis tick.
# Each line is labelled with the organization name at its latest year.
# Fixes the title typo ("Oganizations") and the "(2001 -2012)" spacing.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names)
) +
  geom_line() +
  geom_text_repel(
    # One row per organization: the row for its most recent award year.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series")
## Time Series - Calculate Award Count
# Time series of award counts, one line per organization label. AwardYear is
# converted to a factor so every year gets its own axis tick. Each line is
# labelled with the organization name at its latest year.
# Fixes the title typo ("Oganizations").
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names)
) +
  geom_line() +
  geom_text_repel(
    # One row per organization: the row for its most recent award year.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  ggtitle("Organizations - Award Count in years (2001 - 2012) - CSU vs Other Organizations-Time Series")
# Flag the CSU rows ("Yes") against all others ("No"); the time-series plots
# map this column to line colour.
data_time_alternative_summary <- mutate(
  data_time_alternative_summary,
  focus = ifelse(Names == "CSU", "Yes", "No")
)
# Award-count time series with CSU highlighted: lines are coloured by the
# `focus` flag (dark green for CSU, dark gray for the rest) and labelled with
# the organization name at its latest year.
#
# Uses `linewidth` for the line thickness; `size` for lines is deprecated
# since ggplot2 3.4.0. Also fixes the title typo ("Oganizations").
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names, color = focus)
) +
  geom_line(linewidth = 1) +
  geom_text_repel(
    # One row per organization: the row for its most recent award year.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  ggtitle("Organizations - Award Count in years (2001 - 2012) - CSU vs Other Organizations-Time Series") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Time Series - Calculate Award Sum (CSU VS Other Orgs)
# Award-sum time series (in millions) with CSU highlighted: lines are coloured
# by the `focus` flag (dark green for CSU, dark gray for the rest) and
# labelled with the organization name at its latest year.
#
# Uses `linewidth` for the line thickness; `size` for lines is deprecated
# since ggplot2 3.4.0. Also fixes the title typo ("Oganizations").
ggplot(
  data_time_alternative_summary,
  aes(
    x = as.factor(AwardYear),
    y = ttas / 1000000,
    group = Names,
    color = focus
  )
) +
  geom_line(linewidth = 1) +
  geom_text_repel(
    # One row per organization: the row for its most recent award year.
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
# Small multiples: one award-sum time series panel per organization label,
# with CSU drawn in dark green and the rest in dark gray. Each panel is
# annotated with the dollar total (in millions) of that organization's latest
# year. Fixes the title typo ("Oganizations").
ggplot(
  data_time_alternative_summary,
  aes(
    x = as.factor(AwardYear),
    y = ttas / 1000000,
    group = Names,
    color = focus
  )
) +
  geom_line() +
  facet_wrap(~Names) +
  # Add labels for the last available Award Year in each facet
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = paste0("Awards Sum: ", round(ttas / 1000000, 1), "M")),
    nudge_x = 1,
    hjust = 0,
    size = 3,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Small tick labels so all years fit on each narrow panel.
  theme(
    axis.text.x = element_text(size = 5),
    legend.position = "none"
  ) +
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series(Small Multiples)") +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
#Question 1 Graph 1
library(tidyverse)
library(scales)
library(ggrepel)
library(readxl)
# Load the data
# Read the NSF funding workbook (relative path, so it is resolved against the
# current working directory) and open it in the data viewer.
data_all <- read_xlsx(path = "NSF_Funding_by_Year.xlsx")
data_all %>% view()
#Create a subset for visualization using the `filter()` and `select()` functions from the `dplyr` package.
# Build the 2012 working set: keep the five columns used below, restrict to
# award year 2012, and keep organizations whose name contains any of the
# listed keywords.
#
# AwardYear is summed as a number further down, so it is compared against a
# number rather than the string "2012". The keywords are joined into one
# alternation regex so a single str_detect() call replaces fourteen OR-ed
# calls; none of the keywords contains a regex metacharacter, so the match set
# is unchanged.
data_subset2 <- data_all %>%
  dplyr::select(
    Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration
  ) %>%
  filter(
    AwardYear == 2012,
    str_detect(
      Organization,
      paste(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )
# %>% droplevels()
# View Filtered Dataset
# Open the filtered rows in the viewer, then turn Organization into a factor
# so the distinct organization names can be listed with levels().
data_subset2 %>% view()
data_subset2[["Organization"]] <- as.factor(data_subset2[["Organization"]])
levels(data_subset2[["Organization"]])
# Attach a short label (Names) to each organization of interest and drop every
# other row.
#
# case_when() tests the patterns top to bottom and uses the first hit, which
# matches the original chain of str_replace() calls (each one replaced the
# whole name, so later patterns could no longer match). Rows that hit no
# pattern get NA and are removed, replacing the 14-way `==` filter.
#
# NOTE(review): several patterns are substrings that can match more than one
# organization (e.g. "University of Tennessee" or "Texas A&M University" match
# every campus carrying that prefix), so those rows are pooled under one
# label. Confirm that is intended.
data_alternative <- data_subset2 %>%
  mutate(
    Names = case_when(
      str_detect(Organization, "Washington State University") ~ "WSU",
      str_detect(
        Organization,
        "Virginia Polytechnic Institute and State University"
      ) ~ "VPISU",
      str_detect(Organization, "University of Tennessee") ~ "UT",
      str_detect(
        Organization,
        "University of Illinois at Urbana-Champaign"
      ) ~ "UIUC",
      str_detect(Organization, "University of California-Davis") ~ "UCD",
      str_detect(Organization, "Texas A&M University") ~ "TAMU",
      str_detect(Organization, "Purdue University") ~ "PU",
      str_detect(Organization, "Oregon State University") ~ "ORSU",
      str_detect(Organization, "Oklahoma State University") ~ "OKSU",
      str_detect(Organization, "North Carolina State University") ~ "NCSU",
      str_detect(Organization, "Michigan State University") ~ "MSU",
      str_detect(Organization, "Kansas State University") ~ "KSU",
      str_detect(Organization, "Iowa State University") ~ "ISU",
      str_detect(Organization, "Colorado State University") ~ "CSU"
    )
  ) %>%
  filter(!is.na(Names)) %>%
  # Organization is a factor here; discard levels that no longer occur.
  droplevels()
levels(as.factor(data_alternative$Names))
# Collapse to one row per label:
#   as - total award dollars (sum of AWARD_SUM)
#   ac - total number of awards (sum of AWARD_COUNT)
#   ay - the award year of the group
#
# ay previously used sum(AwardYear), which adds the years of every pooled row
# together (a label covering three 2012 rows would report 6036). The year is
# an identifier, not a quantity, so the first value in the group is carried
# through instead; the input was filtered to a single year upstream.
data_alternative_summary <- data_alternative %>%
  group_by(Names) %>%
  summarise(
    as = sum(AWARD_SUM),
    ac = sum(AWARD_COUNT),
    ay = first(AwardYear)
  )
# Show the full summary, then just its first rows.
data_alternative_summary
head(data_alternative_summary)
# Horizontal bar chart of award counts, one bar per organization label, with
# the bars ordered by count.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = reorder(Names, ac))
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  theme(aspect.ratio = 1 / 2.5) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Organizations"
  )
# Add two helper columns that single out CSU:
#   Highlight   - "Yes" for CSU, "No" otherwise (drives the manual colour scales)
#   Highlight_a - "CSU" for CSU, empty string otherwise
data_alternative_summary <- data_alternative_summary %>%
  mutate(
    Highlight = ifelse(Names == "CSU", "Yes", "No"),
    Highlight_a = ifelse(Names == "CSU", "CSU", "")
  )
# Horizontal bar chart of award dollars (in millions), bars ordered by total
# and filled by the Highlight flag so CSU stands out. Each bar carries two
# repelled text labels: the dollar total (red) and the award count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1e6, y = reorder(Names, as), fill = Highlight)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_fill_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  )
# Bar chart of award dollars (in millions) with Highlight mapped to `color`
# (the bar outline) rather than `fill`.
#
# NOTE(review): nothing in this plot maps a fill aesthetic, so the
# scale_fill_manual() call below appears to have no visible effect - confirm
# whether scale_color_manual() was intended.
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = as / 1e6, y = reorder(Names, as), color = Highlight)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = -2,
    nudge_x = 0.2,
    nudge_y = 0.2,
    direction = "x",
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = -1,
    nudge_x = 0.5,
    nudge_y = 0.5,
    direction = "x",
    max.overlaps = Inf
  ) +
  scale_fill_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bar plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "Award Sum: (USD) - M$",
    y = "Organizations"
  )
# Scatter plot of award count (x) against award dollars in millions (y), one
# point per organization label, coloured by the Highlight flag. Every point
# gets three repelled labels: its name, its dollar total (red) and its award
# count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1e6, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  geom_text_repel(mapping = aes(label = Names), hjust = 0.5, vjust = 0.5) +
  # Dollar total label; nudged less than the count label so it sits below it.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.2,
    point.padding = 0.2,
    max.overlaps = Inf
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Scatter Plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )
# Bubble plot: award count (x) against award dollars in millions (y), with
# point size also mapped to the award count. Labels are the organization name,
# dollar total (red) and award count (blue).
ggplot(
  data = data_alternative_summary,
  mapping = aes(x = ac, y = as / 1e6, size = ac, color = Highlight)
) +
  geom_point(alpha = 0.5, stroke = 1.5) +
  geom_text_repel(mapping = aes(label = Names), size = 5) +
  # Dollar total label.
  geom_text_repel(
    mapping = aes(label = paste0("Award Sum: $", round(as / 1e6, 1), "M")),
    color = "red",
    size = 3,
    hjust = 1,
    nudge_y = 2,
    direction = "x",
    box.padding = 0.3,
    point.padding = 0.3,
    max.overlaps = Inf
  ) +
  # Award count label.
  geom_text_repel(
    mapping = aes(label = paste0("Awards Count: ", ac)),
    color = "blue",
    size = 3,
    hjust = 1,
    nudge_y = 5,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    max.overlaps = Inf
  ) +
  scale_x_continuous(labels = comma) +
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  ) +
  labs(
    title = "Organizations Award Counts and Award Sum (Year 2012) - CSU vs Other Organizations - Bubble plot",
    subtitle = "CSU vs. Other-Orgs",
    x = "AwardCount",
    y = "Award Sum: (USD) - M$"
  )
# Build the multi-year subset: keep the five analysis columns and every row
# (no AwardYear filter here) whose Organization contains at least one of the
# keywords below. The keywords are joined into one regex alternation.
data_time_subset <- data_all %>%
  dplyr::select(Organization, AwardYear, AWARD_SUM, AWARD_COUNT, AvarageAwardDuration) %>%
  filter(
    str_detect(
      Organization,
      str_c(
        c(
          "Washington", "Virginia", "Tennessee", "Illinois", "California",
          "Texas A&M", "Purdue", "Oregon", "North Carolina", "Michigan",
          "Kansas", "Iowa", "Colorado", "Oklahoma"
        ),
        collapse = "|"
      )
    )
  )
# Inspect the filtered rows.
view(data_time_subset)
# Convert Organization to a factor and list the organization names that matched.
data_time_subset <- data_time_subset %>%
  mutate(Organization = as.factor(Organization))
levels(data_time_subset$Organization)
# Lookup table: regex matching a full organization name -> short label.
# Keeping patterns and labels in one place means the replacement step and the
# filter step below can never get out of sync.
# NOTE(review): each pattern matches any name that merely *contains* the text
# (e.g. other campuses sharing the same prefix would also be relabelled) --
# confirm against levels(data_time_subset$Organization) that this is intended.
org_name_abbreviations <- c(
  ".*Washington State University.*" = "WSU",
  ".*Virginia Polytechnic Institute and State University.*" = "VPISU",
  ".*University of Tennessee.*" = "UT",
  ".*University of Illinois at Urbana-Champaign.*" = "UIUC",
  ".*University of California-Davis.*" = "UCD",
  ".*Texas A&M University.*" = "TAMU",
  ".*Purdue University.*" = "PU",
  ".*Oregon State University.*" = "ORSU",
  ".*Oklahoma State University.*" = "OKSU",
  ".*North Carolina State University.*" = "NCSU",
  ".*Michigan State University.*" = "MSU",
  ".*Kansas State University.*" = "KSU",
  ".*Iowa State University.*" = "ISU",
  ".*Colorado State University.*" = "CSU"
)

# Add a `Names` column holding the short label, then keep only the rows whose
# organization was relabelled (all other organizations keep their full name in
# `Names` and are dropped by the filter).
data_time_alternative <- data_time_subset %>%
  # A named vector makes str_replace_all() apply every pattern in turn,
  # which is what the previous chain of 14 str_replace() calls did by hand.
  mutate(
    Names = str_replace_all(as.character(Organization), org_name_abbreviations)
  ) %>%
  filter(Names %in% unname(org_name_abbreviations)) %>%
  # Remove factor levels of Organization that no longer occur.
  droplevels()
# List the short labels that are actually present after filtering.
levels(as.factor(data_time_alternative$Names))
# Yearly totals per organization label:
#   ttas = total award sum, ttac = total award count.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by AwardYear and summarise() prints a regrouping message.
data_time_alternative_summary <- data_time_alternative %>%
  group_by(AwardYear, Names) %>%
  summarise(
    ttas = sum(AWARD_SUM),
    ttac = sum(AWARD_COUNT),
    .groups = "drop"
  )
# Print the summary table.
data_time_alternative_summary
# Time series of the yearly award sum (millions of USD), one line per
# organization label. AwardYear is treated as a discrete axis.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names)
) +
  geom_line() +
  # Label every line once, at that organization's most recent award year,
  # nudged one step to the right and repelled only vertically.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Title fixed: "Oganizations" -> "Organizations", "(2001 -2012)" spacing.
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series")
# Time series of the yearly award count, one line per organization label.
# AwardYear is treated as a discrete axis.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names)
) +
  geom_line() +
  # Label every line once, at that organization's most recent award year,
  # nudged one step to the right and repelled only vertically.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  # Title fixed: "Oganizations" -> "Organizations".
  ggtitle("Organizations - Award Count in years (2001 - 2012) - CSU vs Other Organizations-Time Series")
# Add a "Yes"/"No" flag marking the rows labelled "CSU"; the plots that follow
# map this column to line colour.
data_time_alternative_summary <- mutate(
  data_time_alternative_summary,
  focus = if_else(Names == "CSU", "Yes", "No")
)
# Time series of the yearly award count with the "CSU" line highlighted:
# line and label colour follow the `focus` flag.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttac, group = Names, color = focus)
) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0, where
  # `size` on lines is deprecated and triggers a warning).
  geom_line(linewidth = 1) +
  # Label every line once, at that organization's most recent award year,
  # nudged one step to the right and repelled only vertically.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Count") +
  # Title fixed: "Oganizations" -> "Organizations".
  ggtitle("Organizations - Award Count in years (2001 - 2012) - CSU vs Other Organizations-Time Series") +
  # Green for the focus organization ("Yes"), gray for all others ("No").
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
# Time series of the yearly award sum (millions of USD) with the "CSU" line
# highlighted: line and label colour follow the `focus` flag.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names, color = focus)
) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0, where
  # `size` on lines is deprecated and triggers a warning).
  geom_line(linewidth = 1) +
  # Label every line once, at that organization's most recent award year,
  # nudged one step to the right and repelled only vertically.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = Names),
    hjust = 1,
    nudge_x = 1,
    direction = "y",
    box.padding = 0.5,
    point.padding = 0.5,
    size = 4,
    max.overlaps = Inf
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Title fixed: "Oganizations" -> "Organizations".
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series") +
  # Green for the focus organization ("Yes"), gray for all others ("No").
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )
# Small multiples: one panel per organization label showing the yearly award
# sum (millions of USD); line colour follows the `focus` flag.
ggplot(
  data_time_alternative_summary,
  aes(x = as.factor(AwardYear), y = ttas / 1000000, group = Names, color = focus)
) +
  geom_line() +
  facet_wrap(~Names) +
  # In each panel, annotate the most recent award year with its award sum.
  geom_text_repel(
    data = data_time_alternative_summary %>%
      group_by(Names) %>%
      filter(AwardYear == max(AwardYear)),
    aes(label = paste0("Awards Sum: ", round(ttas / 1000000, 1), "M")),
    nudge_x = 1,
    hjust = 0,
    size = 3,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  xlab("AwardYear") +
  ylab("Award Sum: (USD) - M$") +
  # Small x tick labels so the year labels fit inside the narrow panels.
  theme(
    axis.text.x = element_text(size = 5),
    legend.position = "none"
  ) +
  # Title fixed: "Oganizations" -> "Organizations".
  ggtitle("Organizations - Award Sum in years (2001 - 2012) - CSU vs Other Organizations-Time Series(Small Multiples)") +
  # Green for the focus organization ("Yes"), gray for all others ("No").
  scale_color_manual(
    values = c("Yes" = "darkgreen", "No" = "darkgray"),
    guide = NULL
  )