# Load the Cordis per-country time series and prepare it for yearly
# aggregation: one row per Country and month, with a Date and a Year column.
project <- read.csv(
  "indicators_econ/Cordis/CountryTimeSeries.csv",
  header = TRUE, sep = ",", dec = "."
)

# `X..Projects` is the syntactic name read.csv() generated for the raw header
# (presumably "# Projects" -- confirm against the CSV).
project <- dplyr::rename(project, Month = YearMonth, Projects = X..Projects)

# Month values are expected to look like "YYYY-MM"; appending "-01" turns them
# into the first day of that month. Values that do not parse become NA.
project$Date <- as.Date(paste0(project$Month, "-01"), format = "%Y-%m-%d")

# Keep observations from 2010 onwards. which() drops NA comparisons; indexing
# with the raw logical vector would insert an all-NA row for every date that
# failed to parse.
project <- project[which(project$Date >= as.Date("2010-01-01")), ]

project$Month <- NULL
project$Year <- lubridate::year(project$Date)

# Yearly totals across all countries: group by Year only and sum the remaining
# numeric columns (Projects and Funding).
# Grouping by every column except Country and Date would turn each numeric
# column into a grouping key, leaving nothing for sum() to aggregate.
aggregated_data <- project %>%
  group_by(Year) %>%
  dplyr::summarize(across(where(is.numeric), sum), .groups = "drop")
# Keep only the rows whose Year lies in 2010 through 2022.
filtered_data <- filter(project, Year %in% seq(2010, 2022))

# Yearly totals per country: every numeric column is summed within each
# Country/Year pair. The result stays grouped by Country.
summed_data <- dplyr::summarize(
  group_by(filtered_data, Country, Year),
  across(where(is.numeric), sum)
)
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
# ISO 3166-1 alpha-3 codes of the countries aggregated into the "EU" series.
# Despite the name, the list is broader than the EU itself: the 27 member
# states come first, followed by nine further European countries.
countries_EU <- c(
  # EU member states
  "BEL", "BGR", "CZE", "DNK", "DEU", "EST", "IRL", "GRC", "ESP", "FRA", "HRV",
  "ITA", "CYP", "LTU", "LUX", "LVA", "POL", "HUN", "NLD", "MLT", "AUT", "SVN",
  "SVK", "ROU", "PRT", "FIN", "SWE",
  # Other European countries
  "GBR", "CHE", "NOR", "ISL", "LIE", "ALB", "SRB", "MNE", "MKD"
)

# Collapse the countries listed in countries_EU into one series labelled "EU":
# one ungrouped row per Year with the summed project count and funding.
merged_EU <- summed_data %>%
  filter(Country %in% countries_EU) %>%
  group_by(Year) %>%
  dplyr::summarize(
    Country  = "EU",
    Projects = sum(Projects, na.rm = TRUE),
    Funding  = sum(Funding, na.rm = TRUE),
    .groups  = "drop"
  )
# Line chart of the yearly "EU" funding total, shown in millions of EUR.
# The x axis is limited to 2010-2021, so rows outside that range (here 2022)
# are not drawn and ggplot2 reports them as removed.
ggplot(merged_EU, aes(x = Year, y = Funding / 1e6)) +
  geom_line(colour = "black") +
  scale_x_continuous(limits = c(2010, 2021), breaks = seq(2010, 2021, by = 2)) +
  scale_y_continuous(labels = label_comma()) +
  labs(y = "Amount of funding in millions of EUR") +
  theme_minimal() +
  theme(
    plot.title   = element_text(size = 16, face = "bold", hjust = 0.5,
                                lineheight = 1.2),
    axis.text    = element_text(size = 12),
    axis.title   = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text  = element_text(size = 12)
  )
## Warning: Removed 1 row containing missing values (`geom_line()`).

# Hard-coded yearly funding figures for three regions, 2012-2022, in long
# format (one row per region and year). Plotted below as VC funding in
# million USD.
vc <- local({
  years <- as.numeric(2012:2022)
  funding <- list(
    "United States" = c(2000, 3500, 12000, 20000, 18000, 21000, 33000, 41000,
                        48000, 113000, 58000),
    "China"         = c(200, 500, 1500, 8000, 16000, 29000, 32500, 21000,
                        25000, 48000, 22000),
    "Europe"        = c(200, 280, 550, 1400, 1300, 2750, 4250, 7700, 7100,
                        21000, 22800)
  )
  data.frame(
    Country = rep(names(funding), each = length(years)),
    Year    = rep(years, times = length(funding)),
    Funding = unlist(funding, use.names = FALSE)
  )
})
# Line-and-point chart of yearly VC funding per region, 2012-2022.
# Regions are distinguished by grey shade (line and points) and point shape.
ggplot(vc,
       aes(x = Year, y = Funding, color = Country, group = Country)) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  # Colour is inherited from the plot-level aes(); only shape is added here.
  geom_point(aes(shape = Country), size = 3) +
  labs(title = "Venture Capital Funding (per Year)\nfrom 2012-2022 by Region",
       x = "Year", y = "Amount of VC funding in\nmillion USD") +
  scale_color_manual(values = c("Europe" = "black", "United States" = "grey40",
                                "China" = "grey60")) +
  scale_x_continuous(breaks = seq(2012, 2022, by = 2), limits = c(2012, 2022)) +
  scale_y_continuous(breaks = seq(0, 120000, by = 30000),
                     limits = c(0, 120000), labels = scales::comma) +
  theme_minimal() +
  theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5,
                                  lineheight = 1.2),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14),
        legend.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 12))

# Show the first 13 rows of the "EU" series.
head(merged_EU, n = 13)
## # A tibble: 13 × 4
##     Year Country Projects     Funding
##    <dbl> <chr>      <int>       <dbl>
##  1  2010 EU           380  219057887.
##  2  2011 EU           364  235185188.
##  3  2012 EU           361  271833386.
##  4  2013 EU           323  183961058.
##  5  2014 EU           233  126516827.
##  6  2015 EU           378  259413250.
##  7  2016 EU           444  290140063.
##  8  2017 EU           552  342156553.
##  9  2018 EU           695  529791414.
## 10  2019 EU          1270  878194174.
## 11  2020 EU          1456 1138974012.
## 12  2021 EU          1638 1348309344.
## 13  2022 EU            99   92927586.