# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(R.matlab)
## R.matlab v3.7.0 (2022-08-25 21:52:34 UTC) successfully loaded. See ?R.matlab for help.
##
## Attaching package: 'R.matlab'
## The following objects are masked from 'package:base':
##
## getOption, isOpen
# Manually Loading Bureau of Labour Statistics Data
blsdata <- data.frame(
Year = 1948:2015,
Level = c(44.186, 44.323, 47.495, 48.629, 49.466, 50.755, 50.728, 52.889, 52.523, 53.310, 53.528, 55.923, 56.267, 57.470, 59.560, 61.302, 63.698, 65.763, 67.773, 67.853, 69.562, 69.288, 69.134, 71.302, 73.337, 75.357, 72.747, 73.430, 76.096, 77.356, 78.401, 78.094, 76.292, 76.455, 73.887, 76.026, 78.305, 79.309, 80.563, 80.844, 81.622, 81.978, 82.686, 82.304, 85.038, 84.721, 85.246, 84.797, 86.421, 87.418, 88.517, 90.378, 91.957, 92.435, 94.397, 96.638, 99.089, 100.593, 100.917, 101.381, 100.174, 100.000, 102.869, 102.986, 103.594, 103.841, 104.558, 104.798)
)
# Manually Loading R&D Data
rnd_data <- data.frame(
Year <- seq(1929, 2015, by = 1),
PvtIPP = c(0.6, 0.6, 0.5, 0.4, 0.4, 0.5, 0.6, 0.6, 0.7, 0.8, 0.8, 0.8, 1.1, 1.2, 1.1, 1.2, 1.4, 1.8, 2.0, 2.1, 2.0, 2.3, 2.4, 3.0, 3.7, 3.9, 4.3, 5.2, 5.6, 6.0, 6.6, 7.1, 8.0, 8.4, 9.2, 9.8, 11.1, 12.8, 14.0, 15.6, 17.2, 17.9, 18.7, 20.6, 22.7, 25.5, 27.8, 32.2, 35.8, 40.4, 48.1, 54.4, 64.8, 72.7, 81.3, 95.1, 105.3, 113.5, 120.1, 132.7, 150.1, 164.4, 179.1, 187.7, 196.9, 205.7, 226.8, 253.3, 288.0, 317.7, 364.0, 409.5, 412.6, 406.4, 420.9, 442.1, 475.1, 504.6, 537.9, 563.4, 550.9, 564.3, 592.2, 621.7, 649.9, 690.0, 728.6),
GovtIPP = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.5, 0.9, 1.6, 1.5, 1.4, 1.4, 1.6, 1.7, 1.9, 2.1, 2.4, 2.7, 3.0, 3.6, 4.8, 5.9, 6.4, 7.0, 7.8, 8.8, 9.9, 11.7, 12.9, 13.8, 15.4, 16.2, 17.0, 17.7, 17.6, 18.1, 19.3, 20.3, 21.5, 23.3, 25.5, 27.9, 31.0, 35.0, 39.6, 45.0, 49.7, 55.2, 62.2, 71.0, 75.2, 81.7, 85.1, 87.8, 91.1, 91.4, 91.6, 91.4, 92.0, 94.0, 95.5, 98.3, 102.3, 106.5, 113.2, 119.7, 126.0, 136.5, 145.5, 153.9, 160.6, 169.0, 177.2, 179.8, 187.4, 191.6, 189.2, 188.1, 187.2, 190.8),
TotalIPP = c(0.7, 0.7, 0.6, 0.5, 0.5, 0.6, 0.7, 0.7, 0.8, 0.9, 0.9, 0.9, 1.4, 1.7, 2.0, 2.8, 2.9, 3.2, 3.4, 3.7, 3.7, 4.2, 4.5, 5.4, 6.4, 6.9, 7.9, 10.0, 11.5, 12.4, 13.6, 14.9, 16.8, 18.3, 20.9, 22.7, 24.9, 28.2, 30.2, 32.6, 34.9, 35.5, 36.8, 39.9, 43.0, 47.0, 51.1, 57.7, 63.7, 71.4, 83.1, 94.0, 109.8, 122.4, 136.5, 157.3, 176.3, 188.7, 201.8, 217.8, 237.9, 255.5, 270.5, 279.3, 288.3, 297.7, 320.8, 348.8, 386.3, 420.0, 470.5, 522.7, 532.3, 532.4, 557.4, 587.6, 629.0, 665.2, 706.9, 740.6, 730.7, 751.7, 783.8, 810.9, 838.0, 877.2, 919.4)
)
colnames(rnd_data)[1] <- "Year"
# Load Wage Data from .mat File
wage_data <- readMat("WageScientistData.mat")
scientist_wages <- as.numeric(as.vector(wage_data$WageScientist))
wage_years <- as.numeric(as.vector(wage_data$WageYears))
# Create Scientist Wages DataFrame
scientist_wage_data <- data.frame(
Year = wage_years,
ScientistWages = scientist_wages
)
# Merge Datasets
merged_data <- merge(rnd_data, scientist_wage_data, by = "Year", all = TRUE)
# Renaming total R&D Budget for Clarity
colnames(merged_data)[colnames(merged_data) == "TotalIPP"] <- "annualrnd"
# Calculate Effective Number of Scientists
merged_data <- merged_data %>%
mutate(
Effective_Scientists_Annual = (annualrnd / ScientistWages) * 10^9,
)
# Define the function
calculate_growth_chunks_and_plot <- function(blsdata, merged_data, start_year, chunk_size) {
# Filter data to include only years after the start_year
blsdata_filtered <- blsdata %>% filter(Year >= start_year)
# Create periods based on chunk_size
# Adjust breaks to include the maximum year explicitly
breaks <- c(seq(start_year, max(blsdata_filtered$Year), by = chunk_size), max(blsdata_filtered$Year) + 1)
labels <- paste0(head(breaks, -1), "-", tail(breaks, -1) - 1)
# Attach the period labels to the datasets
blsdata_filtered <- blsdata_filtered %>%
mutate(
Period = cut(Year, breaks = breaks, labels = labels, right = FALSE, include.lowest = TRUE)
)
# Calculate TFP growth for each period
tfp_growth <- blsdata_filtered %>%
group_by(Period) %>%
arrange(Year) %>%
summarise(
FirstYear = first(Year),
LastYear = last(Year),
FirstYear = first(Year),
LastYear = last(Year),
n_years = LastYear-FirstYear +1,
FirstValue = first(Level),
LastValue = last(Level),
n_years = LastYear - FirstYear + 1,
AnnualTFPGrowth = 100 * ((LastValue / FirstValue)^(1 / n_years) - 1)
)
scientistdata1 <- merged_data %>% filter(Year >= start_year)
scientistdata2 <- scientistdata1 %>%
mutate(
Period = cut(Year, breaks = breaks, labels = labels, right = FALSE, include.lowest = TRUE)
)
# Calculate effective scientists for each period
scientistdata3 <- scientistdata2 %>%
group_by(Period) %>%
arrange(Year) %>%
summarise(
FirstYear = first(Year),
LastYear = last(Year),
FirstValue = first(Effective_Scientists_Annual),
LastValue = last(Effective_Scientists_Annual),
n_years = LastYear-FirstYear +1,
MeanScientistsAnnual = mean(Effective_Scientists_Annual), na.rm = TRUE,
Scientists_Growth = ((LastValue / FirstValue)^(1 / n_years) - 1) * 100)
# Merge the two datasets
merged_stats <- inner_join(tfp_growth, scientistdata3, by = "Period")
View(merged_stats)
ref_value <- merged_stats %>%
arrange(FirstYear.x) %>%
filter(FirstYear.x == first(FirstYear.x)) %>% # Filter for the earliest period
pull(MeanScientistsAnnual) # Extract the MeanScientistsAnnual value
merged_stats <- merged_stats %>%
mutate(
Phi = 1 - Scientists_Growth/AnnualTFPGrowth,
NormalizedScientistsAnnual = MeanScientistsAnnual / ref_value
)
return(merged_stats)
}
# Run the function
merged_stats1<- calculate_growth_chunks_and_plot(blsdata, merged_data, start_year = 1948, chunk_size =10)
plot1 <- ggplot(data = merged_stats1, aes(x = Period)) +
# Add the TFP Growth line (blue, left y-axis)
geom_line(aes(y = AnnualTFPGrowth, group = 1, color = "TFP Growth (left scale)"), size = 1) +
# Add the Normalized Scientists Annual line (green, right y-axis)
geom_line(aes(y = NormalizedScientistsAnnual, group = 1, color = "Effective Researchers (right scale)"), size = 1) +
# Scale for the left y-axis (TFP Growth)
scale_y_continuous(
name = "Growth rate (%)",
sec.axis = sec_axis(~ ., name = "Factor increase")
) +
# Set the color of the lines and legends
scale_color_manual(
values = c("TFP Growth (left scale)" = "blue", "Effective Researchers (right scale)" = "green")
) +
# Add labels and theme adjustments
labs(
x = "Period",
y = NULL,
color = NULL,
title = "Aggregate Data on Growth and Research Effort"
) +
theme_minimal() +
theme(
axis.title.y.left = element_text(color = "blue"),
axis.title.y.right = element_text(color = "green"),
legend.position = "bottom"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(plot1)

mergedstats2 <- calculate_growth_chunks_and_plot(blsdata, merged_data, start_year = 1948, chunk_size =8)
# Plot 2: Phi Over Time
plot2 <- ggplot(data = mergedstats2, aes(x = Period, y = Phi, group = 1)) +
geom_line(color = "blue", size = 1) + # Line plot for Phi
geom_point(color = "blue", size = 2) + # Points for each period
geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1) + # Reference line at y = 0
labs(
title = "The Evolution of Phi: Standing on Shoulders vs Fishing Out",
subtitle = "Phi > 0: Standing on Shoulders dominates | Phi < 0: Fishing Out dominates",
x = "Time Period",
y = "Phi (Derived from TFP Growth and Researcher Growth)"
) +
theme_minimal() + # Apply minimal theme
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12, face = "italic"),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)
print(plot2)
