Replicating Bloom

# Load necessary libraries
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(R.matlab)

## R.matlab v3.7.0 (2022-08-25 21:52:34 UTC) successfully loaded. See ?R.matlab for help.

## 
## Attaching package: 'R.matlab'

## The following objects are masked from 'package:base':
## 
##     getOption, isOpen

# Manually Loading Bureau of Labour Statistics Data
blsdata <- data.frame(
  Year = 1948:2015,
  Level = c(44.186, 44.323, 47.495, 48.629, 49.466, 50.755, 50.728, 52.889, 52.523, 53.310, 53.528, 55.923, 56.267, 57.470, 59.560, 61.302, 63.698, 65.763, 67.773, 67.853, 69.562, 69.288, 69.134, 71.302, 73.337, 75.357, 72.747, 73.430, 76.096, 77.356, 78.401, 78.094, 76.292, 76.455, 73.887, 76.026, 78.305, 79.309, 80.563, 80.844, 81.622, 81.978, 82.686, 82.304, 85.038, 84.721, 85.246, 84.797, 86.421, 87.418, 88.517, 90.378, 91.957, 92.435, 94.397, 96.638, 99.089, 100.593, 100.917, 101.381, 100.174, 100.000, 102.869, 102.986, 103.594, 103.841, 104.558, 104.798)
)


# Manually Loading R&D Data
rnd_data <- data.frame(
  Year <- seq(1929, 2015, by = 1),
  PvtIPP = c(0.6, 0.6, 0.5, 0.4, 0.4, 0.5, 0.6, 0.6, 0.7, 0.8, 0.8, 0.8, 1.1, 1.2, 1.1, 1.2, 1.4, 1.8, 2.0, 2.1, 2.0, 2.3, 2.4, 3.0, 3.7, 3.9, 4.3, 5.2, 5.6, 6.0, 6.6, 7.1, 8.0, 8.4, 9.2, 9.8, 11.1, 12.8, 14.0, 15.6, 17.2, 17.9, 18.7, 20.6, 22.7, 25.5, 27.8, 32.2, 35.8, 40.4, 48.1, 54.4, 64.8, 72.7, 81.3, 95.1, 105.3, 113.5, 120.1, 132.7, 150.1, 164.4, 179.1, 187.7, 196.9, 205.7, 226.8, 253.3, 288.0, 317.7, 364.0, 409.5, 412.6, 406.4, 420.9, 442.1, 475.1, 504.6, 537.9, 563.4, 550.9, 564.3, 592.2, 621.7, 649.9, 690.0, 728.6),
  GovtIPP = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.5, 0.9, 1.6, 1.5, 1.4, 1.4, 1.6, 1.7, 1.9, 2.1, 2.4, 2.7, 3.0, 3.6, 4.8, 5.9, 6.4, 7.0, 7.8, 8.8, 9.9, 11.7, 12.9, 13.8, 15.4, 16.2, 17.0, 17.7, 17.6, 18.1, 19.3, 20.3, 21.5, 23.3, 25.5, 27.9, 31.0, 35.0, 39.6, 45.0, 49.7, 55.2, 62.2, 71.0, 75.2, 81.7, 85.1, 87.8, 91.1, 91.4, 91.6, 91.4, 92.0, 94.0, 95.5, 98.3, 102.3, 106.5, 113.2, 119.7, 126.0, 136.5, 145.5, 153.9, 160.6, 169.0, 177.2, 179.8, 187.4, 191.6, 189.2, 188.1, 187.2, 190.8),
  TotalIPP = c(0.7, 0.7, 0.6, 0.5, 0.5, 0.6, 0.7, 0.7, 0.8, 0.9, 0.9, 0.9, 1.4, 1.7, 2.0, 2.8, 2.9, 3.2, 3.4, 3.7, 3.7, 4.2, 4.5, 5.4, 6.4, 6.9, 7.9, 10.0, 11.5, 12.4, 13.6, 14.9, 16.8, 18.3, 20.9, 22.7, 24.9, 28.2, 30.2, 32.6, 34.9, 35.5, 36.8, 39.9, 43.0, 47.0, 51.1, 57.7, 63.7, 71.4, 83.1, 94.0, 109.8, 122.4, 136.5, 157.3, 176.3, 188.7, 201.8, 217.8, 237.9, 255.5, 270.5, 279.3, 288.3, 297.7, 320.8, 348.8, 386.3, 420.0, 470.5, 522.7, 532.3, 532.4, 557.4, 587.6, 629.0, 665.2, 706.9, 740.6, 730.7, 751.7, 783.8, 810.9, 838.0, 877.2, 919.4)
)
colnames(rnd_data)[1] <- "Year"

# Load Wage Data from .mat File
wage_data <- readMat("WageScientistData.mat")
scientist_wages <- as.numeric(as.vector(wage_data$WageScientist))
wage_years <- as.numeric(as.vector(wage_data$WageYears))

# Create Scientist Wages DataFrame
scientist_wage_data <- data.frame(
  Year = wage_years,
  ScientistWages = scientist_wages
)

# Merge Datasets
merged_data <- merge(rnd_data, scientist_wage_data, by = "Year", all = TRUE)

# Renaming total R&D Budget for Clarity
colnames(merged_data)[colnames(merged_data) == "TotalIPP"] <- "annualrnd"

# Calculate Effective Number of Scientists
merged_data <- merged_data %>%
  mutate(
    Effective_Scientists_Annual = (annualrnd / ScientistWages) * 10^9,
  )

# Define the function
calculate_growth_chunks_and_plot <- function(blsdata, merged_data, start_year, chunk_size) {
  # Filter data to include only years after the start_year
  blsdata_filtered <- blsdata %>% filter(Year >= start_year)
  
  # Create periods based on chunk_size
  # Adjust breaks to include the maximum year explicitly
  breaks <- c(seq(start_year, max(blsdata_filtered$Year), by = chunk_size), max(blsdata_filtered$Year) + 1)
  labels <- paste0(head(breaks, -1), "-", tail(breaks, -1) - 1)
  
  # Attach the period labels to the datasets
  blsdata_filtered <- blsdata_filtered %>%
    mutate(
      Period = cut(Year, breaks = breaks, labels = labels, right = FALSE, include.lowest = TRUE)
    )
  
  # Calculate TFP growth for each period
  tfp_growth <- blsdata_filtered %>%
    group_by(Period) %>%
    arrange(Year) %>%
    summarise(
      FirstYear = first(Year),
      LastYear = last(Year),
      FirstYear = first(Year),
      LastYear = last(Year),
      n_years = LastYear-FirstYear +1,
      FirstValue = first(Level),
      LastValue = last(Level),
      n_years = LastYear - FirstYear + 1,
      AnnualTFPGrowth = 100 * ((LastValue / FirstValue)^(1 / n_years) - 1)
    )
  
  scientistdata1 <- merged_data %>% filter(Year >= start_year)
  
  scientistdata2 <- scientistdata1 %>%
    mutate(
      Period = cut(Year, breaks = breaks, labels = labels, right = FALSE, include.lowest = TRUE)
    )
  
  # Calculate effective scientists for each period
  scientistdata3 <- scientistdata2 %>%
    group_by(Period) %>%
    arrange(Year) %>%
    summarise(
      FirstYear = first(Year),
      LastYear = last(Year),
      FirstValue = first(Effective_Scientists_Annual),
      LastValue = last(Effective_Scientists_Annual),
      n_years = LastYear-FirstYear +1,
      MeanScientistsAnnual = mean(Effective_Scientists_Annual), na.rm = TRUE,
      Scientists_Growth = ((LastValue / FirstValue)^(1 / n_years) - 1) * 100)
  
  # Merge the two datasets
  merged_stats <- inner_join(tfp_growth, scientistdata3, by = "Period")
  View(merged_stats)
  ref_value <- merged_stats %>%
    arrange(FirstYear.x) %>%
    filter(FirstYear.x == first(FirstYear.x)) %>%  # Filter for the earliest period
    pull(MeanScientistsAnnual)  # Extract the MeanScientistsAnnual value
  merged_stats <- merged_stats %>%
    mutate(
      Phi = 1 - Scientists_Growth/AnnualTFPGrowth, 
      NormalizedScientistsAnnual = MeanScientistsAnnual / ref_value
    )
  return(merged_stats)
}

# Run the function
merged_stats1<- calculate_growth_chunks_and_plot(blsdata, merged_data, start_year = 1948, chunk_size =10)
plot1 <- ggplot(data = merged_stats1, aes(x = Period)) +
  # Add the TFP Growth line (blue, left y-axis)
  geom_line(aes(y = AnnualTFPGrowth, group = 1, color = "TFP Growth (left scale)"), size = 1) +
  # Add the Normalized Scientists Annual line (green, right y-axis)
  geom_line(aes(y = NormalizedScientistsAnnual, group = 1, color = "Effective Researchers (right scale)"), size = 1) +
  # Scale for the left y-axis (TFP Growth)
  scale_y_continuous(
    name = "Growth rate (%)",
    sec.axis = sec_axis(~ ., name = "Factor increase")
  ) +
  # Set the color of the lines and legends
  scale_color_manual(
    values = c("TFP Growth (left scale)" = "blue", "Effective Researchers (right scale)" = "green")
  ) +
  # Add labels and theme adjustments
  labs(
    x = "Period",
    y = NULL,
    color = NULL,
    title = "Aggregate Data on Growth and Research Effort"
  ) +
  theme_minimal() +
  theme(
    axis.title.y.left = element_text(color = "blue"),
    axis.title.y.right = element_text(color = "green"),
    legend.position = "bottom"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

print(plot1)

mergedstats2 <- calculate_growth_chunks_and_plot(blsdata, merged_data, start_year = 1948, chunk_size =8)
# Plot 2: Phi Over Time
plot2 <- ggplot(data = mergedstats2, aes(x = Period, y = Phi, group = 1)) +
  geom_line(color = "blue", size = 1) +             # Line plot for Phi
  geom_point(color = "blue", size = 2) +           # Points for each period
  geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1) +  # Reference line at y = 0
  labs(
    title = "The Evolution of Phi: Standing on Shoulders vs Fishing Out",
    subtitle = "Phi > 0: Standing on Shoulders dominates | Phi < 0: Fishing Out dominates",
    x = "Time Period",
    y = "Phi (Derived from TFP Growth and Researcher Growth)"
  ) +
  theme_minimal() +  # Apply minimal theme
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12, face = "italic"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )

print(plot2)

Replicating Bloom

2024-12-10