Rows: 19698 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): sysid, aha_id, validated, deal_type, name, sysname, lagsys, lagsysn...
dbl (8): deal, id, parent_id, year, mcrnum, merger, acquirer, target
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Part B: Number of Deals per Year
#Count unique deals per year for OLD datasetraw_deals_per_year <- raw_data %>%group_by(year) %>%summarise(deal =n_distinct(deal), .groups ="drop")#Count unique deals per year for NEW datasetlatest_deals_per_year <- latest_data %>%group_by(year) %>%summarise(deal =n_distinct(deal), .groups ="drop")#Combine datasets for plottingdeals_combined <-bind_rows( raw_deals_per_year %>%mutate(dataset ="Old"), latest_deals_per_year %>%mutate(dataset ="New"))#Plot bar graphggplot(deals_combined, aes(x =factor(year), y = deal, fill = dataset)) +geom_bar(stat ="identity", position ="dodge") +# side-by-side barslabs(title ="Number of Deals Per Year: Old vs New Dataset",x ="Year",y ="Number of Deals",fill ="Dataset" ) +geom_text(aes(label = deal), # add labels on topposition =position_dodge(width =0.9), vjust =-0.5) +scale_fill_manual(values =c("Old"="steelblue", "New"="lightgreen")) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # rotate x-axis if needed
Part C Average Number of Hospitals Participating in a Deal
# Count hospitals per deal for OLD datasetraw_hospitals_per_deal <- raw_data %>%group_by(year, deal) %>%summarise(hospitals_in_deal =n_distinct(aha_id), .groups ="drop")# Count hospitals per deal for NEW datasetlatest_hospitals_per_deal <- latest_data %>%group_by(year, deal) %>%summarise(hospitals_in_deal =n_distinct(aha_id), .groups ="drop")# PLot each deal per year# OLD datasetggplot(raw_hospitals_per_deal, aes(x =factor(deal), y = hospitals_in_deal)) +geom_col(fill ="steelblue") +facet_wrap(~ year, scales ="free_x") +labs(title ="Number of Hospitals per Deal by Year (Old Dataset)",x ="Deal Number",y ="Number of Hospitals" ) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1, size =6))
# NEW datasetggplot(latest_hospitals_per_deal, aes(x =factor(deal), y = hospitals_in_deal)) +geom_col(fill ="lightgreen") +facet_wrap(~ year, scales ="free_x") +labs(title ="Number of Hospitals per Deal by Year (New Dataset)",x ="Deal Number",y ="Number of Hospitals" ) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1, size =6))
#Average number of hospitals per deal across years# Compute averages per year (OLD)raw_avg_per_year <- raw_hospitals_per_deal %>%group_by(year) %>%summarise(avg_hospitals =mean(hospitals_in_deal), .groups ="drop")# Compute averages per year (NEW)latest_avg_per_year <- latest_hospitals_per_deal %>%group_by(year) %>%summarise(avg_hospitals =mean(hospitals_in_deal), .groups ="drop")# Plot line graph comparing OLD vs NEWggplot() +geom_line(data = raw_avg_per_year, aes(x = year, y = avg_hospitals), color ="steelblue", size =1.2) +geom_point(data = raw_avg_per_year, aes(x = year, y = avg_hospitals), color ="steelblue", size =3) +geom_line(data = latest_avg_per_year, aes(x = year, y = avg_hospitals), color ="lightgreen", size =1.2) +geom_point(data = latest_avg_per_year, aes(x = year, y = avg_hospitals), color ="lightgreen", size =3) +labs(title ="Average Number of Hospitals per Deal Across Years",x ="Year",y ="Average Hospitals per Deal",color ="Dataset" ) +theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Ignoring unknown labels:
• colour : "Dataset"
Summary Stats Table
# Overall stats for OLD datasetraw_stats <- raw_hospitals_per_deal %>%summarise(num_deals_total =n(),min_hospitals =min(hospitals_in_deal),median_hospitals =median(hospitals_in_deal),mean_hospitals =mean(hospitals_in_deal),max_hospitals =max(hospitals_in_deal) ) %>%mutate(dataset ="Old") # label# Compute number of deals per year for OLD datasetraw_deals_per_year <- raw_hospitals_per_deal %>%group_by(year) %>%summarise(num_deals_year =n(), .groups ="drop")# Overall stats for NEW datasetlatest_stats <- latest_hospitals_per_deal %>%summarise(num_deals_total =n(),min_hospitals =min(hospitals_in_deal),median_hospitals =median(hospitals_in_deal),mean_hospitals =mean(hospitals_in_deal),max_hospitals =max(hospitals_in_deal) ) %>%mutate(dataset ="New") # label# Compute number of deals per year for NEW datasetlatest_deals_per_year <- latest_hospitals_per_deal %>%group_by(year) %>%summarise(num_deals_year =n(), .groups ="drop")summary_table <-bind_rows(raw_stats, latest_stats) %>%select(dataset, everything())cat("=== Overall Summary Stats (Old vs New) ===\n")
=== Overall Summary Stats (Old vs New) ===
print(summary_table)
# A tibble: 2 × 6
dataset num_deals_total min_hospitals median_hospitals mean_hospitals
<chr> <int> <int> <int> <dbl>
1 Old 445 1 4 44.3
2 New 445 1 4 44.3
# ℹ 1 more variable: max_hospitals <int>
cat("\n=== Number of Deals Per Year (Old Dataset) ===\n")