# Shared ggplot2 theme applied to every chart in this sector profile.
#
# Args:
#   base_size: base font size forwarded to theme_minimal() (default 12).
#
# Returns:
#   theme_minimal() with the text and layout overrides below layered on top.
theme_sector <- function(base_size = 12) {
  navy <- "#12355b"

  # Typography and colours for titles, axes, legend title and facet strips.
  text_overrides <- theme(
    plot.title = element_text(face = "bold", size = 15, color = navy),
    plot.subtitle = element_text(size = 11, color = "#52616b"),
    plot.caption = element_text(size = 9, color = "#6b7280"),
    axis.title = element_text(face = "bold", color = "#243b53"),
    axis.text = element_text(color = "#334e68"),
    legend.title = element_text(face = "bold"),
    strip.text = element_text(face = "bold", color = navy)
  )

  # Layout: no minor gridlines, no vertical major gridlines, legend underneath.
  layout_overrides <- theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

  theme_minimal(base_size = base_size) + text_overrides + layout_overrides
}

1 1. Load Data

# Location of the cleaned panel (.rds file).
# The VES_DATA_PATH environment variable, when set to a non-empty value,
# overrides the default below so the report can run on other machines without
# editing this chunk. When it is unset, the original path is used unchanged.
data_path_override <- Sys.getenv("VES_DATA_PATH", unset = "")

DATA_PATH <- if (nzchar(data_path_override)) {
  data_path_override
} else {
  "/Users/macbookstruongdat/Desktop/Master of Analytics/Applied Project/P000250DA - CSIRO Data 61/2026-04-14-dn18_23-clean.rds"
}

# Fail early, and show the path that was tried, if the file is missing.
if (!file.exists(DATA_PATH)) {
  stop(
    "Data file not found. Please check DATA_PATH. Tried: ", DATA_PATH,
    call. = FALSE
  )
}

df_clean <- readRDS(DATA_PATH)

# Print the column names, types and first values of the loaded data.
glimpse(df_clean)
## Rows: 3,524,527
## Columns: 43
## $ tax_id                                <dbl> 1101845100, 2600775926, 34000274…
## $ province_code                         <dbl> 80, 25, 60, 1, 1, 79, 79, 30, 79…
## $ survey_year                           <dbl> 2019, 2019, 2019, 2019, 2019, 20…
## $ industry_1digit                       <dbl> 4, 4, 1, 4, 8, 6, 4, 5, 4, 4, 4,…
## $ industry_2digit                       <chr> "41", "42", "10", "49", "82", "6…
## $ industry_3digit                       <dbl> 410, 429, 102, 493, 829, 661, 46…
## $ firm_ownership_type                   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ export_import_dummy                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ innovation_technology_2021_2023only   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, …
## $ labour_headcount_end                  <dbl> 3, 113, 5, 3, 5, 10, 3, 73, 9, 9…
## $ labour_female_headcount_end           <dbl> 1, 45, 2, 1, 2, 0, 1, 57, 1, 1, …
## $ capital_sources_total_end             <dbl> 856.90, 8132.90, 7042.70, 3578.9…
## $ fixed_assets_gross_end                <dbl> 0.00, 576.80, 0.00, 0.00, 0.00, …
## $ current_assets_end                    <dbl> 856.90, 8090.90, 6739.70, 3543.6…
## $ total_net_revenue                     <dbl> 930.00, 7694.10, 11636.90, 1464.…
## $ net_revenue_main_activity             <dbl> 930.00, 7694.00, 11636.50, 1464.…
## $ profit_pretax                         <dbl> -143.1, 14.9, 29.9, -9.1, 21.2, …
## $ profit_tax                            <dbl> 0.0, 3.0, 6.1, 0.0, 0.0, 0.0, 7.…
## $ labour_income_total                   <dbl> 120.00, 7356.00, 0.00, 253.14, 6…
## $ investment_any_dummy                  <dbl> 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,…
## $ total_investment                      <dbl> 0.00, 0.00, 114.29, 559.88, 282.…
## $ capital_state                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ capital_credit                        <dbl> 0.00, 0.00, 0.00, 245.55, 11.56,…
## $ capital_from_own_sources              <dbl> 0.00, 0.00, 114.29, 312.40, 271.…
## $ capital_from_other_sources            <dbl> 0.00, 0.00, 0.00, 1.93, 0.00, 70…
## $ investment_construction_installation  <dbl> 0.00, 0.00, 0.00, 24.33, 2.28, 0…
## $ investment_machinery_equipment        <dbl> 0.00, 0.00, 0.00, 9.63, 8.22, 0.…
## $ investment_construction_other         <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.…
## $ investment_fixed_assets_purchase      <dbl> 0.00, 0.00, 0.00, 518.99, 268.26…
## $ investment_major_repairs              <dbl> 0.00, 0.00, 0.00, 0.85, 0.29, 5.…
## $ investment_working_capital_supplement <dbl> 0.00, 0.00, 114.29, 5.35, 2.47, …
## $ investment_other                      <dbl> 0.00, 0.00, 0.00, 0.74, 1.22, 0.…
## $ innovation_product_2020only           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, …
## $ innovation_management_2020only        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, …
## $ innovation_process_2020only           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, …
## $ export_dummy_2019only                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ import_dummy_2019only                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ capital_intensity_prebuilt            <dbl> 0.00000, 1.19375, 59.97000, 0.00…
## $ revenue_per_worker_prebuilt           <dbl> 310.00000, 120.22031, 2327.38000…
## $ firm_ownership_type_factor            <fct> Domestic private firms, Domestic…
## $ capital_intensity                     <dbl> 285.63334, 71.97257, 1408.54004,…
## $ revenue_per_worker                    <dbl> 310.00000, 68.08938, 2327.38000,…
## $ CGE_sectorname                        <chr> "Construction", "Construction", …

2 2. Data Validation and Preparation

This report is built for the current cleaned dataset structure with 43 variables. The key sector variable is CGE_sectorname; therefore, the Education sector is filtered using CGE_sectorname == "Education".

# Sector this profile is built for. Defined before the checks below so the
# sector check and its error message use the same value as the rest of the report.
target_sector_name <- "Education"
target_sector_code <- 85

# Columns that the chunks of this report read from df_clean.
required_cols <- c(
  "tax_id", "survey_year", "CGE_sectorname",
  "firm_ownership_type_factor",
  "labour_headcount_end", "labour_female_headcount_end",
  "total_net_revenue", "net_revenue_main_activity",
  "capital_sources_total_end", "fixed_assets_gross_end",
  "profit_pretax", "total_investment",
  "investment_construction_installation",
  "investment_machinery_equipment",
  "investment_fixed_assets_purchase",
  "investment_major_repairs",
  "investment_working_capital_supplement",
  "investment_other",
  "capital_state", "capital_credit",
  "capital_from_own_sources", "capital_from_other_sources",
  "export_import_dummy",
  "innovation_product_2020only",
  "innovation_process_2020only",
  "innovation_management_2020only",
  "innovation_technology_2021_2023only",
  "export_dummy_2019only",
  "import_dummy_2019only"
)

# Stop with the full list of absent columns rather than failing later on the
# first one that happens to be used.
missing_cols <- setdiff(required_cols, names(df_clean))

if (length(missing_cols) > 0) {
  stop(
    "Missing required columns: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Distinct sector labels present in the data, sorted.
available_sectors <- sort(unique(df_clean$CGE_sectorname))

# The target sector must appear as a CGE_sectorname label, otherwise every
# downstream filter would silently return zero rows.
if (!target_sector_name %in% available_sectors) {
  stop(
    target_sector_name,
    " sector not found in CGE_sectorname. Check sector naming in the cleaned dataset.",
    call. = FALSE
  )
}

# Display order of the firm-size bands; the last level catches headcounts that
# match none of the bands (zero, missing, or otherwise outside the ranges).
size_levels <- c(
  "Micro (1–9)", "Small (10–49)", "Medium (50–199)", "Large (200+)", "Zero / unknown"
)

# Firm-level analysis table: type clean-up, per-worker ratios and size bands.
df_analysis <- df_clean %>%
  # Normalise types and fill missing investment with 0.
  mutate(
    survey_year = as.integer(survey_year),
    CGE_sectorname = as.character(CGE_sectorname),
    total_investment = replace_na(total_investment, 0),
    # Headcount floored at 1 so the per-worker ratios never divide by zero.
    labour_safe = pmax(labour_headcount_end, 1)
  ) %>%
  # Firm-level ratios per (floored) worker.
  mutate(
    firm_productivity = total_net_revenue / labour_safe,
    firm_capital_intensity = capital_sources_total_end / labour_safe,
    firm_investment_intensity = total_investment / labour_safe
  ) %>%
  # Size band from end-of-period headcount, stored as an ordered-level factor.
  mutate(
    firm_size_category = factor(
      case_when(
        between(labour_headcount_end, 1, 9) ~ "Micro (1–9)",
        between(labour_headcount_end, 10, 49) ~ "Small (10–49)",
        between(labour_headcount_end, 50, 199) ~ "Medium (50–199)",
        labour_headcount_end >= 200 ~ "Large (200+)",
        TRUE ~ "Zero / unknown"
      ),
      levels = size_levels
    )
  )

# Firm-level rows belonging to the target sector only.
edu <- filter(df_analysis, CGE_sectorname == target_sector_name)

# Column total that ignores missing values.
sum_present <- function(x) sum(x, na.rm = TRUE)

# Ratio whose denominator is floored at 1 to avoid division by zero.
ratio_floor1 <- function(numerator, denominator) {
  numerator / pmax(denominator, 1)
}

# Step 1: aggregate firm rows to one row per sector and survey year.
sector_totals <- df_analysis %>%
  group_by(CGE_sectorname, survey_year) %>%
  summarise(
    n_firms = n_distinct(tax_id),
    total_labour = sum_present(labour_headcount_end),
    female_labour = sum_present(labour_female_headcount_end),
    total_revenue = sum_present(total_net_revenue),
    main_revenue = sum_present(net_revenue_main_activity),
    total_capital = sum_present(capital_sources_total_end),
    fixed_assets = sum_present(fixed_assets_gross_end),
    total_profit = sum_present(profit_pretax),
    total_investment = sum_present(total_investment),
    investment_machinery = sum_present(investment_machinery_equipment),
    investment_construction = sum_present(investment_construction_installation),
    investment_fixed_asset_purchase = sum_present(investment_fixed_assets_purchase),
    investment_repairs = sum_present(investment_major_repairs),
    investment_working_capital = sum_present(investment_working_capital_supplement),
    investment_other = sum_present(investment_other),
    capital_state = sum_present(capital_state),
    capital_credit = sum_present(capital_credit),
    capital_own = sum_present(capital_from_own_sources),
    capital_other = sum_present(capital_from_other_sources),
    # Share of rows whose export/import dummy equals 1 (missing values ignored).
    export_import_rate = mean(export_import_dummy == 1, na.rm = TRUE),
    .groups = "drop"
  )

# Step 2: sector-level ratios computed from the totals (ratio of sums, not a
# mean of firm-level ratios).
sector_year <- sector_totals %>%
  mutate(
    labour_productivity = ratio_floor1(total_revenue, total_labour),
    capital_intensity = ratio_floor1(total_capital, total_labour),
    fixed_asset_intensity = ratio_floor1(fixed_assets, total_labour),
    investment_intensity = ratio_floor1(total_investment, total_labour),
    profit_margin = ratio_floor1(total_profit, total_revenue),
    female_labour_share = ratio_floor1(female_labour, total_labour)
  )

# Economy-wide totals per survey year, obtained by adding up all sectors in
# sector_year. Used as denominators for the sector shares.
economy_year <- summarise(
  group_by(sector_year, survey_year),
  economy_revenue = sum(total_revenue, na.rm = TRUE),
  economy_labour = sum(total_labour, na.rm = TRUE),
  economy_investment = sum(total_investment, na.rm = TRUE),
  .groups = "drop"
)

# Relative change of each value against the previous row.
growth_vs_previous <- function(x) x / lag(x) - 1

# Target-sector rows with the economy-wide totals attached for the same year.
edu_with_economy <- sector_year %>%
  filter(CGE_sectorname == target_sector_name) %>%
  left_join(economy_year, by = "survey_year")

# Yearly series for the target sector: shares of the economy totals, then
# growth rates against the preceding row once sorted by survey year.
edu_year <- edu_with_economy %>%
  mutate(
    revenue_share = total_revenue / economy_revenue,
    labour_share = total_labour / economy_labour,
    investment_share = total_investment / economy_investment
  ) %>%
  arrange(survey_year) %>%
  mutate(
    productivity_growth = growth_vs_previous(labour_productivity),
    revenue_growth = growth_vs_previous(total_revenue),
    labour_growth = growth_vs_previous(total_labour),
    investment_growth = growth_vs_previous(total_investment)
  )

3 3. Sector Profile Demo Template

3.1 Education (CGE 85)

Sector scope: Education corresponds to CGE sector code 85 in the old CGE mapping. This report uses CGE_sectorname == “Education” because the current cleaned dataset stores sector labels but not CGE_sectorcode.

3.2 3.1 Sector Identity and Policy Relevance

# Two-column identity table for the profiled sector.
# The sector name and CGE code come from target_sector_name and
# target_sector_code, so this table cannot drift from the filters used in the
# rest of the report.
scope_tbl <- tibble(
  Component = c(
    "Sector name",
    "CGE code",
    "Sector type",
    "Main policy role",
    "Main analytical question"
  ),
  Content = c(
    target_sector_name,
    as.character(target_sector_code),
    "Service / human capital sector",
    "Skills formation, workforce capability, digital readiness and long-term productivity",
    paste0(
      "Is the ", target_sector_name,
      " sector expanding, becoming more productive, and investing enough to support Vietnam’s productivity transition?"
    )
  )
)

# Render as a striped, full-width HTML table.
scope_tbl %>%
  kable(format = "html", caption = "Sector identity and policy relevance") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover"))
Sector identity and policy relevance
Component Content
Sector name Education
CGE code 85
Sector type Service / human capital sector
Main policy role Skills formation, workforce capability, digital readiness and long-term productivity
Main analytical question Is the Education sector expanding, becoming more productive, and investing enough to support Vietnam’s productivity transition?

Text placeholder: The Education sector plays a strategic role in Vietnam’s productivity transition by supporting human capital formation and workforce capability. This profile assesses whether the sector is expanding, improving productivity, and investing sufficiently to support broader technology adoption and economic upgrading.

3.3 3.2 Executive Snapshot

# Most recent survey year available for the target sector.
latest_year <- max(edu_year$survey_year, na.rm = TRUE)

# Sector row for that year.
edu_latest <- filter(edu_year, survey_year == latest_year)

# Headline KPIs, formatted as display strings (one column per KPI).
snapshot_wide <- transmute(
  edu_latest,
  `Number of firms` = comma(n_firms),
  `Total labour` = comma(total_labour),
  `Total revenue (mn VND)` = comma(round(total_revenue, 0)),
  `Labour productivity (mn VND/worker)` = comma(round(labour_productivity, 1)),
  `Capital intensity (mn VND/worker)` = comma(round(capital_intensity, 1)),
  `Investment intensity (mn VND/worker)` = comma(round(investment_intensity, 1)),
  `Revenue share` = percent(revenue_share, accuracy = 0.01),
  `Labour share` = percent(labour_share, accuracy = 0.01)
)

# Reshape to a KPI / Value table for display.
snapshot <- pivot_longer(
  snapshot_wide,
  cols = everything(),
  names_to = "KPI",
  values_to = "Value"
)

# Render as a striped, full-width HTML table captioned with sector and year.
snapshot_caption <- glue("Executive snapshot — {target_sector_name}, {latest_year}")

kable_styling(
  kable(snapshot, format = "html", caption = snapshot_caption),
  full_width = TRUE,
  bootstrap_options = c("striped", "hover")
)
Executive snapshot — Education, 2023
KPI Value
Number of firms 11,223
Total labour 189,983
Total revenue (mn VND) 85,419,725
Labour productivity (mn VND/worker) 450
Capital intensity (mn VND/worker) 874
Investment intensity (mn VND/worker) 52
Revenue share 0.25%
Labour share 1.25%

Analyst notes:
1. Is Education a large employer or a small sector?
2. Is revenue growing faster than labour?
3. Is productivity increasing or stagnant?
4. Does investment intensity look low compared with other service sectors?

4 Page 1 — Industry Structure and Performance

4.1 4. Visual 1 — Sector Size Trend

Purpose. To show whether the Education sector is expanding in economic output and employment.

# Base and end year of the index are read from the data, so the chart labels
# always match the year the series is actually indexed to.
index_base_year <- min(edu_year$survey_year, na.rm = TRUE)
index_end_year <- max(edu_year$survey_year, na.rm = TRUE)

# Revenue and labour expressed relative to their base-year value (base = 1.0),
# reshaped to long form for plotting. The base-year row is selected explicitly
# rather than relying on the row order of edu_year.
edu_size_long <- edu_year %>%
  arrange(survey_year) %>%
  transmute(
    survey_year,
    Revenue = total_revenue / total_revenue[survey_year == index_base_year][1],
    Labour = total_labour / total_labour[survey_year == index_base_year][1]
  ) %>%
  pivot_longer(-survey_year, names_to = "Metric", values_to = "Index")

ggplot(edu_size_long, aes(x = survey_year, y = Index, colour = Metric)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.8) +
  scale_y_continuous(labels = number_format(accuracy = 0.1)) +
  scale_x_continuous(breaks = sort(unique(edu_year$survey_year))) +
  scale_colour_manual(values = c("Revenue" = "#12355b", "Labour" = "#2a9d8f")) +
  labs(
    title = "Education sector size trend",
    subtitle = paste0(
      "Indexed to ", index_base_year,
      " = 1.0 to compare revenue and labour growth"
    ),
    x = NULL,
    y = paste0("Index (", index_base_year, " = 1.0)"),
    colour = NULL,
    caption = paste0(
      "Source: Cleaned VES panel, ", index_base_year, "–", index_end_year, "."
    )
  ) +
  theme_sector()

Text placeholder: Between 2019 and 2023, Education sector revenue [increased/decreased/remained stable], while employment [increased/decreased/remained stable]. This suggests that sector expansion was primarily driven by [output growth / labour expansion / both / neither].

4.2 5. Visual 2 — Revenue Share and Labour Share

Purpose. To compare the economic weight of Education in terms of output and employment.

# Display labels for the two share columns.
share_labels <- c(
  revenue_share = "Revenue share",
  labour_share = "Labour share"
)

# Revenue and labour shares in long form, with readable series labels.
edu_share_long <- edu_year %>%
  select(survey_year, revenue_share, labour_share) %>%
  pivot_longer(
    cols = c(revenue_share, labour_share),
    names_to = "Share type",
    values_to = "Share"
  ) %>%
  mutate(`Share type` = unname(share_labels[`Share type`]))

share_year_breaks <- sort(unique(edu_year$survey_year))

ggplot(
  data = edu_share_long,
  mapping = aes(x = survey_year, y = Share, colour = `Share type`)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.8) +
  scale_x_continuous(breaks = share_year_breaks) +
  scale_y_continuous(labels = percent_format(accuracy = 0.01)) +
  scale_colour_manual(
    values = c("Revenue share" = "#12355b", "Labour share" = "#2a9d8f")
  ) +
  labs(
    title = "Education revenue share versus labour share",
    subtitle = "Compares sector economic weight against employment weight",
    x = NULL,
    y = "Share of all mapped CGE sectors",
    colour = NULL
  ) +
  theme_sector()

Text placeholder: Education accounts for [X%] of total employment but [Y%] of total revenue, indicating that the sector is relatively [labour-intensive / revenue-productive] compared with the economy-wide average.

4.3 6. Visual 3 — Firm Size Structure

Purpose. To identify whether the sector is fragmented or concentrated.

# Distinct firms per survey year and size band.
firms_by_size <- edu %>%
  group_by(survey_year, firm_size_category) %>%
  summarise(n_firms = n_distinct(tax_id), .groups = "drop")

# Each band's share of that year's firm count.
edu_size_dist <- firms_by_size %>%
  group_by(survey_year) %>%
  mutate(share = n_firms / sum(n_firms)) %>%
  ungroup()

# Stacked bars: one bar per year, segments are size bands.
ggplot(
  data = edu_size_dist,
  mapping = aes(x = factor(survey_year), y = share, fill = firm_size_category)
) +
  geom_col(width = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Firm size structure in Education",
    subtitle = "Share of firms by employment size category",
    x = NULL,
    y = "Share of firms",
    fill = "Firm size"
  ) +
  theme_sector()

Text placeholder: The Education sector is dominated by [micro/small/medium/large] firms. This structure suggests [high fragmentation / moderate scale / concentration], which may affect the sector’s ability to invest in digital platforms, staff training, and productivity-enhancing systems.

4.4 7. Visual 4 — Ownership Structure

Purpose. To understand whether Education is mainly domestic private, state-linked, or FDI-driven.

# Distinct firms per survey year and ownership type.
firms_by_owner <- edu %>%
  group_by(survey_year, firm_ownership_type_factor) %>%
  summarise(n_firms = n_distinct(tax_id), .groups = "drop")

# Each ownership type's share of that year's firm count.
edu_owner <- firms_by_owner %>%
  group_by(survey_year) %>%
  mutate(share = n_firms / sum(n_firms)) %>%
  ungroup()

# Stacked bars: one bar per year, segments are ownership types.
ggplot(
  data = edu_owner,
  mapping = aes(
    x = factor(survey_year),
    y = share,
    fill = firm_ownership_type_factor
  )
) +
  geom_col(width = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Ownership structure in Education",
    subtitle = "Share of firms by ownership type",
    x = NULL,
    y = "Share of firms",
    fill = "Ownership"
  ) +
  theme_sector()

Text placeholder: Education is primarily composed of [domestic private / state / FDI] firms. This suggests that policy interventions should focus on [private-sector capability building / public-sector coordination / foreign investment and skills transfer].

4.5 8. Visual 5 — Labour Productivity Trend

Purpose. To measure whether the sector is becoming more efficient in generating revenue per worker.

# Sector-level labour productivity by survey year (line with point markers).
productivity_colour <- "#e76f51"
productivity_year_breaks <- sort(unique(edu_year$survey_year))

ggplot(
  data = edu_year,
  mapping = aes(x = survey_year, y = labour_productivity)
) +
  geom_line(colour = productivity_colour, linewidth = 1.2) +
  geom_point(colour = productivity_colour, size = 2.8) +
  scale_x_continuous(breaks = productivity_year_breaks) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Labour productivity trend in Education",
    subtitle = "Total net revenue divided by total labour",
    x = NULL,
    y = "Million VND per worker",
    caption = "Rule: sum(total_net_revenue) / sum(labour_headcount_end), not mean of firm-level ratios."
  ) +
  theme_sector()

Text placeholder: Labour productivity in Education [increased/decreased/stagnated] between 2019 and 2023. This suggests that sector performance has been driven by [efficiency gains / employment growth without revenue gains / structural constraints].

4.6 9. Visual 6 — Productivity Distribution

Purpose. To detect inequality between high-performing and low-performing firms within Education.

# Keep firms with positive headcount and positive revenue per worker; the
# log-scaled axis below cannot display zero or negative values.
edu_dist <- filter(edu, labour_headcount_end > 0 & firm_productivity > 0)

# One box per survey year on a log10 productivity axis.
ggplot(
  data = edu_dist,
  mapping = aes(x = factor(survey_year), y = firm_productivity)
) +
  geom_boxplot(colour = "#12355b", fill = "#a8dadc", outlier.alpha = 0.08) +
  scale_y_log10(labels = comma) +
  labs(
    title = "Firm-level productivity distribution in Education",
    subtitle = "Log scale highlights dispersion between laggard and high-performing firms",
    x = NULL,
    y = "Revenue per worker (log scale)"
  ) +
  theme_sector()

Text placeholder: The productivity distribution shows [wide/narrow] dispersion across firms. This indicates [large performance gaps / relatively consistent firm performance]. A wide gap would suggest that productivity gains may depend on diffusing best practices from leading education providers to smaller institutions.

4.7 10. Visual 7 — Capital Intensity Trend

Purpose. To understand whether Education is becoming more capital-intensive.

# Sector-level capital intensity by survey year (line with point markers).
capital_colour <- "#457b9d"
capital_year_breaks <- sort(unique(edu_year$survey_year))

ggplot(
  data = edu_year,
  mapping = aes(x = survey_year, y = capital_intensity)
) +
  geom_line(colour = capital_colour, linewidth = 1.2) +
  geom_point(colour = capital_colour, size = 2.8) +
  scale_x_continuous(breaks = capital_year_breaks) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Capital intensity trend in Education",
    subtitle = "Total capital sources divided by total labour",
    x = NULL,
    y = "Million VND per worker"
  ) +
  theme_sector()

Text placeholder: Capital intensity in Education [increased/decreased/remained stable]. If productivity did not increase at the same time, this may suggest that additional capital has not yet translated into measurable efficiency gains.

4.8 11. Visual 8 — Profitability

Purpose. To assess whether the sector is financially sustainable.

# Sector-level profit margin by survey year, with a dashed zero reference line
# drawn underneath the series.
margin_colour <- "#12355b"
margin_year_breaks <- sort(unique(edu_year$survey_year))

ggplot(
  data = edu_year,
  mapping = aes(x = survey_year, y = profit_margin)
) +
  geom_hline(
    yintercept = 0,
    colour = "grey50",
    linetype = "dashed",
    linewidth = 0.4
  ) +
  geom_line(colour = margin_colour, linewidth = 1.2) +
  geom_point(colour = margin_colour, size = 2.8) +
  scale_x_continuous(breaks = margin_year_breaks) +
  scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
  labs(
    title = "Profit margin trend in Education",
    subtitle = "Pre-tax profit divided by total net revenue",
    x = NULL,
    y = "Profit margin"
  ) +
  theme_sector()

Text placeholder: The profit margin of Education [improved/declined/remained low]. This may reflect [cost pressure / limited pricing power / investment burden / improved operating efficiency].

5 Page 2 — Technology, Investment and Innovation

5.1 12. Visual 9 — Total Investment Trend

Purpose. To assess whether the sector is investing in future capacity.

# Sector-level total investment by survey year (line with point markers).
investment_colour <- "#f4a261"
investment_year_breaks <- sort(unique(edu_year$survey_year))

ggplot(
  data = edu_year,
  mapping = aes(x = survey_year, y = total_investment)
) +
  geom_line(colour = investment_colour, linewidth = 1.2) +
  geom_point(colour = investment_colour, size = 2.8) +
  scale_x_continuous(breaks = investment_year_breaks) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Total investment trend in Education",
    subtitle = "Aggregate development investment by year",
    x = NULL,
    y = "Million VND"
  ) +
  theme_sector()

Text placeholder: Total investment in Education [increased/decreased/stagnated], suggesting [stronger/weaker] capacity expansion and future productivity potential.

5.2 13. Visual 10 — Investment Composition

Purpose. To distinguish whether investment is going into physical expansion, equipment upgrading, maintenance, or capability development.

# Yearly investment totals by category, relabelled for display and reshaped to
# long form (one row per year and category).
investment_long <- edu_year %>%
  transmute(
    survey_year,
    `Construction and installation` = investment_construction,
    `Machinery and equipment` = investment_machinery,
    `Fixed asset purchase` = investment_fixed_asset_purchase,
    `Major repairs` = investment_repairs,
    `Working capital supplement` = investment_working_capital,
    `Other investment` = investment_other
  ) %>%
  pivot_longer(
    cols = -survey_year,
    names_to = "Investment category",
    values_to = "Value"
  )

# Stacked bars: one bar per year, segments are investment categories.
ggplot(
  data = investment_long,
  mapping = aes(
    x = factor(survey_year),
    y = Value,
    fill = `Investment category`
  )
) +
  geom_col(width = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Investment composition in Education",
    subtitle = "Breakdown of investment by category",
    x = NULL,
    y = "Million VND",
    fill = "Investment category"
  ) +
  theme_sector()

Text placeholder: The investment mix is dominated by [construction / equipment / repairs / other investment]. This suggests that Education investment is mainly directed toward [capacity expansion / technology upgrading / maintenance / capability development].

5.3 14. Visual 11 — Investment Intensity vs Productivity Growth

Purpose. To test whether investment is associated with productivity improvement.

# First value of `values` recorded for the given year (NA when none exists).
value_in_year <- function(values, years, year) {
  values[years == year][1]
}

# One row per sector: 2019 and 2023 productivity, mean investment intensity
# over 2019-2023, and 2023 revenue (used for point size).
sector_endpoints <- sector_year %>%
  arrange(CGE_sectorname, survey_year) %>%
  group_by(CGE_sectorname) %>%
  summarise(
    productivity_2019 = value_in_year(labour_productivity, survey_year, 2019),
    productivity_2023 = value_in_year(labour_productivity, survey_year, 2023),
    investment_intensity_avg = mean(
      investment_intensity[survey_year %in% 2019:2023],
      na.rm = TRUE
    ),
    revenue_2023 = value_in_year(total_revenue, survey_year, 2023),
    .groups = "drop"
  )

# Add productivity growth and the target-sector flag, then drop sectors where
# either plotted coordinate is not a finite number.
sector_growth <- sector_endpoints %>%
  mutate(
    productivity_growth_2019_2023 = productivity_2023 / productivity_2019 - 1,
    is_target = CGE_sectorname == target_sector_name
  ) %>%
  filter(
    is.finite(productivity_growth_2019_2023) & is.finite(investment_intensity_avg)
  )

# Vertical reference line at the median investment intensity across sectors.
median_intensity <- median(sector_growth$investment_intensity_avg, na.rm = TRUE)

ggplot(
  data = sector_growth,
  mapping = aes(x = investment_intensity_avg, y = productivity_growth_2019_2023)
) +
  geom_hline(yintercept = 0, colour = "grey55", linetype = "dashed") +
  geom_vline(
    xintercept = median_intensity,
    colour = "grey55",
    linetype = "dashed"
  ) +
  geom_point(aes(size = revenue_2023, colour = is_target), alpha = 0.75) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  scale_size_continuous(labels = comma, range = c(2, 9)) +
  scale_colour_manual(
    values = c("FALSE" = "grey65", "TRUE" = "#e76f51"),
    guide = "none"
  ) +
  labs(
    title = "Investment intensity versus productivity growth",
    subtitle = "Education is highlighted against all CGE sectors",
    x = "Average investment intensity, 2019–2023 (mn VND per worker)",
    y = "Productivity growth, 2019–2023",
    size = "2023 revenue"
  ) +
  theme_sector()

Text placeholder: Education shows [high/low] investment intensity and [high/low] productivity growth. This indicates [effective upgrading / inefficient capital use / underinvestment / strong operational efficiency].

5.4 15. Visual 12 — Innovation Adoption Snapshot

Purpose. To measure innovation activity while respecting the year-specific survey design.

# 2020 innovation indicators: share of 2020 firms reporting each type
# (missing answers ignored), one row per indicator.
innovation_2020 <- edu %>%
  filter(survey_year == 2020) %>%
  summarise(
    `Product innovation (2020)` = mean(innovation_product_2020only == 1, na.rm = TRUE),
    `Process innovation (2020)` = mean(innovation_process_2020only == 1, na.rm = TRUE),
    `Management innovation (2020)` = mean(innovation_management_2020only == 1, na.rm = TRUE)
  ) %>%
  pivot_longer(everything(), names_to = "Indicator", values_to = "Adoption rate")

# 2021-2023 technology indicator: exactly one row per survey year.
# The label is built AFTER summarise(). Inside a grouped summarise(),
# `survey_year` has one element per firm row, so building the label there
# returned one row per firm and geom_col() stacked them into inflated bars.
innovation_technology <- edu %>%
  filter(survey_year %in% 2021:2023) %>%
  group_by(survey_year) %>%
  summarise(
    `Adoption rate` = mean(innovation_technology_2021_2023only == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Indicator = paste0("Technology innovation (", survey_year, ")")) %>%
  select(Indicator, `Adoption rate`)

innovation_snapshot <- bind_rows(innovation_2020, innovation_technology)

# Horizontal bars ordered by adoption rate.
ggplot(innovation_snapshot, aes(x = fct_reorder(Indicator, `Adoption rate`), y = `Adoption rate`)) +
  geom_col(fill = "#2a9d8f", width = 0.7) +
  coord_flip() +
  scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
  labs(
    title = "Innovation adoption snapshot in Education",
    subtitle = "Innovation indicators are year-specific and should not be interpreted as one continuous time series",
    x = NULL,
    y = "Adoption rate"
  ) +
  theme_sector()

Important note: Do not directly compare 2020 innovation variables with 2021–2023 technology innovation as if they measure the same concept.

Text placeholder: Innovation indicators suggest that Education has [low/moderate/high] reported adoption. Because innovation variables differ across survey years, these indicators should be interpreted as snapshots rather than a continuous trend.

5.5 16. Visual 13 — Export / Import and External Linkages

Purpose. To assess whether the sector is internationally connected.

# The separate export and import dummies are labelled "2019 only", so their
# rates are computed on 2019 rows explicitly instead of relying on the other
# years being NA. This mirrors how the innovation snapshot filters by year.
# NOTE(review): if those columns are already NA outside 2019 the result is
# unchanged; if they are zero-filled, this removes the dilution.
edu_2019 <- filter(edu, survey_year == 2019)

# Participation rates: share of rows whose dummy equals 1, missing ignored.
external_linkages <- tibble(
  Indicator = c(
    "Export or import participation (all years)",
    "Export participation (2019 only)",
    "Import participation (2019 only)"
  ),
  Rate = c(
    mean(edu$export_import_dummy == 1, na.rm = TRUE),
    mean(edu_2019$export_dummy_2019only == 1, na.rm = TRUE),
    mean(edu_2019$import_dummy_2019only == 1, na.rm = TRUE)
  )
)

# Horizontal bars ordered by participation rate.
ggplot(external_linkages, aes(x = fct_reorder(Indicator, Rate), y = Rate)) +
  geom_col(fill = "#457b9d", width = 0.7) +
  coord_flip() +
  scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
  labs(
    title = "External linkages in Education",
    subtitle = "Direct export/import participation indicators",
    x = NULL,
    y = "Participation rate"
  ) +
  theme_sector()

Text placeholder: Education shows [low/high] direct export-import participation. However, this does not fully capture international linkages such as digital education services, foreign curricula, or imported learning technologies.

5.6 17. Visual 14 — Capital Source Structure

Purpose. To diagnose financial constraints and investment dependency.

# Yearly capital totals by source, relabelled for display and reshaped to long
# form, then converted to within-year shares. The yearly total is floored at 1
# so a year with no reported capital does not divide by zero.
capital_source_long <- edu_year %>%
  transmute(
    survey_year,
    `State capital` = capital_state,
    Credit = capital_credit,
    `Own source` = capital_own,
    `Other sources` = capital_other
  ) %>%
  pivot_longer(
    cols = -survey_year,
    names_to = "Capital source",
    values_to = "Value"
  ) %>%
  group_by(survey_year) %>%
  mutate(Share = Value / pmax(sum(Value, na.rm = TRUE), 1)) %>%
  ungroup()

# Stacked bars: one bar per year, segments are capital sources.
ggplot(
  data = capital_source_long,
  mapping = aes(x = factor(survey_year), y = Share, fill = `Capital source`)
) +
  geom_col(width = 0.75) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Capital source structure in Education",
    subtitle = "Share of investment funding by source",
    x = NULL,
    y = "Share of reported capital sources",
    fill = "Capital source"
  ) +
  theme_sector()

Text placeholder: Education investment is mainly financed by [own sources / credit / state capital / other sources]. This suggests that future upgrading may depend on [private retained earnings / access to finance / public investment / alternative funding channels].

6 18. Final Diagnosis Box

# Questions the final diagnosis must answer, in display order.
diagnostic_questions <- c(
  "Is the sector economically important?",
  "Is it becoming more productive?",
  "Is growth driven by labour, capital, or efficiency?",
  "Is the sector investing in technology?",
  "Are there structural constraints?",
  "What policy issue emerges?"
)

# Evidence each question draws on (same order as the questions).
diagnostic_evidence <- c(
  "Revenue share, labour share",
  "Productivity trend",
  "Revenue, labour, capital intensity",
  "Investment mix, machinery/equipment, other investment",
  "Firm size, ownership, productivity dispersion",
  "Combined interpretation"
)

# Template table: the conclusion column holds a placeholder for every question.
diagnosis_tbl <- tibble(
  `Diagnostic question` = diagnostic_questions,
  `Required evidence` = diagnostic_evidence,
  `Insert conclusion` = rep("[Insert conclusion]", 6)
)

# Render as a striped, full-width HTML table.
kable_styling(
  kable(
    diagnosis_tbl,
    format = "html",
    caption = "Final diagnosis box — Education (CGE 85)"
  ),
  full_width = TRUE,
  bootstrap_options = c("striped", "hover")
)
Final diagnosis box — Education (CGE 85)
Diagnostic question Required evidence Insert conclusion
Is the sector economically important? Revenue share, labour share [Insert conclusion]
Is it becoming more productive? Productivity trend [Insert conclusion]
Is growth driven by labour, capital, or efficiency? Revenue, labour, capital intensity [Insert conclusion]
Is the sector investing in technology? Investment mix, machinery/equipment, other investment [Insert conclusion]
Are there structural constraints? Firm size, ownership, productivity dispersion [Insert conclusion]
What policy issue emerges? Combined interpretation [Insert conclusion]

Final text placeholder: Overall, the Education sector appears to be [expanding / stagnant / under pressure]. The main evidence suggests [insert key pattern]. The sector’s policy challenge is likely to be [productivity improvement / digital adoption / investment access / scaling small providers / workforce capability]. Future policy should focus on [insert intervention area].