# Ramp Land Use

# -----------------------------
# LIC Faceted Bubble Plot (panel d style)
# Times New Roman everywhere (NO showtext required)
# -----------------------------
library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(stringr)
library(tibble)

# --- Register Times New Roman for Windows graphics ---
# Map the family name "Times New Roman" (used later in theme()) to the
# Windows device font of the same name.
tnr_device_font <- windowsFont("Times New Roman")
windowsFonts(`Times New Roman` = tnr_device_font)

# All relative paths below (input workbook, output PNG) resolve against this
# directory.
data_dir <- "C:/Users/hbx29/OneDrive - Texas State University/Das, Subasish's files - FARSPaper/ARM_LIC/FARS_4362_papers/Ramp/LandUse/Data"
setwd(data_dir)

# -----------------------------
# 1) Load ALL rules dataset (cloud)
# -----------------------------
file_path <- "LIC S C lift_urban.xlsx"
df <- read_excel(file_path, sheet = "Sheet2")

# Clean the headers in one pass: strip surrounding whitespace, give empty or
# missing headers a placeholder, then make every name syntactic and unique.
clean_names <- trimws(names(df))
clean_names[is.na(clean_names) | clean_names == ""] <- "blank_col"
names(df) <- make.names(clean_names, unique = TRUE)

# Canonical metric name -> header spellings accepted as aliases.
rename_map <- list(
  support    = c("support", "Support", "S"),
  confidence = c("confidence", "Confidence", "C"),
  lift       = c("lift", "Lift", "L"),
  LIC        = c("LIC", "Lic", "lic")
)

# For each canonical name that is not already a column, rename the first
# alias found in the data to that canonical name.
for (target in names(rename_map)) {
  if (target %in% names(df)) {
    next
  }
  candidates <- intersect(rename_map[[target]], names(df))
  if (length(candidates) == 0) {
    next
  }
  names(df)[match(candidates[1], names(df))] <- target
}

# ---- Thresholds ----
SUP_MIN  <- 0.10
CONF_MIN <- 0.35 # for rural it will be .30
LIFT_MIN <- 1.00
LIC_MIN  <- 1.00

# The four rule metrics that must be numeric and non-missing.
metric_cols <- c("support", "confidence", "lift", "LIC")

# Coerce the metrics to numeric and keep only rows where all four are present.
df <- df %>%
  mutate(across(all_of(metric_cols), as.numeric)) %>%
  filter(if_all(all_of(metric_cols), ~ !is.na(.x)))

# Facet key: which side of the LIC threshold each rule falls on.
df$LIC_group <- ifelse(df$LIC >= LIC_MIN, "LIC \u2265 1.0", "LIC < 1.0")

# -----------------------------
# 2) Manual highlighted rules (from your table/image)  [UPDATED: M1–M5 and U1–U10]
# -----------------------------
# One row per highlighted rule: S = support, C = confidence, L = lift.
# LIC is NA for the M rules.
manual_rules <- tribble(
  ~RuleID, ~S,     ~C,     ~L,     ~LIC,
  "M1",    0.193,  0.484,  1.418,  NA,
  "U1",    0.153,  0.520,  1.523,  1.074,
  "U2",    0.100,  0.535,  1.565,  1.104,
  "M2",    0.262,  0.385,  1.127,  NA,
  "U3",    0.153,  0.520,  1.523,  1.351,
  "U4",    0.107,  0.428,  1.254,  1.113,
  "U5",    0.100,  0.535,  1.565,  1.389,
  "M3",    0.235,  0.632,  1.088,  NA,
  "U6",    0.107,  0.693,  1.195,  1.098,
  "U7",    0.106,  0.642,  1.106,  1.017,
  "M4",    0.166,  0.678,  1.079,  NA,
  "U8",    0.103,  0.723,  1.151,  1.067,
  "U9",    0.123,  0.686,  1.092,  1.012,
  "M5",    0.188,  0.750,  1.102,  NA,
  "U10",   0.105,  0.754,  1.109,  1.006
)

# TRUE for rules whose ID starts with "M"; computed once and reused below.
is_base_rule <- str_detect(manual_rules$RuleID, "^M")

manual_rules <- manual_rules %>%
  mutate(
    # Plot-facing aliases so these rows share aesthetics with the full dataset
    support = S,
    confidence = C,
    lift = L,
    # M rules have no LIC; give them a value just above 1 so they are drawn
    # in the LIC >= 1 panel
    LIC_for_facet = replace(LIC, is_base_rule, 1.0001),
    LIC_group = ifelse(LIC_for_facet >= LIC_MIN, "LIC \u2265 1.0", "LIC < 1.0"),
    rule_type = ifelse(
      is_base_rule,
      "Base Rules: M1\u2013M5",
      "Associate Rules: U1\u2013U10"
    )
  )

# -----------------------------
# 3) Facet titles with counts from FULL dataset
# -----------------------------
# Count the rules in each LIC group and build the strip title for each panel.
facet_counts <- df %>%
  count(LIC_group, name = "rules") %>%
  mutate(facet_lab = paste0(LIC_group, ",  Rules = ", rules))

# Named lookup (LIC_group -> strip title) consumed by the facet labeller.
lab_map <- facet_counts$facet_lab
names(lab_map) <- facet_counts$LIC_group

# -----------------------------
# 4) Plot (no point labels)
# -----------------------------
# Faceted bubble plot of support (x) vs lift (y), one panel per LIC group.
#   * Background bubbles: every rule in `df`, sized and coloured by confidence.
#   * Reference lines: dashed vertical at SUP_MIN, solid horizontal at LIFT_MIN.
#   * Foreground markers: the hand-picked rules in `manual_rules`, with shape
#     and fill keyed by `rule_type`.

# Legend entries for the highlighted rules, in display order. Must match the
# `rule_type` values stored in `manual_rules`.
rule_type_levels <- c("Base Rules: M1\u2013M5", "Associate Rules: U1\u2013U10")

p <- ggplot() +
  geom_point(
    data = df,
    aes(x = support, y = lift, size = confidence, color = confidence),
    alpha = 0.80
  ) +
  geom_vline(xintercept = SUP_MIN, linetype = "dashed", linewidth = 1.2) +
  geom_hline(yintercept = LIFT_MIN, linetype = "solid", linewidth = 1.2) +
  geom_point(
    data = manual_rules,
    aes(x = support, y = lift, shape = rule_type, fill = rule_type),
    size = 7, color = "black", stroke = 1.6, alpha = 1
  ) +
  facet_wrap(~ LIC_group, nrow = 1, labeller = as_labeller(lab_map)) +
  scale_color_gradient(low = "grey85", high = "red3", name = "confidence") +
  scale_size_continuous(range = c(2.5, 3), name = "confidence\n(bubble size)") +
  scale_fill_manual(
    name = "",
    values = setNames(c("dodgerblue3", "goldenrod2"), rule_type_levels),
    breaks = rule_type_levels
  ) +
  scale_shape_manual(
    name = "",
    # 22 = filled square, 21 = filled circle (both honour `fill`)
    values = setNames(c(22, 21), rule_type_levels),
    breaks = rule_type_levels
  ) +
  labs(x = "support", y = "lift") +
  theme_bw(base_size = 30) +
  theme(
    text = element_text(family = "Times New Roman"),
    legend.position = "right",
    strip.text = element_text(face = "bold", size = 30),
    axis.title = element_text(size = 30),
    axis.text = element_text(size = 28),
    legend.text = element_text(size = 26),
    panel.grid.minor = element_blank()
  ) +
  guides(
    # fill and shape share the same title and labels, so ggplot2 merges them
    # into a single legend. override.aes is given on one of the two guides
    # only: repeating it on both triggers "Duplicated `override.aes` is
    # ignored." on every draw.
    fill  = guide_legend(
      order = 1,
      override.aes = list(size = 7, color = "black")
    ),
    shape = guide_legend(order = 1),
    color = guide_colorbar(order = 2),
    size  = guide_legend(order = 3)
  )

print(p)

# Save high-res
ggsave(
  "LIC_support_lift_facets_base_associate_TNR.png",
  p,
  width = 16, height = 7, dpi = 300, device = "png"
)