전문판매업자 신고 게시글 종합 분석 대시보드 v1.0

---
title: "전문판매업자 신고 게시글 종합 분석 대시보드 v1.0"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    includes:
      in_header: password_protection.html
    source_code: embed
---

```{r setup, include=FALSE}
# Install (when missing) and attach every package the dashboard depends on.
required_packages <- c("flexdashboard", "tidyverse", "plotly", "DT", "stringr", "scales", "lubridate")

for (pkg in required_packages) {
  # requireNamespace() only checks availability. library() then attaches the
  # package and raises an error if that fails, instead of returning FALSE the
  # way require() does.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cran.r-project.org")
  }
  library(pkg, character.only = TRUE)
}

# Read the raw report export. check.names = FALSE keeps the original
# (non-syntactic) column headers exactly as they appear in the CSV.
data <- read.csv(
  "전문판매업자 신고 데이터 11월 기준 Raw data - results.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE,
  check.names = FALSE
)

cat("데이터 로드 완료:", nrow(data), "행\n")

# Give the report-count column a short ASCII name.
is_report_column <- names(data) == "전문판매업자 신고 인입 수"
names(data)[is_report_column] <- "report_count"

# Coerce the report count to numeric, then log its type and observed range.
data$report_count <- as.numeric(data$report_count)
cat("신고 인입 수 데이터 타입:", class(data$report_count), "\n")
lowest_report_count <- min(data$report_count, na.rm = TRUE)
highest_report_count <- max(data$report_count, na.rm = TRUE)
cat("신고 인입 수 범위:", lowest_report_count, "-", highest_report_count, "\n")

# Preprocessing: timestamps.
# The " UTC" suffix is stripped and the remainder is parsed explicitly as UTC.
# Without `tz`, as.POSIXct() reads the text in the machine's local time zone,
# so the parsed instants (and any later date conversion) would depend on where
# the report is knitted.
# NOTE(review): hour-of-day and weekday figures derived from these columns are
# therefore UTC-based; convert with lubridate::with_tz() if local time is
# wanted - confirm with the data owner.
for (time_col in c("created_at", "updated_at")) {
  time_text <- sub(" UTC$", "", as.character(data[[time_col]]))
  data[[time_col]] <- as.POSIXct(time_text, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
}

# Preprocessing: price.
# Drop thousands separators, then convert to numeric. Values that cannot be
# parsed become NA (warnings suppressed on purpose) and are then set to 0.
data$price <- as.character(data$price)
data$price <- gsub(",", "", data$price)
data$price_numeric <- suppressWarnings(as.numeric(data$price))
data$price_numeric[is.na(data$price_numeric)] <- 0

cat("가격 데이터 타입:", class(data$price_numeric), "\n")

# Bucket prices into labelled bands. cut() uses right-closed intervals by
# default, so the first band is (-Inf, 0] and the second is (0, 10000].
price_breaks <- c(-Inf, 0, 10000, 50000, 100000, 500000, 1000000, Inf)
price_labels <- c("무료", "1만원 이하", "1-5만원", "5-10만원", "10-50만원", "50-100만원", "100만원 이상")

data$price_range <- cut(
  data$price_numeric,
  breaks = price_breaks,
  labels = price_labels,
  include.lowest = TRUE
)

# Lookup table from category id (as a string) to its Korean display name.
category_names <- c(
  "1" = "디지털기기",
  "172" = "생활가전",
  "8" = "가구/인테리어",
  "7" = "생활/주방",
  "4" = "유아동",
  "173" = "유아도서",
  "5" = "여성의류",
  "31" = "여성잡화",
  "14" = "남성패션/잡화",
  "6" = "뷰티/미용",
  "3" = "스포츠/레저",
  "2" = "취미/게임/음반",
  "9" = "도서",
  "304" = "티켓/교환권",
  "305" = "가공식품",
  "483" = "건강기능식품",
  "16" = "반려동물용품",
  "139" = "식물",
  "13" = "기타 중고물품",
  "32" = "삽니다",
  "516" = "GarbageBin(건강기능식품 복구용)"
)

# Translate every post's category id; ids missing from the table fall back to
# a generic "카테고리 <id>" label.
category_key <- as.character(data$category_id)
data$category_name <- category_names[category_key]
is_unmapped <- is.na(data$category_name)
data$category_name[is_unmapped] <- paste0("카테고리 ", data$category_id[is_unmapped])

# Group posts by how many reports they received.
cat("신고 횟수 그룹화 시작...\n")
cat("report_count 값 확인:", range(data$report_count, na.rm=TRUE), "\n")

# Right-closed bins; include.lowest = TRUE makes the first bin [0, 1], so a
# count of 0 also lands in the "1회" group.
report_breaks <- c(0, 1, 2, 3, 5, Inf)
report_labels <- c("1회", "2회", "3회", "4-5회", "6회 이상")

data$report_group <- cut(
  data$report_count,
  breaks = report_breaks,
  labels = report_labels,
  include.lowest = TRUE,
  right = TRUE
)

cat("report_group 생성 완료:", table(data$report_group), "\n")

# Keyword dictionary used to flag posts that look like professional sellers.
# Each element name is a keyword category; each value lists the literal
# substrings searched for in a post's title and content.
professional_keywords <- list(
  "연락처" = c(
    "010-", "010 ", "전화", "연락", "문의",
    "카톡", "카카오톡", "오픈채팅", "텔레그램"
  ),
  "사업용어" = c(
    "대량", "도매", "소매", "재고", "물량", "납품",
    "공급", "판매합니다", "판매중", "판매가능", "구매문의"
  ),
  "프로모션" = c(
    "할인", "특가", "이벤트", "프로모션", "행사",
    "세일", "무료배송", "당일배송", "빠른배송"
  ),
  "이모지과다" = c(
    "💥", "✅", "🔥", "⭐", "🎁",
    "📱", "🔴", "🟡", "🟢", "🔵"
  ),
  "통신사" = c(
    "SKT", "KT", "LG유플러스", "U+", "통신사",
    "개통", "가입", "번호이동", "요금제"
  ),
  "대량거래" = c(
    "대량구매", "대량판매", "재고처리", "떨이", "처분", "정리"
  ),
  "새제품강조" = c(
    "새상품", "새제품", "미개봉", "미사용",
    "신품", "새거", "풀박스", "정품미개봉"
  )
)

# Return the keyword categories whose keywords occur in `text`.
#
# Args:
#   text:     A single string to scan. Matching is literal (no regex) and
#             case-insensitive.
#   keywords: A named list; each element is a character vector of keywords and
#             each name is the category label reported on a match.
#
# Returns:
#   One string with the matching category names joined by ", " in list order,
#   or "" when nothing matches or `text` is NA.
detect_keywords <- function(text, keywords) {
  if (length(text) != 1) {
    stop("`text` must be a single string", call. = FALSE)
  }
  # An NA text used to abort with "missing value where TRUE/FALSE needed";
  # treat it as "no keywords found" instead.
  if (is.na(text)) {
    return("")
  }

  text_lower <- tolower(text)

  # One TRUE/FALSE per category: does any of its keywords appear in the text?
  category_hit <- vapply(
    keywords,
    function(keyword_set) {
      any(vapply(
        tolower(keyword_set),
        function(keyword) grepl(keyword, text_lower, fixed = TRUE),
        logical(1)
      ))
    },
    logical(1)
  )

  paste(unique(names(keywords)[category_hit]), collapse = ", ")
}

# Tag every post with the keyword categories found in its title + content.
cat("키워드 분석 중...\n")
post_text <- paste(data$title, data$content)

# vapply() guarantees a plain character vector (also for zero rows), and
# USE.NAMES = FALSE keeps the post text from being attached as element names.
data$keyword_categories <- vapply(
  post_text,
  detect_keywords,
  character(1),
  keywords = professional_keywords,
  USE.NAMES = FALSE
)

# TRUE when at least one keyword category was detected.
data$has_keywords <- data$keyword_categories != ""

# Suspicion score for professional sellers.

# Number of posts written by the same user, attached to every row.
data <- data %>%
  add_count(user_id, name = "user_post_count")

# Count of "attention" emoji in the post body. A missing body yields NA from
# str_count(); treat it as zero so one empty cell cannot turn the whole score
# (and therefore the suspicion level) into NA.
emoji_count <- str_count(data$content, "[🔥💥✅⭐🎁📱🔴🟡🟢🔵]")
emoji_count[is.na(emoji_count)] <- 0

# Score components:
#   reports       10 points per report
#   posting load  30 / 20 / 10 points for >= 20 / 10 / 5 posts by the user
#   keywords      20 points when any keyword category was detected
#   low price     10 points when the price is 10,000 or less
#   emoji         15 points when 5 or more attention emoji are used
report_points <- data$report_count * 10
posting_points <- ifelse(data$user_post_count >= 20, 30,
                         ifelse(data$user_post_count >= 10, 20,
                                ifelse(data$user_post_count >= 5, 10, 0)))
keyword_points <- ifelse(data$has_keywords, 20, 0)
price_points <- ifelse(data$price_numeric <= 10000, 10, 0)
emoji_points <- ifelse(emoji_count >= 5, 15, 0)

data$suspicion_score <- report_points + posting_points + keyword_points +
  price_points + emoji_points

# Suspicion level: right-closed bands (-Inf, 30], (30, 50], (50, 70], (70, Inf).
data$suspicion_level <- cut(
  data$suspicion_score,
  breaks = c(-Inf, 30, 50, 70, Inf),
  labels = c("낮음", "보통", "높음", "매우높음")
)

cat("데이터 전처리 완료!\n")
```

# 개요 {data-icon="fa-chart-line"}

## Column {data-width=350}

### 📊 데이터 기본 정보

```{r}
# Headline figures for the overview table.
total_posts <- nrow(data)
total_users <- length(unique(data$user_id))
# na.rm = TRUE: a single NA report count would otherwise turn the total into NA.
total_reports <- sum(data$report_count, na.rm = TRUE)
date_range <- paste(format(min(data$created_at, na.rm=TRUE), "%Y-%m-%d"), "~", 
                    format(max(data$created_at, na.rm=TRUE), "%Y-%m-%d"))
# Average over priced posts only (price 0 is excluded).
avg_price <- mean(data$price_numeric[data$price_numeric > 0], na.rm = TRUE)
high_suspicion <- sum(data$suspicion_level %in% c("높음", "매우높음"))

# Thousands-separated label. scientific = FALSE stops round doubles such as
# 100000 from being printed as "1e+05".
format_count <- function(x) {
  format(x, big.mark = ",", scientific = FALSE)
}

info_df <- data.frame(
  항목 = c("총 게시글 수", "총 사용자 수", "총 신고 인입 수", "데이터 기간", "평균 가격", "고위험 게시글"),
  값 = c(
    format_count(total_posts),
    format_count(total_users),
    format_count(total_reports),
    date_range,
    paste0(format_count(round(avg_price)), "원"),
    paste0(format_count(high_suspicion), "건 (", 
           round(high_suspicion/total_posts*100, 1), "%)")
  )
)

# dom = 't' renders the bare table without search box or pager.
datatable(info_df, 
          options = list(dom = 't', pageLength = 10),
          rownames = FALSE)
```

### 🎯 전문판매업자 의심도 분포

```{r}
# Share of posts per suspicion level.
suspicion_dist <- data %>%
  count(suspicion_level) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Colour per level.
suspicion_colors <- c("낮음" = "#a8d5e2", "보통" = "#f9d5a7", 
                      "높음" = "#f5b895", "매우높음" = "#e74c3c")

# Look colours up by level *name*. Indexing with the factor itself would use
# its integer codes, which only matches while the colour vector happens to be
# in the same order as the factor levels.
slice_colors <- unname(suspicion_colors[as.character(suspicion_dist$suspicion_level)])

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(suspicion_dist,
             labels = ~suspicion_level,
             values = ~n,
             type = 'pie',
             marker = list(colors = slice_colors),
             textinfo = 'label+percent',
             textfont = list(size = 13),
             height = 400) %>%
  layout(title = "",
         showlegend = TRUE)

p
```

### 📈 신고 횟수 분포

```{r}
# Number (and share) of posts in each report-count group.
report_dist <- data %>%
  count(report_group) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Bars are shaded by their own height (light blue -> orange -> red).
bar_colorscale <- list(
  c(0, '#e8f4f8'),
  c(0.5, '#ff9800'),
  c(1, '#e74c3c')
)

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(report_dist,
             x = ~report_group,
             y = ~n,
             type = 'bar',
             marker = list(
               color = ~n,
               colorscale = bar_colorscale,
               showscale = FALSE
             ),
             text = ~paste0(n, "건 (", percentage, "%)"),
             textposition = 'outside',
             height = 400) %>%
  layout(title = "",
         xaxis = list(title = "신고 횟수"),
         yaxis = list(title = "게시글 수"))

p
```

## Column {data-width=350}

### 💰 가격대별 게시글 분포

```{r}
# Number (and share) of posts per price band.
price_dist <- data %>%
  count(price_range) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Reversed level order, so the cheapest band ends up at the top of the
# horizontal bar chart.
price_order <- c("무료", "1만원 이하", "1-5만원", "5-10만원", "10-50만원", "50-100만원", "100만원 이상")
price_dist$price_range <- factor(price_dist$price_range, levels = rev(price_order))
price_dist <- price_dist %>% arrange(desc(price_range))

# Colour per price band, looked up by band name.
colors_map <- c("무료" = "#e8b4b8", "1만원 이하" = "#a8d5e2",
                "1-5만원" = "#b8e6d5", "5-10만원" = "#f9d5a7",
                "10-50만원" = "#d5c4e8", "50-100만원" = "#f5b895",
                "100만원 이상" = "#ff9800")

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(price_dist, 
             y = ~price_range,
             x = ~n,
             type = 'bar',
             orientation = 'h',
             marker = list(color = colors_map[as.character(price_dist$price_range)]),
             text = ~paste0(n, "건 (", percentage, "%)"),
             textposition = 'outside',
             textfont = list(size = 12),
             height = 400) %>%
  layout(
    title = "",
    xaxis = list(title = "게시글 수", showgrid = TRUE),
    yaxis = list(title = "", tickfont = list(size = 12)),
    margin = list(l = 120, r = 120),
    showlegend = FALSE
  )

p
```

### 📂 카테고리별 게시글 분포 (Top 15)

```{r}
# The 15 categories with the most posts.
category_dist <- data %>%
  count(category_name, sort = TRUE) %>%
  head(15)

# Bars are ordered by descending post count.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(category_dist,
             x = ~reorder(category_name, -n),
             y = ~n,
             type = 'bar',
             marker = list(color = '#3498db'),
             text = ~paste0(n, "건"),
             textposition = 'outside',
             height = 450) %>%
  layout(title = "",
         xaxis = list(
           title = "",
           tickangle = -45,
           tickfont = list(size = 11),
           automargin = TRUE
         ),
         yaxis = list(title = "게시글 수"),
         margin = list(b = 120))

p
```

# 신고 분석 {data-icon="fa-exclamation-triangle"}

## Column {data-width=500}

### 🚨 신고 횟수가 높은 게시글 (Top 30)

```{r}
# The 30 most-reported posts (ties broken by suspicion score), with shortened
# title/content previews and a formatted price.
top_reports <- data %>%
  arrange(desc(report_count), desc(suspicion_score)) %>%
  head(30) %>%
  mutate(
    제목 = ifelse(nchar(title) > 50, paste0(substr(title, 1, 50), "..."), title),
    내용미리보기 = ifelse(nchar(content) > 80, paste0(substr(content, 1, 80), "..."), content),
    # trim = TRUE: format() otherwise pads every value to the widest one.
    # scientific = FALSE: keeps round prices from printing as e.g. "1e+05".
    가격 = ifelse(
      price_numeric > 0,
      paste0(format(price_numeric, big.mark = ",", trim = TRUE, scientific = FALSE), "원"),
      "무료"
    )
  ) %>%
  select(
    게시글ID = id,
    제목,
    내용미리보기,
    신고횟수 = report_count,
    의심도점수 = suspicion_score,
    의심등급 = suspicion_level,
    가격,
    카테고리 = category_name,
    사용자ID = user_id
  )

# Column indexes in columnDefs / order are zero-based; the table opens sorted
# by report count (column 3), descending.
datatable(
  top_reports,
  filter = 'top',
  options = list(
    pageLength = 15,
    scrollX = TRUE,
    columnDefs = list(
      list(width = '80px', targets = c(0, 3, 4, 6)),
      list(width = '150px', targets = 1),
      list(width = '250px', targets = 2)
    ),
    order = list(list(3, 'desc'))
  ),
  rownames = FALSE
) %>%
  # Report count: background darkens at the 2 / 4 / 6 cut points.
  formatStyle(
    "신고횟수",
    backgroundColor = styleInterval(
      cuts = c(2, 4, 6),
      values = c("#ffffff", "#fff3e0", "#ffcdd2", "#e74c3c")
    ),
    fontWeight = "bold"
  ) %>%
  # Suspicion level: one fixed colour per level.
  formatStyle(
    "의심등급",
    backgroundColor = styleEqual(
      levels = c("낮음", "보통", "높음", "매우높음"),
      values = c("#a8d5e2", "#f9d5a7", "#f5b895", "#e74c3c")
    ),
    fontWeight = "bold"
  )
```

### 📊 신고 횟수별 카테고리 분포

```{r}
# Posts reported at least twice, counted per category and report count.
# `total` is the category-wide number of such posts; at most the first 20
# rows of each category are kept.
report_category <- data %>%
  filter(report_count >= 2) %>%
  count(category_name, report_count) %>%
  group_by(category_name) %>%
  mutate(total = sum(n)) %>%
  ungroup() %>%
  arrange(desc(total)) %>%
  group_by(category_name) %>%
  slice_head(n = 20) %>%
  ungroup()

# The ten categories with the most multiply-reported posts.
top_categories <- report_category %>%
  distinct(category_name, total) %>%
  arrange(desc(total)) %>%
  head(10) %>%
  pull(category_name)

plot_data <- report_category %>%
  filter(category_name %in% top_categories)

# Stacked bars: one segment per report count within each category.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(plot_data,
             x = ~category_name,
             y = ~n,
             color = ~factor(report_count),
             type = 'bar',
             colors = c('#3498db', '#e67e22', '#e74c3c', '#c0392b', '#8b0000'),
             height = 450) %>%
  layout(
    title = "카테고리별 신고 횟수 분포 (2회 이상)",
    xaxis = list(title = "", tickangle = -45),
    yaxis = list(title = "게시글 수"),
    barmode = 'stack',
    legend = list(title = list(text = "신고횟수"))
  )

p
```

## Column {data-width=500}

### 🔥 신고 다발 시간대 분석

```{r}
# Posts and reports per hour of day (hour taken from the post creation time).
# na.rm = TRUE keeps one missing report count from blanking a whole hour.
hourly_reports <- data %>%
  mutate(hour = hour(created_at)) %>%
  group_by(hour) %>%
  summarise(
    게시글수 = n(),
    평균신고횟수 = mean(report_count, na.rm = TRUE),
    총신고수 = sum(report_count, na.rm = TRUE)
  )

# Two lines on separate y axes: post count (left) and total reports (right).
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(hourly_reports, height = 400) %>%
  add_trace(
    x = ~hour,
    y = ~게시글수,
    name = '게시글 수',
    type = 'scatter',
    mode = 'lines+markers',
    yaxis = 'y',
    line = list(color = '#3498db')
  ) %>%
  add_trace(
    x = ~hour,
    y = ~총신고수,
    name = '총 신고 수',
    type = 'scatter',
    mode = 'lines+markers',
    yaxis = 'y2',
    line = list(color = '#e74c3c')
  ) %>%
  layout(
    title = "시간대별 게시 및 신고 패턴",
    xaxis = list(title = "시간 (0-23시)", dtick = 2),
    yaxis = list(title = "게시글 수", side = "left"),
    yaxis2 = list(
      title = "총 신고 수",
      overlaying = "y",
      side = "right"
    ),
    legend = list(x = 0.1, y = 1)
  )

p
```

### 💎 고위험 게시글의 가격대 분포

```{r}
# Price-band distribution of the high-risk posts (levels "높음" / "매우높음").
high_risk_price <- data %>%
  filter(suspicion_level %in% c("높음", "매우높음")) %>%
  count(price_range) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Price band order.
price_order <- c("무료", "1만원 이하", "1-5만원", "5-10만원", "10-50만원", "50-100만원", "100만원 이상")
high_risk_price$price_range <- factor(high_risk_price$price_range, levels = price_order)

# Colour per band, looked up by band name. count() drops bands without any
# high-risk post, so a purely positional colour vector would shift and paint
# the remaining bands with the wrong colours.
price_band_colors <- c("무료" = "#e8b4b8", "1만원 이하" = "#a8d5e2",
                       "1-5만원" = "#b8e6d5", "5-10만원" = "#f9d5a7",
                       "10-50만원" = "#d5c4e8", "50-100만원" = "#f5b895",
                       "100만원 이상" = "#ff9800")
slice_colors <- unname(price_band_colors[as.character(high_risk_price$price_range)])

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(high_risk_price,
             labels = ~price_range,
             values = ~n,
             type = 'pie',
             textinfo = 'label+percent',
             marker = list(colors = slice_colors),
             textfont = list(size = 11),
             height = 400) %>%
  layout(title = "고위험 게시글 가격대 분포",
         showlegend = TRUE)

p
```

# 사용자 분석 {data-icon="fa-user"}

## Column {data-width=600}

### 👤 다수 게시글 작성 사용자 (Top 30)

```{r}
# Per-user activity profile: post count, report statistics, mean suspicion
# score, number of high-risk posts, most frequent category and number of
# posts with detected keywords.
per_user_stats <- data %>%
  group_by(user_id) %>%
  summarise(
    게시글수 = n(),
    평균신고횟수 = round(mean(report_count), 2),
    총신고수 = sum(report_count),
    평균의심점수 = round(mean(suspicion_score), 1),
    고위험게시글 = sum(suspicion_level %in% c("높음", "매우높음")),
    주요카테고리 = names(sort(table(category_name), decreasing = TRUE))[1],
    키워드검출 = sum(has_keywords)
  )

# Keep the 30 users with the most posts.
user_summary <- head(arrange(per_user_stats, desc(게시글수)), 30)

# Base table: every column except the user id is centred; the table opens
# sorted by post count (zero-based column 1), descending.
user_table <- datatable(
  user_summary,
  filter = 'top',
  options = list(
    pageLength = 15,
    scrollX = TRUE,
    columnDefs = list(
      list(className = 'dt-center', targets = 1:7)
    ),
    order = list(list(1, 'desc'))
  ),
  rownames = FALSE
)

# Post count: background darkens at 10 / 20 / 30 posts.
user_table <- formatStyle(
  user_table,
  "게시글수",
  backgroundColor = styleInterval(
    cuts = c(10, 20, 30),
    values = c("#ffffff", "#fff3e0", "#ffcdd2", "#e74c3c")
  ),
  fontWeight = "bold"
)

# Mean suspicion score: same 30 / 50 / 70 cut points as the suspicion levels.
user_table <- formatStyle(
  user_table,
  "평균의심점수",
  backgroundColor = styleInterval(
    cuts = c(30, 50, 70),
    values = c("#a8d5e2", "#f9d5a7", "#f5b895", "#e74c3c")
  )
)

# High-risk post count: text colour intensifies at 5 / 10 / 15 posts.
user_table <- formatStyle(
  user_table,
  "고위험게시글",
  color = styleInterval(
    cuts = c(5, 10, 15),
    values = c("#6c757d", "#e67e22", "#e74c3c", "#c0392b")
  ),
  fontWeight = "bold"
)

user_table

### 📈 사용자 게시글 수 분포

```{r}
# How many users fall into each posts-per-user bucket.
user_post_dist <- data %>%
  count(user_id) %>%
  mutate(post_group = case_when(
    n == 1 ~ "1개",
    n == 2 ~ "2개",
    n >= 3 & n < 5 ~ "3-4개",
    n >= 5 & n < 10 ~ "5-9개",
    n >= 10 & n < 20 ~ "10-19개",
    n >= 20 ~ "20개 이상"
  )) %>%
  # After this count(), `n` is the number of users per bucket.
  count(post_group) %>%
  mutate(
    post_group = factor(post_group, 
                        levels = c("1개", "2개", "3-4개", "5-9개", "10-19개", "20개 이상")),
    percentage = round(n / sum(n) * 100, 1)
  ) %>%
  arrange(post_group)

# Bars are shaded by their own height (light blue -> orange -> red).
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(user_post_dist,
             x = ~post_group,
             y = ~n,
             type = 'bar',
             marker = list(
               color = ~n,
               colorscale = list(
                 c(0, '#e8f4f8'),
                 c(0.5, '#ff9800'),
                 c(1, '#e74c3c')
               ),
               showscale = FALSE
             ),
             text = ~paste0(n, "명 (", percentage, "%)"),
             textposition = 'outside',
             height = 400) %>%
  layout(
    title = "사용자별 게시글 수 분포",
    xaxis = list(title = "게시글 수"),
    yaxis = list(title = "사용자 수")
  )

p
```

## Column {data-width=400}

### 🎯 고위험 사용자 프로필 (Top 10)

```{r}
# Profile of the ten most active users among those with at least 10 posts.
high_risk_users <- data %>%
  group_by(user_id) %>%
  summarise(
    게시글수 = n(),
    # na.rm = TRUE: a post without a suspicion level must not turn the
    # per-level counts into NA.
    의심도등급분포 = paste(
      "매우높음:", sum(suspicion_level == "매우높음", na.rm = TRUE),
      "/ 높음:", sum(suspicion_level == "높음", na.rm = TRUE),
      "/ 보통:", sum(suspicion_level == "보통", na.rm = TRUE)
    ),
    # Mean over priced posts only; NaN when the user has none.
    평균가격 = mean(price_numeric[price_numeric > 0], na.rm = TRUE),
    무료게시글 = sum(price_numeric == 0),
    키워드검출률 = paste0(round(sum(has_keywords) / n() * 100, 1), "%")
  ) %>%
  filter(게시글수 >= 10) %>%
  arrange(desc(게시글수)) %>%
  head(10) %>%
  mutate(
    # trim = TRUE: format() otherwise pads every value to the widest one.
    # scientific = FALSE: keeps round averages from printing as e.g. "1e+05".
    평균가격 = ifelse(
      is.nan(평균가격),
      "N/A",
      paste0(format(round(평균가격), big.mark = ",", trim = TRUE, scientific = FALSE), "원")
    )
  )

# dom = 'tip': table, info line and pager only (no search box).
datatable(
  high_risk_users,
  options = list(
    pageLength = 10,
    scrollX = TRUE,
    dom = 'tip'
  ),
  rownames = FALSE
) %>%
  formatStyle(
    "게시글수",
    backgroundColor = "#fff3e0",
    fontWeight = "bold"
  )

### 📊 카테고리별 다수 게시자 분포

```{r}
# Top 10 categories among posts written by users with 10 or more posts.
category_frequent_users <- data %>%
  filter(user_post_count >= 10) %>%
  count(category_name, sort = TRUE) %>%
  head(10)

# Horizontal bars ordered by post count.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(category_frequent_users,
             y = ~reorder(category_name, n),
             x = ~n,
             type = 'bar',
             orientation = 'h',
             marker = list(color = '#9b59b6'),
             text = ~paste0(n, "건"),
             textposition = 'outside',
             height = 450) %>%
  layout(
    title = "다수 게시자(10건+)의 주요 카테고리",
    xaxis = list(title = "게시글 수"),
    yaxis = list(title = ""),
    margin = list(l = 150)
  )

p
```

# 키워드 분석 {data-icon="fa-key"}

## Column {data-width=600}

### 🔍 전문판매업자 의심 키워드 카테고리별 빈도

```{r}
# Frequency per keyword category: for every keyword, count the posts whose
# title + content contain it (case-insensitive), then sum those counts over
# the category's keywords.
# The searched text is built once instead of once per keyword.
post_text <- paste(data$title, data$content)

category_totals <- vapply(
  professional_keywords,
  function(keyword_set) {
    sum(vapply(
      keyword_set,
      function(kw) sum(str_detect(post_text, fixed(kw, ignore_case = TRUE))),
      integer(1)
    ))
  },
  integer(1)
)

# Build the result in one step rather than growing it with rbind() in a loop.
keyword_freq <- data.frame(
  카테고리 = names(professional_keywords),
  빈도 = unname(category_totals),
  stringsAsFactors = FALSE
)

keyword_freq <- keyword_freq %>% arrange(desc(빈도))

# Bars are shaded by frequency.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(keyword_freq,
             x = ~reorder(카테고리, 빈도),
             y = ~빈도,
             type = 'bar',
             marker = list(
               color = ~빈도,
               colorscale = list(
                 c(0, '#fff3e0'),
                 c(0.5, '#ff9800'),
                 c(1, '#e74c3c')
               ),
               showscale = FALSE
             ),
             text = ~paste0(format(빈도, big.mark = ","), "회"),
             textposition = 'outside',
             textfont = list(size = 12),
             height = 450) %>%
  layout(
    title = "",
    xaxis = list(title = "", tickangle = -45),
    yaxis = list(title = "출현 빈도"),
    margin = list(b = 100)
  )

p
```

### 📊 주요 키워드 상세 분석 (Top 40)

```{r}
# Frequency per individual keyword: number of posts whose title + content
# contain the keyword (case-insensitive).
# The searched text is built once instead of once per keyword.
post_text <- paste(data$title, data$content)

# One row per keyword with its category, built in one step rather than grown
# with rbind() inside nested loops.
keyword_detail <- data.frame(
  키워드 = unlist(professional_keywords, use.names = FALSE),
  카테고리 = rep(names(professional_keywords), lengths(professional_keywords)),
  stringsAsFactors = FALSE
)
keyword_detail$빈도 <- vapply(
  keyword_detail$키워드,
  function(kw) sum(str_detect(post_text, fixed(kw, ignore_case = TRUE))),
  integer(1),
  USE.NAMES = FALSE
)

# Keep only keywords that actually occur.
keyword_detail <- keyword_detail %>%
  filter(빈도 > 0) %>%
  select(키워드, 빈도, 카테고리)

top_keywords <- keyword_detail %>%
  arrange(desc(빈도)) %>%
  head(40)

# Colour per keyword category.
keyword_colors <- c(
  "연락처" = "#e74c3c",
  "사업용어" = "#e67e22",
  "프로모션" = "#f39c12",
  "이모지과다" = "#d35400",
  "통신사" = "#c0392b",
  "대량거래" = "#a93226",
  "새제품강조" = "#ff9800"
)

# unname(): store plain colour strings rather than a vector named by category.
top_keywords$색상 <- unname(keyword_colors[top_keywords$카테고리])

# The hover template is a vector with one entry per bar, so each bar shows its
# own category.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(top_keywords,
             y = ~reorder(키워드, 빈도),
             x = ~빈도,
             type = 'bar',
             orientation = 'h',
             marker = list(color = ~색상),
             text = ~paste0(format(빈도, big.mark = ","), "회"),
             textposition = 'outside',
             hovertemplate = paste(
               '<b>%{y}</b><br>',
               '빈도: %{x:,}회<br>',
               '카테고리: ', top_keywords$카테고리, '<br>',
               '<extra></extra>'
             ),
             height = 800) %>%
  layout(
    title = "",
    xaxis = list(title = "출현 빈도"),
    yaxis = list(title = "", tickfont = list(size = 10)),
    margin = list(l = 120),
    showlegend = FALSE
  )

p
```

## Column {data-width=400}

### 📈 키워드 검출 게시글의 의심도 분포

```{r}
# Suspicion-level distribution of the posts in which keywords were detected.
keyword_suspicion <- data %>%
  filter(has_keywords) %>%
  count(suspicion_level) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

keyword_suspicion$suspicion_level <- factor(
  keyword_suspicion$suspicion_level,
  levels = c("낮음", "보통", "높음", "매우높음")
)

# Colour per level, looked up by level name. count() drops levels that do not
# occur, so a purely positional colour vector would shift and paint the
# remaining levels with the wrong colours.
level_colors <- c("낮음" = "#a8d5e2", "보통" = "#f9d5a7",
                  "높음" = "#f5b895", "매우높음" = "#e74c3c")
slice_colors <- unname(level_colors[as.character(keyword_suspicion$suspicion_level)])

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(keyword_suspicion,
             labels = ~suspicion_level,
             values = ~n,
             type = 'pie',
             textinfo = 'label+percent',
             marker = list(colors = slice_colors),
             textfont = list(size = 12),
             height = 400) %>%
  layout(title = "키워드 검출 게시글의 의심도",
         showlegend = TRUE)

p
```

### 🎯 키워드 카테고리별 평균 신고 횟수

```{r}
# Mean report count of the posts that contain at least one keyword of each
# keyword category (case-insensitive match on title + content).
post_text <- paste(data$title, data$content)

# Empty template: keeps the column layout even when no category matches.
keyword_report_template <- data.frame(
  카테고리 = character(),
  평균신고횟수 = numeric(),
  게시글수 = numeric(),
  stringsAsFactors = FALSE
)

category_stats <- lapply(names(professional_keywords), function(category_label) {
  keyword_set <- professional_keywords[[category_label]]

  # Vectorised over all posts: OR together one logical vector per keyword
  # instead of testing post by post with sapply(1:n(), ...).
  is_match <- Reduce(
    `|`,
    lapply(keyword_set, function(kw) {
      str_detect(post_text, fixed(kw, ignore_case = TRUE))
    }),
    rep(FALSE, length(post_text))
  )

  # Categories without any matching post are left out of the table.
  if (!any(is_match)) {
    return(NULL)
  }

  data.frame(
    카테고리 = category_label,
    평균신고횟수 = round(mean(data$report_count[is_match], na.rm = TRUE), 2),
    게시글수 = sum(is_match),
    stringsAsFactors = FALSE
  )
})

# bind_rows() skips the NULL entries; one call replaces rbind() in a loop.
keyword_report_avg <- bind_rows(c(list(keyword_report_template), category_stats))

keyword_report_avg <- keyword_report_avg %>% arrange(desc(평균신고횟수))

# dom = 't' renders the bare table without search box or pager.
datatable(
  keyword_report_avg,
  options = list(
    dom = 't',
    pageLength = 10
  ),
  rownames = FALSE
) %>%
  # Mean report count: background darkens at 1.0 / 1.2 / 1.5.
  formatStyle(
    "평균신고횟수",
    backgroundColor = styleInterval(
      cuts = c(1.0, 1.2, 1.5),
      values = c("#ffffff", "#fff3e0", "#ffcdd2", "#e74c3c")
    ),
    fontWeight = "bold"
  )
```

### 💡 키워드 조합 분석

```{r}
# Number of keyword categories detected per post (posts with keywords only).
# keyword_categories is a ", "-joined list, so the category count is the
# number of commas plus one for any non-empty value.
keyword_combo <- data %>%
  filter(has_keywords) %>%
  mutate(
    keyword_count = str_count(keyword_categories, ",") + 
                    ifelse(keyword_categories != "", 1, 0)
  ) %>%
  count(keyword_count) %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Turn the counts into ordered labels such as "1개", "2개", ...
observed_counts <- sort(unique(keyword_combo$keyword_count))
keyword_combo$keyword_count <- factor(
  keyword_combo$keyword_count,
  levels = observed_counts,
  labels = paste0(observed_counts, "개")
)

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(keyword_combo,
             x = ~keyword_count,
             y = ~n,
             type = 'bar',
             marker = list(color = '#9b59b6'),
             text = ~paste0(n, "건 (", percentage, "%)"),
             textposition = 'outside',
             height = 350) %>%
  layout(
    title = "게시글당 검출된 키워드 카테고리 수",
    xaxis = list(title = "키워드 카테고리 수"),
    yaxis = list(title = "게시글 수")
  )

p
```

# 시계열 분석 {data-icon="fa-calendar"}

## Column {data-width=600}

### 📅 일별 신고 게시글 추이

```{r}
# Posts per creation day, with the mean report count and the number of
# high-risk posts.
# as_date() takes the calendar day in the timestamp's own time zone. Base
# as.Date() converts a POSIXct through UTC by default, which can move posts
# to the neighbouring day.
daily_trend <- data %>%
  mutate(date = as_date(created_at)) %>%
  group_by(date) %>%
  summarise(
    게시글수 = n(),
    평균신고횟수 = mean(report_count, na.rm = TRUE),
    고위험게시글 = sum(suspicion_level %in% c("높음", "매우높음"))
  ) %>%
  arrange(date)

# Two lines: all posts and high-risk posts per day.
# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(daily_trend, height = 450) %>%
  add_trace(
    x = ~date,
    y = ~게시글수,
    name = '게시글 수',
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = '#3498db')
  ) %>%
  add_trace(
    x = ~date,
    y = ~고위험게시글,
    name = '고위험 게시글',
    type = 'scatter',
    mode = 'lines+markers',
    line = list(color = '#e74c3c')
  ) %>%
  layout(
    title = "",
    xaxis = list(title = "날짜"),
    yaxis = list(title = "게시글 수"),
    legend = list(x = 0.1, y = 1)
  )

p
```

### 🔥 카테고리별 시계열 트렌드 (Top 5)

```{r}
# The five categories with the most posts.
top5_categories <- data %>%
  count(category_name, sort = TRUE) %>%
  head(5) %>%
  pull(category_name)

# Daily post count for each of those categories.
# as_date() takes the calendar day in the timestamp's own time zone. Base
# as.Date() converts a POSIXct through UTC by default, which can move posts
# to the neighbouring day.
category_daily <- data %>%
  filter(category_name %in% top5_categories) %>%
  mutate(date = as_date(created_at)) %>%
  count(date, category_name) %>%
  arrange(date)

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(height = 450)

# One line per category. The loop variable is not called `cat`, so it does
# not shadow base::cat() for the rest of the document.
for (category_label in top5_categories) {
  category_series <- category_daily %>% filter(category_name == category_label)
  p <- p %>% add_trace(
    data = category_series,
    x = ~date,
    y = ~n,
    name = category_label,
    type = 'scatter',
    mode = 'lines+markers'
  )
}

p <- p %>% layout(
  title = "주요 카테고리별 일별 추이",
  xaxis = list(title = "날짜"),
  yaxis = list(title = "게시글 수")
)

p
```

## Column {data-width=400}

### ⏰ 요일별 게시 패턴

```{r}
# Posts per weekday of creation.
# week_start = 7 pins the numbering to 1 = Sunday ... 7 = Saturday, which the
# labels below rely on, regardless of the `lubridate.week.start` option.
weekday_pattern <- data %>%
  mutate(
    weekday_num = wday(created_at, week_start = 7),
    weekday = factor(weekday_num, 
                    levels = c(1, 2, 3, 4, 5, 6, 7),
                    labels = c("일요일", "월요일", "화요일", "수요일", "목요일", "금요일", "토요일"))
  ) %>%
  count(weekday)

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(weekday_pattern,
             x = ~weekday,
             y = ~n,
             type = 'bar',
             marker = list(color = '#9b59b6'),
             text = ~paste0(n, "건"),
             textposition = 'outside',
             height = 400) %>%
  layout(
    title = "요일별 게시글 수",
    xaxis = list(title = ""),
    yaxis = list(title = "게시글 수")
  )

p
```

### 📊 시간대별 의심도 분포

```{r}
# Posts per hour of day and suspicion level, reshaped to one column per level.
hourly_suspicion <- data %>%
  mutate(hour = hour(created_at)) %>%
  group_by(hour, suspicion_level) %>%
  summarise(count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = suspicion_level, values_from = count, values_fill = 0)

# Keep the level columns that exist, in severity order.
suspicion_cols <- c("낮음", "보통", "높음", "매우높음")
existing_cols <- intersect(suspicion_cols, names(hourly_suspicion))
hourly_suspicion <- hourly_suspicion %>% select(hour, all_of(existing_cols))

# Colour per level.
level_colors <- c("낮음" = "#a8d5e2", "보통" = "#f9d5a7", 
                  "높음" = "#f5b895", "매우높음" = "#e74c3c")

# height is passed to plot_ly(); setting it in layout() is deprecated.
p <- plot_ly(hourly_suspicion, x = ~hour, type = 'bar', height = 450)

# One stacked bar series per level. The y values are extracted right away:
# a formula such as ~get(col) is only evaluated when the plot is built, by
# which time the loop variable holds its last value, so every series would
# show the same (last) column.
for (level_name in existing_cols) {
  p <- p %>% add_trace(
    y = hourly_suspicion[[level_name]],
    name = level_name,
    marker = list(color = level_colors[[level_name]])
  )
}

p <- p %>% layout(
  title = "시간대별 의심도 분포",
  xaxis = list(title = "시간 (0-23시)", dtick = 2),
  yaxis = list(title = "게시글 수"),
  barmode = 'stack'
)

p
```

# 데이터 테이블 {data-icon="fa-table"}

## Column

### 📋 전체 데이터 (필터링 가능)

```{r}
# Full, filterable listing of every post with shortened content, formatted
# price and formatted creation time.
display_data <- data %>%
  select(
    id, title, content, price_numeric, 
    report_count, suspicion_score, suspicion_level,
    keyword_categories, user_id, user_post_count, 
    category_name, created_at
  ) %>%
  mutate(
    내용 = ifelse(nchar(content) > 150, paste0(substr(content, 1, 150), "..."), content),
    # trim = TRUE: format() otherwise pads every value to the widest one.
    # scientific = FALSE: keeps round prices from printing as e.g. "1e+05".
    가격 = ifelse(
      price_numeric > 0,
      paste0(format(price_numeric, big.mark = ",", trim = TRUE, scientific = FALSE), "원"),
      "무료"
    ),
    생성일 = format(created_at, "%Y-%m-%d %H:%M")
  ) %>%
  select(
    게시글ID = id,
    제목 = title,
    내용,
    가격,
    신고횟수 = report_count,
    의심점수 = suspicion_score,
    의심등급 = suspicion_level,
    검출키워드 = keyword_categories,
    사용자ID = user_id,
    사용자게시글수 = user_post_count,
    카테고리 = category_name,
    생성일
  )

# Range for the score bar. na.rm = TRUE so that a single missing score cannot
# turn both limits into NA.
score_range <- range(display_data$의심점수, na.rm = TRUE)

# Column indexes in columnDefs are zero-based.
datatable(display_data,
          filter = 'top',
          options = list(
            pageLength = 25,
            scrollX = TRUE,
            autoWidth = TRUE,
            columnDefs = list(
              list(width = '80px', targets = c(0, 4, 5, 8, 9)),
              list(width = '150px', targets = c(1, 7)),
              list(width = '250px', targets = 2)
            )
          ),
          rownames = FALSE) %>%
  # Report count: background darkens at the 2 / 4 / 6 cut points.
  formatStyle(
    "신고횟수",
    backgroundColor = styleInterval(
      cuts = c(2, 4, 6),
      values = c("#ffffff", "#fff3e0", "#ffcdd2", "#e74c3c")
    ),
    fontWeight = "bold"
  ) %>%
  # Suspicion level: one fixed colour per level.
  formatStyle(
    "의심등급",
    backgroundColor = styleEqual(
      levels = c("낮음", "보통", "높음", "매우높음"),
      values = c("#a8d5e2", "#f9d5a7", "#f5b895", "#e74c3c")
    ),
    fontWeight = "bold"
  ) %>%
  # Suspicion score: in-cell bar proportional to the score.
  formatStyle(
    "의심점수",
    backgroundColor = styleColorBar(score_range, '#fff3e0'),
    backgroundSize = '80% 70%',
    backgroundRepeat = 'no-repeat',
    backgroundPosition = 'right'
  )
```