---
title: "상표권 위반 명품 가품 게시글 종합 분석 대시보드 v2.0"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    source_code: embed
---

```{r setup, include=FALSE}
# Install (if missing) and attach every package the dashboard uses.
# requireNamespace() only checks availability, so no "package not found"
# warning is emitted, and library() then runs exactly once per package and
# stops with an error if the installation did not succeed.
required_packages <- c("flexdashboard", "tidyverse", "plotly", "DT", "stringr", "scales")

for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # Use an HTTPS mirror so packages are not fetched over plain HTTP.
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
  library(pkg, character.only = TRUE)
}

# Load the exported post data (the file name is hard-coded).
data <- read.csv("bquxjob_20af82e5_19a214747de.csv",
                 encoding = "UTF-8",
                 stringsAsFactors = FALSE)

cat("데이터 로드 완료:", nrow(data), "행\n")

# Timestamps arrive as text with a trailing " UTC" marker. Strip the marker
# and parse with an explicit UTC zone: parsing in the machine's local zone
# would turn instants that fall into a local DST gap into NA and would make
# the result depend on where the report is rendered.
parse_utc_timestamp <- function(x) {
  stamp <- sub(" UTC$", "", as.character(x))
  as.POSIXct(stamp, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
}

data$created_at <- parse_utc_timestamp(data$created_at)
data$updated_at <- parse_utc_timestamp(data$updated_at)

# Numeric copy of the price column used by every price statistic below.
data$price_numeric <- as.numeric(data$price)

# Giveaway flag: price is exactly 0, or the title/content mentions "나눔".
# %in% never yields NA, so a missing price gives FALSE instead of NA.
data$is_naum <- (data$price_numeric %in% 0) |
                grepl("나눔", data$title, ignore.case = TRUE) |
                grepl("나눔", data$content, ignore.case = TRUE)

# Brand dictionary: canonical brand name -> keywords (Korean spellings,
# common misspellings and English names) that identify it in post text.
# NOTE(review): some keywords are very short ("롤", "lv", "에르", "루이") and
# match inside unrelated words; confirm whether that is acceptable.
brand_mapping <- list(
  "샤넬" = c("샤넬", "chanel", "채널", "사넬"),
  "루이비통" = c("루이비통", "루이", "louis vuitton", "lv", "비통"),
  "구찌" = c("구찌", "gucci", "구씨"),
  "에르메스" = c("에르메스", "hermes", "에루메스", "에르"),
  "프라다" = c("프라다", "prada"),
  "디올" = c("디올", "dior"),
  "발렌시아가" = c("발렌시아가", "balenciaga", "발렌"),
  "셀린" = c("셀린", "celine", "셀리느"),
  "펜디" = c("펜디", "fendi"),
  "버버리" = c("버버리", "burberry", "버버"),
  "롤렉스" = c("롤렉스", "rolex", "롤"),
  "까르띠에" = c("까르띠에", "cartier", "까르띠", "카르티에"),
  "몽블랑" = c("몽블랑", "montblanc", "몽블"),
  "오메가" = c("오메가", "omega"),
  "보테가베네타" = c("보테가", "bottega", "베네타", "보떼가"),
  "생로랑" = c("생로랑", "saint laurent", "ysl"),
  "발렌티노" = c("발렌티노", "valentino"),
  "지방시" = c("지방시", "givenchy"),
  "톰브라운" = c("톰브라운", "thom browne"),
  "메종마르지엘라" = c("메종마르지엘라", "maison margiela", "마르지엘라")
)

# Detect the brand mentioned in a post.
#
# Vectorised over `title` and `content` (a single pair still works, so calls
# through mapply() are unaffected). Text and keywords are lower-cased and
# compared as literal substrings.
#
# @param title   Character vector of post titles.
# @param content Character vector of post bodies (recycled by paste()).
# @return Character vector: for each post, the first brand in `brand_mapping`
#   order with a matching keyword, or "기타" when nothing matches.
detect_brand <- function(title, content) {
  text <- tolower(paste(title, content))
  result <- rep("기타", length(text))
  unassigned <- rep(TRUE, length(text))

  for (brand in names(brand_mapping)) {
    if (!any(unassigned)) {
      break
    }
    hit <- rep(FALSE, length(text))
    for (keyword in tolower(brand_mapping[[brand]])) {
      hit <- hit | grepl(keyword, text, fixed = TRUE)
    }
    # Earlier brands win: only label posts that are still unassigned.
    result[unassigned & hit] <- brand
    unassigned <- unassigned & !hit
  }

  result
}

# Build the brand column. USE.NAMES = FALSE stops mapply() from attaching every
# title as an element name, and as.character() keeps the column a character
# vector even when the data has zero rows (mapply() would return list()).
cat("브랜드 분석 중...\n")
data$brand <- as.character(
  mapply(detect_brand, data$title, data$content, USE.NAMES = FALSE)
)

# Item dictionary: product type -> keywords that identify it in post text.
# NOTE(review): "백" is a single syllable and matches inside unrelated words;
# confirm whether that is acceptable.
item_mapping <- list(
  "가방" = c("가방", "백", "bag", "숄더백", "크로스백", "토트백", "클러치"),
  "지갑" = c("지갑", "월렛", "wallet", "카드지갑"),
  "시계" = c("시계", "watch", "워치"),
  "신발" = c("신발", "스니커즈", "로퍼", "슈즈", "shoes", "sneakers", "부츠"),
  "벨트" = c("벨트", "belt"),
  "의류" = c("티셔츠", "셔츠", "자켓", "코트", "후드", "니트", "원피스", "스커트", "팬츠"),
  "액세서리" = c("목걸이", "반지", "귀걸이", "팔찌", "브로치", "스카프", "선글라스")
)

# Detect the product type mentioned in a post.
#
# Vectorised over `title` and `content` (a single pair still works, so calls
# through mapply() are unaffected). Text and keywords are lower-cased and
# compared as literal substrings.
#
# @param title   Character vector of post titles.
# @param content Character vector of post bodies (recycled by paste()).
# @return Character vector: for each post, the first item type in
#   `item_mapping` order with a matching keyword, or "기타" when none matches.
detect_item <- function(title, content) {
  text <- tolower(paste(title, content))
  result <- rep("기타", length(text))
  unassigned <- rep(TRUE, length(text))

  for (item in names(item_mapping)) {
    if (!any(unassigned)) {
      break
    }
    hit <- rep(FALSE, length(text))
    for (keyword in tolower(item_mapping[[item]])) {
      hit <- hit | grepl(keyword, text, fixed = TRUE)
    }
    # Earlier item types win: only label posts that are still unassigned.
    result[unassigned & hit] <- item
    unassigned <- unassigned & !hit
  }

  result
}

# Build the item column. USE.NAMES = FALSE stops mapply() from attaching every
# title as an element name, and as.character() keeps the column a character
# vector even when the data has zero rows (mapply() would return list()).
cat("품목 분석 중...\n")
data$item <- as.character(
  mapply(detect_item, data$title, data$content, USE.NAMES = FALSE)
)

# Bucket each listing's numeric price into a labelled price band.
# cut() intervals are right-closed, so the first band (-Inf, 0] holds prices
# of zero or less.
price_breaks <- c(-Inf, 0, 100000, 300000, 500000, 1000000, Inf)
price_labels <- c("나눔/무료", "10만원 이하", "10-30만원", "30-50만원", "50-100만원", "100만원 이상")

data$price_range <- cut(data$price_numeric,
                        breaks = price_breaks,
                        labels = price_labels,
                        include.lowest = TRUE)

# Listings flagged as giveaways go into the free band whatever their price.
data$price_range <- replace(data$price_range, data$is_naum, "나눔/무료")

# Lookup table from category id (as a string) to its Korean display name.
category_names <- c(
  "1" = "디지털기기",
  "172" = "생활가전",
  "8" = "가구/인테리어",
  "7" = "생활/주방",
  "4" = "유아동",
  "173" = "유아도서",
  "5" = "여성의류",
  "31" = "여성잡화",
  "14" = "남성패션/잡화",
  "6" = "뷰티/미용",
  "3" = "스포츠/레저",
  "2" = "취미/게임/음반",
  "9" = "도서",
  "304" = "티켓/교환권",
  "305" = "가공식품",
  "483" = "건강기능식품",
  "16" = "반려동물용품",
  "139" = "식물",
  "13" = "기타 중고물품",
  "32" = "삽니다",
  "516" = "GarbageBin(건강기능식품 복구용)"
)

# Attach the display name to every post; ids missing from the table fall back
# to a generic "카테고리 <id>" label.
category_key <- as.character(data$category_id)
data$category_name <- category_names[category_key]
unmapped <- is.na(data$category_name)
data$category_name[unmapped] <- paste0("카테고리 ", data$category_id[unmapped])

cat("데이터 전처리 완료!\n")
```

# 개요 {data-icon="fa-chart-line"}

## Column {data-width=350}

### 📊 데이터 기본 정보

```{r}
# Headline figures for the overview panel.
total_posts <- nrow(data)
total_users <- length(unique(data$user_id))
date_range <- paste(format(min(data$created_at, na.rm = TRUE), "%Y-%m-%d"), "~",
                    format(max(data$created_at, na.rm = TRUE), "%Y-%m-%d"))

# Price statistics ignore missing prices and zero-priced listings.
positive_prices <- data$price_numeric[!is.na(data$price_numeric) & data$price_numeric > 0]
avg_price <- mean(positive_prices)
median_price <- median(positive_prices)

# format() would otherwise print round values such as 100000 as "1e+05";
# scientific = FALSE forces plain digits with thousands separators.
format_number <- function(x) {
  format(round(x), big.mark = ",", scientific = FALSE, trim = TRUE)
}

info_df <- data.frame(
  항목 = c("총 게시글 수", "총 사용자 수", "데이터 기간", "평균 가격", "중앙 가격"),
  값 = c(
    format_number(total_posts),
    format_number(total_users),
    date_range,
    paste0(format_number(avg_price), "원"),
    paste0(format_number(median_price), "원")
  )
)

datatable(info_df,
          options = list(dom = 't', pageLength = 10),
          rownames = FALSE)
```

### 🏆 상위 검출 브랜드 (Top 20)

```{r}
# Post counts for the 20 most frequently detected brands ("기타" excluded).
brand_counts <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(20)

# The chart height is passed to plot_ly(): setting it in layout() is deprecated
# and prints a warning into the dashboard panel.
p <- plot_ly(brand_counts,
             x = ~reorder(brand, -n),
             y = ~n,
             type = 'bar',
             marker = list(color = '#3498db'),
             text = ~paste0(n, "건"),
             textposition = 'outside',
             height = 500) %>%
  layout(title = "",
         xaxis = list(
           title = "",
           tickangle = -45,
           tickfont = list(size = 11),
           automargin = TRUE
         ),
         yaxis = list(title = "게시글 수"),
         margin = list(b = 120, l = 60))

p
```

## Column {data-width=350}

### 💰 가격대별 게시글 분포 (나눔 포함)

```{r}
# Post count and share per price band. Shares are computed over all posts;
# posts without a usable price band are then dropped from the chart.
price_dist <- data %>%
  count(price_range) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  filter(!is.na(price_range))

# Display order of the bands (reversed because a horizontal bar chart draws
# the first level at the bottom).
price_order <- c("10만원 이하", "10-30만원", "30-50만원", "50-100만원", "100만원 이상", "나눔/무료")
price_dist$price_range <- factor(price_dist$price_range, levels = rev(price_order))
price_dist <- price_dist %>% arrange(desc(price_range))

# Muted pastel colour per band.
colors_map <- c("10만원 이하" = "#a8d5e2",
                "10-30만원" = "#b8e6d5",
                "30-50만원" = "#f9d5a7",
                "50-100만원" = "#d5c4e8",
                "100만원 이상" = "#f5b895",
                "나눔/무료" = "#e8b4b8")
bar_colors <- unname(colors_map[as.character(price_dist$price_range)])

# Scale the x axis from the data (with head-room for the outside labels)
# instead of a fixed upper bound that would clip larger counts.
x_upper <- if (nrow(price_dist) > 0) max(price_dist$n) * 1.3 else 1

# Horizontal bar chart; height is given to plot_ly() because setting it in
# layout() is deprecated and prints a warning.
p <- plot_ly(price_dist,
             y = ~price_range,
             x = ~n,
             type = 'bar',
             orientation = 'h',
             marker = list(color = bar_colors),
             text = ~paste0(n, "건 (", round(percentage, 1), "%)"),
             textposition = 'outside',
             textfont = list(size = 13, color = '#333333'),
             hovertemplate = paste(
               '<b>%{y}</b><br>',
               '건수: %{x:,}건<br>',
               '비율: %{text}<br>',
               '<extra></extra>'
             ),
             height = 400) %>%
  layout(
    title = "",
    xaxis = list(
      title = "게시글 수",
      showgrid = TRUE,
      range = c(0, x_upper)
    ),
    yaxis = list(title = "", tickfont = list(size = 13)),
    margin = list(l = 120, r = 150),
    showlegend = FALSE
  )

p
```

### 📂 게시글 등록 카테고리별 분포 (Top 20)

```{r}
# Post counts for the 20 most used listing categories, labelled with the
# Korean category name (or "카테고리 <id>" when the id is not in the table).
category_dist <- data %>%
  count(category_id, sort = TRUE) %>%
  head(20) %>%
  mutate(category_label = ifelse(
    as.character(category_id) %in% names(category_names),
    category_names[as.character(category_id)],
    paste0("카테고리 ", category_id)
  ))

# Height is passed to plot_ly(): setting it in layout() is deprecated and
# prints a warning into the dashboard panel.
p <- plot_ly(category_dist,
             x = ~reorder(category_label, -n),
             y = ~n,
             type = 'bar',
             marker = list(color = '#95a5a6'),
             text = ~paste0(n, "건"),
             textposition = 'outside',
             height = 450) %>%
  layout(title = "",
         xaxis = list(
           title = "",
           tickangle = -45,
           tickfont = list(size = 10),
           automargin = TRUE
         ),
         yaxis = list(title = "게시글 수"),
         margin = list(b = 150))

p
```

# 브랜드 분석 {data-icon="fa-tags"}

## Column {data-width=500}

### 📈 브랜드별 등록 건수 (Top 20)

```{r}
# Post counts for the 20 most frequently detected brands ("기타" excluded).
brand_summary_chart <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(20) %>%
  rename(총건수 = n)

# Bars are shaded by count. Height is passed to plot_ly(): setting it in
# layout() is deprecated and prints a warning into the dashboard panel.
p <- plot_ly(brand_summary_chart,
             x = ~reorder(brand, -총건수),
             y = ~총건수,
             type = 'bar',
             marker = list(
               color = ~총건수,
               colorscale = list(
                 c(0, '#e8f4f8'),
                 c(0.5, '#a8d5e2'),
                 c(1, '#6bb6d6')
               ),
               showscale = FALSE
             ),
             text = ~paste0(총건수, "건"),
             textposition = 'outside',
             textfont = list(size = 11, color = '#333333'),
             height = 450) %>%
  layout(
    title = "",
    xaxis = list(
      title = "",
      tickangle = -45,
      tickfont = list(size = 11)
    ),
    yaxis = list(title = "등록 건수"),
    margin = list(b = 120)
  )

p
```

### 📊 브랜드별 평균 가격 (Top 20)

```{r}
# Mean and median price per brand over posts with a positive price; keep the
# 20 brands with the highest mean ("기타" excluded).
brand_price <- data %>%
  filter(brand != "기타" & !is.na(price_numeric) & price_numeric > 0) %>%
  group_by(brand) %>%
  summarise(
    평균가격 = mean(price_numeric, na.rm = TRUE),
    중앙가격 = median(price_numeric, na.rm = TRUE),
    게시글수 = n()
  ) %>%
  arrange(desc(평균가격)) %>%
  head(20)

# Grouped bars (mean vs. median). Height is passed to plot_ly(): setting it in
# layout() is deprecated and prints a warning into the dashboard panel.
p <- plot_ly(brand_price,
             x = ~reorder(brand, 평균가격),
             y = ~평균가격,
             type = 'bar',
             name = '평균가격',
             marker = list(color = '#e8b4b8'),
             height = 450) %>%
  add_trace(y = ~중앙가격,
            name = '중앙가격',
            marker = list(color = '#a8d5e2')) %>%
  layout(title = "",
         xaxis = list(title = "", tickangle = -45, tickfont = list(size = 10)),
         yaxis = list(title = "가격 (원)"),
         barmode = 'group',
         margin = list(b = 120),
         legend = list(orientation = 'h', y = 1.1))

p
```

## Column {data-width=500}

### 🏷️ 브랜드별 품목 분포 (Top 20)

```{r}
# The 20 most frequently detected brands ("기타" excluded).
top_brands <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(20) %>%
  pull(brand)

# Brand x item contingency table in wide form (missing pairs become 0).
heatmap_data <- data %>%
  filter(brand %in% top_brands & item != "기타") %>%
  count(brand, item) %>%
  pivot_wider(names_from = item, values_from = n, values_fill = 0)

# Numeric matrix for plotly: one row per brand, one column per item type.
heatmap_matrix <- as.matrix(heatmap_data[,-1])
rownames(heatmap_matrix) <- heatmap_data$brand

# Item names used for the x axis ticks.
item_names <- colnames(heatmap_matrix)

# Pastel blue colour scale from white (0) to the strongest blue (max count).
pastel_colors <- list(
  c(0, "rgb(255, 255, 255)"),
  c(0.2, "rgb(230, 240, 255)"),
  c(0.4, "rgb(200, 225, 255)"),
  c(0.6, "rgb(170, 210, 255)"),
  c(0.8, "rgb(140, 195, 255)"),
  c(1, "rgb(110, 180, 245)")
)

# Height is passed to plot_ly(): setting it in layout() is deprecated and
# prints a warning into the dashboard panel.
p <- plot_ly(
  x = item_names,
  y = rownames(heatmap_matrix),
  z = heatmap_matrix,
  type = "heatmap",
  colorscale = pastel_colors,
  text = heatmap_matrix,
  texttemplate = "%{z}",
  textfont = list(size = 12, color = '#333333'),
  showscale = TRUE,
  colorbar = list(title = "건수"),
  height = 500
) %>%
  layout(
    title = "",
    xaxis = list(
      title = list(text = "품목", font = list(size = 14, color = '#000000')),
      tickfont = list(size = 13, color = '#000000'),
      tickangle = 0,
      showticklabels = TRUE,
      tickmode = "array",
      tickvals = seq(0, length(item_names)-1),
      ticktext = item_names,
      side = "bottom"
    ),
    yaxis = list(
      title = list(text = "브랜드", font = list(size = 14, color = '#000000')),
      tickfont = list(size = 12, color = '#000000'),
      autorange = "reversed",
      showticklabels = TRUE
    ),
    margin = list(l = 120, r = 100, t = 20, b = 80),
    plot_bgcolor = 'white',
    paper_bgcolor = 'white'
  )

p
```

### 💎 고가 브랜드 분포 (50만원 이상)

```{r}
# Brand counts among posts priced at 500,000 or more (top 10 brands).
# NOTE(review): unlike the other brand charts, "기타" is not filtered out
# here; confirm whether that is intended.
high_price_brand <- data %>%
  filter(!is.na(price_numeric) & price_numeric >= 500000) %>%
  count(brand) %>%
  arrange(desc(n)) %>%
  head(10)

pastel_palette <- c("#a8d5e2", "#b8e6d5", "#f9d5a7", "#d5c4e8",
                    "#f5b895", "#e8b4b8", "#c5e3f6", "#d4edda",
                    "#fff3cd", "#f8d7da")

# seq_len() yields an empty index for zero rows (1:0 would select c(1, 0)).
# Height is passed to plot_ly(): setting it in layout() is deprecated and
# prints a warning into the dashboard panel.
p <- plot_ly(high_price_brand,
             labels = ~brand,
             values = ~n,
             type = 'pie',
             textinfo = 'label+percent',
             marker = list(colors = pastel_palette[seq_len(nrow(high_price_brand))]),
             textfont = list(size = 11),
             height = 400) %>%
  layout(title = "",
         showlegend = TRUE)

p
```

# 주요 상품 분석 {data-icon="fa-shopping-bag"}

## Column

### 🔍 브랜드별 주요 상품 (Top 20 브랜드)

```{r}
# Filler vocabulary stripped from titles before product names are counted.
# Built once (not on every call), de-duplicated, and ordered longest-first so
# that a short word never eats the front of a longer one and leaves a stray
# suffix behind (e.g. "중고" before "중고품" used to leave "품").
product_noise_words <- unique(c(
  # Selling expressions
  "팔아요", "팝니다", "판매", "팔아용", "파라요", "파라용", "팔아여", "팝니당", "팔게요", "팔아욘",
  "급처", "급매", "급해요", "급합니다", "급하게", "빨리", "빨리요",

  # Exchange / giveaway
  "네고", "교환", "나눔", "나눠요", "드려요", "줍니다", "드립니다",

  # Condition
  "새상품", "새거", "새것", "새제품", "새물건", "미개봉", "미착용",
  "정품", "미사용", "사용감", "사용안함", "안썼어요", "안써요",
  "중고", "중고품", "거의새것", "거의안", "거의", "사용했어요",

  # Appearance adjectives
  "깨끗", "깨끗해요", "깨끗한", "깔끔", "깔끔해요", "깔끔한",
  "예쁜", "이쁜", "예뻐요", "이뻐요", "예쁨", "이쁨",
  "좋아요", "좋은", "좋음", "양호", "최상", "상태좋음", "상태양호",
  "완전", "진짜", "정말", "너무", "엄청", "매우", "아주",

  # Price
  "싸게", "싸요", "저렴", "저렴해요", "저렴하게", "할인", "세일",
  "비싸요", "비싼", "고가", "가격", "만원", "천원",

  # Transaction method
  "연락", "문의", "직거래", "택배", "배송", "편택", "반택",
  "입니다", "해요", "해용", "합니다", "있어요", "없어요", "있음", "없음",
  "주세요", "사세요", "연락주세요", "문의주세요",

  # Other filler
  "상태", "정도", "느낌", "색상", "컬러", "색깔",
  "사이즈", "싸이즈", "크기", "치수",
  "구매", "구입", "샀어요", "샀습니다",
  "하나", "한개", "개", "장", "개입",
  "이거", "이것", "저거", "저것", "요거", "요것",
  "여기", "저기", "요기",

  # Additional verb forms
  "해드려요", "드려용", "팔아볼게요", "내놔요", "올려요", "올립니다",
  "가져가세요", "가져가요", "양도", "양도해요",

  # Interjections
  "완전", "대박", "진심", "레알", "찐",
  "ㅠㅠ", "ㅜㅜ", "ㅎㅎ", "ㅋㅋ"
))
product_noise_words <- product_noise_words[order(-nchar(product_noise_words))]

# Reduce post titles to a bare product name.
#
# Removes the brand's own keywords and the filler vocabulary
# (case-insensitively), replaces everything that is not Hangul, a Latin letter
# or whitespace with a space, and collapses whitespace.
#
# @param title      Character vector of post titles (a single title works).
# @param brand_name Single brand name; its keywords are read from
#   `brand_mapping`.
# @return Character vector the same length as `title`; NA where the cleaned
#   name is missing, shorter than 2 characters, or one character repeated.
clean_product_name <- function(title, brand_name) {
  cleaned <- as.character(title)

  # Longest keyword first, for the same reason as the filler words.
  brand_keywords <- unlist(brand_mapping[[brand_name]])
  brand_keywords <- brand_keywords[order(-nchar(brand_keywords))]
  for (keyword in brand_keywords) {
    cleaned <- str_replace_all(cleaned, paste0("(?i)", keyword), "")
  }

  # "셀린느" is not itself a keyword, so removing "셀린" leaves a stray "느".
  if (brand_name == "셀린") {
    cleaned <- str_replace_all(cleaned, "느\\s", "")
    cleaned <- str_replace_all(cleaned, "느$", "")
  }

  for (word in product_noise_words) {
    cleaned <- str_replace_all(cleaned, paste0("(?i)", word), "")
  }

  # Keep only Hangul, Latin letters and whitespace; str_squish() also trims.
  cleaned <- str_squish(str_replace_all(cleaned, "[^가-힣a-zA-Z\\s]", " "))

  # A missing title stays NA instead of raising an error in a scalar `if`.
  too_short <- is.na(cleaned) | nchar(cleaned) < 2
  # Reject a single character repeated (e.g. "ㅋㅋㅋ").
  repeated <- !too_short & str_detect(cleaned, "^(.)\\1+$")
  cleaned[too_short | repeated] <- NA_character_

  cleaned
}

# The 20 most frequently detected brands ("기타" excluded).
top_20_brands <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(20) %>%
  pull(brand)

# Build the top-5 cleaned product names for one brand.
# Returns a data frame (one row per product) or NULL when no title survives
# cleaning.
summarise_brand_products <- function(brand_name) {
  brand_rows <- data %>% filter(brand == brand_name)
  total_count <- nrow(brand_rows)

  # Clean title by title; unlist() + as.character() gives a character vector
  # whether the cleaner returns strings or NA.
  cleaned <- as.character(unlist(
    lapply(brand_rows$title, clean_product_name, brand_name = brand_name),
    use.names = FALSE
  ))

  top_products <- data.frame(clean_product = cleaned, stringsAsFactors = FALSE) %>%
    filter(!is.na(clean_product) & clean_product != "") %>%
    count(clean_product, sort = TRUE) %>%
    head(5)

  n_products <- nrow(top_products)
  if (n_products == 0) {
    return(NULL)
  }

  data.frame(
    브랜드 = rep(brand_name, n_products),
    총건수 = rep(total_count, n_products),
    순위 = seq_len(n_products),
    상품명 = top_products$clean_product,
    건수 = top_products$n,
    비율 = paste0(round(top_products$n / total_count * 100, 1), "%"),
    stringsAsFactors = FALSE
  )
}

# Assemble all brands in one bind instead of growing a data frame with
# rbind() inside nested loops.
product_summary_all <- bind_rows(lapply(top_20_brands, summarise_brand_products))

# Keep the expected columns even when nothing matched, so the table below
# still finds the columns it styles.
if (ncol(product_summary_all) == 0) {
  product_summary_all <- data.frame(
    브랜드 = character(),
    총건수 = integer(),
    순위 = integer(),
    상품명 = character(),
    건수 = integer(),
    비율 = character(),
    stringsAsFactors = FALSE
  )
}

# Interactive product table with per-column filters and export buttons.
#
# The share column is stored as text ("12.3%"). styleInterval() compares the
# raw cell value numerically in the browser, so on text every comparison fails
# and all rows get the last colour. Convert it to a number here and add the
# "%" suffix back for display only.
product_table <- product_summary_all
product_table$비율 <- as.numeric(sub("%$", "", as.character(product_table$비율)))

datatable(
  product_table,

  # Per-column filter row
  filter = 'top',

  options = list(
    pageLength = 20,
    lengthMenu = c(10, 20, 50, 100),
    dom = 'Blfrtip',
    buttons = list(
      'copy',
      'csv',
      'excel',
      list(
        extend = 'colvis',
        text = '컬럼 선택'
      )
    ),

    # Column widths, alignment, and a badge-style renderer for the brand
    columnDefs = list(
      list(width = '100px', targets = c(0, 1, 2, 4, 5)),
      list(width = '250px', targets = 3),
      list(className = 'dt-center', targets = c(1, 2, 4, 5)),
      list(
        targets = 0,
        render = JS(
          "function(data, type, row, meta) {",
          "if(type === 'display'){",
          "return '<span style=\"background-color: #f8f9fa; padding: 2px 6px; border-radius: 3px; font-weight: bold;\">' + data + '</span>';",
          "} else {",
          "return data;",
          "}",
          "}"
        )
      )
    ),

    # Vertical scrolling
    scrollY = "500px",
    scrollCollapse = TRUE,

    # Default sort: brand total descending, then brand name
    order = list(list(1, 'desc'), list(0, 'asc')),

    # Korean UI strings
    language = list(
      lengthMenu = "페이지당 _MENU_ 개",
      info = "_START_-_END_ / 총 _TOTAL_개",
      infoEmpty = "데이터 없음",
      infoFiltered = "(전체 _MAX_개 중 검색됨)",
      search = "검색:"
    )
  ),

  rownames = FALSE,
  extensions = c('Buttons', 'ColReorder', 'FixedHeader'),
  escape = FALSE

) %>%

  # Show the numeric share with its percent sign
  formatString("비율", suffix = "%") %>%

  # Colour the share by bucket; bold above 15%
  formatStyle(
    "비율",
    backgroundColor = styleInterval(
      cuts = c(5, 10, 15, 25),
      values = c("#f8f9fa", "#e3f2fd", "#fff3e0", "#ffecb3", "#ffcdd2")
    ),
    fontWeight = styleInterval(
      cuts = c(15),
      values = c("normal", "bold")
    )
  ) %>%

  formatStyle(
    "건수",
    fontWeight = "bold",
    color = styleInterval(
      cuts = c(10, 30, 50, 100),
      values = c("#6c757d", "#495057", "#333333", "#dc3545", "#cc0000")
    )
  ) %>%

  formatStyle(
    "브랜드",
    fontWeight = "bold"
  ) %>%

  # In-cell bar proportional to the brand's total post count
  formatStyle(
    "총건수",
    background = styleColorBar(range(product_table$총건수), '#e8f4f8'),
    backgroundSize = '80% 70%',
    backgroundRepeat = 'no-repeat',
    backgroundPosition = 'right'
  )
```

# 키워드 분석 {data-icon="fa-key"}

## Column {data-width=600}

### 📊 키워드 카테고리별 빈도

```{r}
# Keyword groups whose frequency is reported on this page.
keyword_categories <- list(
  "판매 관련" = c("팔아요", "팝니다", "판매", "팔아용", "파라요", "파라용", "팔아여", "팝니당", "팔게요", "급처", "급매"),
  "상태 표현" = c("새상품", "새거", "새것", "새제품", "미개봉", "미착용", "정품", "미사용", "거의새것", "중고"),
  "외관 형용사" = c("깨끗", "깨끗해요", "깨끗한", "깔끔", "깔끔해요", "예쁜", "이쁜", "예뻐요", "이뻐요", "좋아요"),
  "가격 관련" = c("싸게", "싸요", "저렴", "저렴해요", "할인", "세일", "비싸요", "네고"),
  "거래 방식" = c("직거래", "택배", "배송", "문의", "연락", "편택", "반택"),
  "동사 표현" = c("드려요", "드려용", "내놔요", "올려요", "양도", "양도해요", "가져가세요", "교환", "나눔"),
  "감탄사" = c("대박", "진심", "레알", "완전", "ㅋㅋ", "ㅎㅎ")
)

# Count how often any of `keywords` occurs in `text_vector`.
#
# All keywords are matched in one case-insensitive alternation, longest first,
# so each occurrence in the text is counted once. Counting keyword by keyword
# would count "깨끗해요" both as "깨끗" and as "깨끗해요".
#
# @param text_vector Character vector of texts to search.
# @param keywords    Character vector of keywords (used as regex fragments).
# @return Total number of non-overlapping keyword occurrences.
count_keywords <- function(text_vector, keywords) {
  if (length(keywords) == 0) {
    return(0)
  }
  longest_first <- keywords[order(-nchar(keywords))]
  pattern <- paste0("(?i)", paste(longest_first, collapse = "|"))
  sum(str_count(text_vector, pattern), na.rm = TRUE)
}

# Title and body of each post, pasted once and reused for every category.
post_text <- paste(data$title, data$content)

# One row per category, built in a single step instead of rbind() in a loop.
category_freq <- data.frame(
  카테고리 = names(keyword_categories),
  빈도 = vapply(
    keyword_categories,
    function(keywords) as.numeric(count_keywords(post_text, keywords)),
    numeric(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)

# Bar chart of category frequencies. trim = TRUE stops format() from padding
# labels to a common width; scientific = FALSE keeps plain digits.
p <- plot_ly(category_freq,
             x = ~reorder(카테고리, 빈도),
             y = ~빈도,
             type = 'bar',
             marker = list(
               color = ~빈도,
               colorscale = list(
                 c(0, '#e8f4f8'),
                 c(0.5, '#a8d5e2'),
                 c(1, '#6bb6d6')
               ),
               showscale = FALSE
             ),
             text = ~paste0(format(빈도, big.mark = ",", trim = TRUE, scientific = FALSE), "회"),
             textposition = 'outside',
             textfont = list(size = 12)) %>%
  layout(
    title = "",
    xaxis = list(title = ""),
    yaxis = list(title = "출현 빈도"),
    margin = list(b = 100)
  )

p
```

### 🔤 주요 키워드 Top 30

```{r}
# Title and body of each post, pasted once instead of once per keyword.
keyword_search_text <- paste(data$title, data$content)

# Case-insensitive occurrence count of every keyword, tagged with its
# category. Built per category and bound once instead of growing a data frame
# with rbind() in a nested loop. Keywords that never occur are dropped.
keyword_freq <- bind_rows(lapply(names(keyword_categories), function(group_name) {
  group_keywords <- keyword_categories[[group_name]]
  data.frame(
    키워드 = group_keywords,
    빈도 = vapply(
      group_keywords,
      function(keyword) {
        as.numeric(sum(str_count(keyword_search_text, paste0("(?i)", keyword))))
      },
      numeric(1),
      USE.NAMES = FALSE
    ),
    카테고리 = rep(group_name, length(group_keywords)),
    stringsAsFactors = FALSE
  )
})) %>%
  filter(빈도 > 0)

# The 30 most frequent keywords.
top_keywords <- keyword_freq %>%
  arrange(desc(빈도)) %>%
  head(30)

# Bar colour per category.
category_colors <- c(
  "판매 관련" = "#3498db",
  "상태 표현" = "#2ecc71",
  "외관 형용사" = "#e74c3c",
  "가격 관련" = "#f39c12",
  "거래 방식" = "#9b59b6",
  "동사 표현" = "#1abc9c",
  "감탄사" = "#e67e22"
)

top_keywords$색상 <- unname(category_colors[top_keywords$카테고리])

# Horizontal bar chart. trim = TRUE stops format() from padding labels to a
# common width; scientific = FALSE keeps plain digits. The hover template is
# one string per bar so that it can carry the bar's category.
p <- plot_ly(top_keywords,
             y = ~reorder(키워드, 빈도),
             x = ~빈도,
             type = 'bar',
             orientation = 'h',
             marker = list(color = ~색상),
             text = ~paste0(format(빈도, big.mark = ",", trim = TRUE, scientific = FALSE), "회"),
             textposition = 'outside',
             hovertemplate = paste0(
               '<b>%{y}</b><br>',
               '빈도: %{x:,}회<br>',
               '카테고리: ', top_keywords$카테고리, '<br>',
               '<extra></extra>'
             )) %>%
  layout(
    title = "",
    xaxis = list(title = "출현 빈도"),
    yaxis = list(title = ""),
    margin = list(l = 100),
    showlegend = FALSE
  )

p
```

## Column {data-width=400}

### 📈 카테고리별 비율

```{r}
# Add each category's percentage share of all keyword occurrences
# (one decimal place); the column is reused by the summary table below.
category_freq <- mutate(category_freq, 비율 = round(빈도 / sum(빈도) * 100, 1))

# Slice colours, in the row order of category_freq.
pie_slice_colors <- c('#3498db', '#2ecc71', '#e74c3c', '#f39c12', '#9b59b6', '#1abc9c', '#e67e22')

# Pie chart of keyword occurrences per category.
p <- layout(
  plot_ly(category_freq,
          labels = ~카테고리,
          values = ~빈도,
          type = 'pie',
          textinfo = 'label+percent',
          marker = list(colors = pie_slice_colors),
          textfont = list(size = 12)),
  title = "",
  showlegend = TRUE
)

p
```

### 📋 키워드 상세 통계

```{r}
# Category statistics sorted by frequency, with the share and the count
# rendered as display strings.
sorted_category_freq <- arrange(category_freq, desc(빈도))
category_summary <- mutate(
  sorted_category_freq,
  비율 = paste0(비율, "%"),
  빈도 = format(빈도, big.mark = ",")
)

# Plain table (no search box or paging controls) with the count in bold.
category_summary_table <- datatable(
  category_summary,
  options = list(dom = 't', pageLength = 10),
  rownames = FALSE
)

formatStyle(category_summary_table, "빈도", fontWeight = "bold")
```

### 🔍 카테고리별 주요 키워드

```{r}
# Top 3 keywords of every category.
# The rank is numbered inside each group; a repeating 1:3 pattern over the
# whole table mislabels every later row as soon as one category has fewer
# than three keywords.
category_top_keywords <- keyword_freq %>%
  group_by(카테고리) %>%
  arrange(desc(빈도), .by_group = TRUE) %>%
  mutate(순위 = row_number()) %>%
  filter(순위 <= 3) %>%
  ungroup()

# The count stays numeric so that styleInterval() can compare it in the
# browser; formatRound() adds the thousands separator for display only.
datatable(category_top_keywords %>%
            select(카테고리, 순위, 키워드, 빈도),
          filter = 'top',
          options = list(
            pageLength = 21,
            scrollY = "300px",
            dom = 'ftp'
          ),
          rownames = FALSE) %>%
  formatRound("빈도", digits = 0) %>%
  formatStyle(
    "빈도",
    fontWeight = "bold",
    color = styleInterval(
      cuts = c(1000, 3000, 5000),
      values = c("#6c757d", "#495057", "#dc3545", "#cc0000")
    )
  )
```

# 신고자 분석 {data-icon="fa-user"}

## Column {data-width=600}

### 🚨 최다 신고 대상자 (Top 20)

```{r}
# The 20 users with the most posts in the data set, with a display label.
posts_by_user <- count(data, user_id, sort = TRUE)
user_violations <- mutate(head(posts_by_user, 20),
                          user_label = paste0("User ", user_id))

# Bar colour runs from blue (fewest) to red (most); the colour bar is shown.
violation_marker <- list(color = ~n,
                         colorscale = list(c(0, '#3498db'), c(1, '#e74c3c')),
                         showscale = TRUE)

p <- layout(
  plot_ly(user_violations,
          x = ~reorder(user_label, n),
          y = ~n,
          type = 'bar',
          marker = violation_marker,
          text = ~paste0(n, "건"),
          textposition = 'outside'),
  title = "",
  xaxis = list(title = ""),
  yaxis = list(title = "신고 건수"),
  margin = list(b = 150)
)

p
```

### 📈 신고 빈도 분포

```{r}
# Number of users per posts-per-user bucket.
frequency_levels <- c("1회", "2회", "3-4회", "5-9회", "10-19회", "20회 이상")

# Posts per user; every count is a whole number of at least 1, so the
# cascading upper bounds below select the same buckets as explicit ranges.
posts_per_user <- count(data, user_id)

violation_freq <- posts_per_user %>%
  mutate(frequency_group = case_when(
    n == 1 ~ "1회",
    n == 2 ~ "2회",
    n < 5 ~ "3-4회",
    n < 10 ~ "5-9회",
    n < 20 ~ "10-19회",
    n >= 20 ~ "20회 이상"
  )) %>%
  count(frequency_group)

# Fix the bucket order for the x axis.
violation_freq$frequency_group <- factor(violation_freq$frequency_group,
                                         levels = frequency_levels)

p <- layout(
  plot_ly(violation_freq,
          x = ~frequency_group,
          y = ~n,
          type = 'bar',
          marker = list(color = '#9b59b6'),
          text = ~paste0(n, "명"),
          textposition = 'outside'),
  title = "사용자별 신고 횟수 분포",
  xaxis = list(title = "신고 횟수"),
  yaxis = list(title = "사용자 수")
)

p
```

## Column {data-width=400}

### 👤 신고 통계 요약

```{r}
# Summary counts of users by how many posts they have in the data set.
posts_per_user <- count(data, user_id)

# One-row summary first, then reshaped to a two-column (label, count) table.
user_stats_wide <- summarise(
  posts_per_user,
  `총 사용자 수` = n(),
  `1회 신고` = sum(n == 1),
  `2회 이상` = sum(n >= 2),
  `5회 이상` = sum(n >= 5),
  `10회 이상` = sum(n >= 10),
  `20회 이상` = sum(n >= 20)
)

user_stats <- pivot_longer(user_stats_wide,
                           everything(),
                           names_to = "구분",
                           values_to = "인원수")

datatable(user_stats,
          rownames = FALSE,
          options = list(dom = 't', pageLength = 10))
```

### 🎯 신고 대상자의 주요 브랜드

```{r}
# Users with at least 5 posts in the data set.
frequent_violators <- data %>%
  count(user_id) %>%
  filter(n >= 5) %>%
  pull(user_id)

# Their 10 most frequently detected brands ("기타" excluded).
frequent_brands <- data %>%
  filter(user_id %in% frequent_violators & brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(10)

# brewer.pal() warns when asked for fewer than 3 colours, so always request
# at least 3 and keep only as many as there are slices.
n_slices <- nrow(frequent_brands)
slice_colors <- RColorBrewer::brewer.pal(max(3, n_slices), "Paired")[seq_len(n_slices)]

p <- plot_ly(frequent_brands,
             labels = ~brand,
             values = ~n,
             type = 'pie',
             textinfo = 'label+value',
             marker = list(colors = slice_colors)) %>%
  layout(title = "다발 신고자(5회+)의 브랜드 분포")

p
```

### ⚠️ 고위험 사용자 상세 (Top 10)

```{r}
# The 10 users with the most posts in the data set.
user_violations_top <- data %>%
  count(user_id, sort = TRUE) %>%
  head(10)

# Render a price as "<number>원" with thousands separators. trim = TRUE avoids
# padding to a common width, scientific = FALSE avoids "1e+06", and values
# that are not finite (no usable price) are shown as "-".
format_won <- function(x) {
  ifelse(
    is.finite(x),
    paste0(format(round(x), big.mark = ",", scientific = FALSE, trim = TRUE), "원"),
    "-"
  )
}

# Per-user profile: post count, most frequent brand, mean of positive prices,
# and highest price.
high_risk_users <- data %>%
  filter(user_id %in% user_violations_top$user_id) %>%
  group_by(user_id) %>%
  summarise(
    신고건수 = n(),
    주요브랜드 = names(sort(table(brand), decreasing = TRUE))[1],
    평균가격 = mean(price_numeric[price_numeric > 0], na.rm = TRUE),
    # max() of an all-NA vector would return -Inf with a warning.
    최고가격 = if (all(is.na(price_numeric))) NA_real_ else max(price_numeric, na.rm = TRUE)
  ) %>%
  arrange(desc(신고건수)) %>%
  mutate(
    평균가격 = format_won(평균가격),
    최고가격 = format_won(최고가격)
  ) %>%
  rename(사용자ID = user_id)

datatable(high_risk_users,
          options = list(pageLength = 10, dom = 'tip'),
          rownames = FALSE)
```

# 시계열 분석 {data-icon="fa-calendar"}

## Column {data-width=600}

### 📅 월별 신고 건수 추이

```{r}
# Number of posts per calendar month of creation ("YYYY-MM"), in
# chronological order.
posts_with_month <- mutate(data, year_month = format(created_at, "%Y-%m"))
monthly_trend <- arrange(count(posts_with_month, year_month), year_month)

# Line chart with a marker on every month.
p <- layout(
  plot_ly(monthly_trend,
          x = ~year_month,
          y = ~n,
          type = 'scatter',
          mode = 'lines+markers',
          line = list(color = '#3498db', width = 2),
          marker = list(size = 6)),
  title = "",
  xaxis = list(title = "월", tickangle = -45),
  yaxis = list(title = "신고 건수")
)

p
```

### 🔥 브랜드별 시계열 트렌드 (Top 5)

```{r}
# The five most frequently detected brands ("기타" excluded).
brand_totals <- count(filter(data, brand != "기타"), brand, sort = TRUE)
top5_brands <- pull(head(brand_totals, 5), brand)

# Monthly post counts for those brands, in chronological order.
brand_monthly <- data %>%
  filter(brand %in% top5_brands) %>%
  mutate(year_month = format(created_at, "%Y-%m")) %>%
  count(year_month, brand) %>%
  arrange(year_month)

# One line per brand, added in order of overall frequency.
p <- plot_ly()

for (b in top5_brands) {
  brand_data <- filter(brand_monthly, brand == b)
  p <- add_trace(
    p,
    data = brand_data,
    x = ~year_month,
    y = ~n,
    name = b,
    type = 'scatter',
    mode = 'lines+markers'
  )
}

p <- layout(
  p,
  title = "",
  xaxis = list(title = "월", tickangle = -45),
  yaxis = list(title = "신고 건수")
)

p
```

## Column {data-width=400}

### ⏰ 시간대별 게시 패턴

```{r}
# Number of posts per hour of day of creation.
# NOTE(review): the source timestamps carried a " UTC" suffix and no time-zone
# shift is applied anywhere, so these hours appear to be UTC rather than the
# posters' local time — confirm before interpreting the daily pattern.
posts_with_hour <- mutate(data, hour = as.numeric(format(created_at, "%H")))
hourly_pattern <- arrange(count(posts_with_hour, hour), hour)

# Filled line chart with a tick every two hours.
p <- layout(
  plot_ly(hourly_pattern,
          x = ~hour,
          y = ~n,
          type = 'scatter',
          mode = 'lines+markers',
          fill = 'tozeroy',
          line = list(color = '#9b59b6'),
          marker = list(color = '#9b59b6')),
  title = "",
  xaxis = list(title = "시간 (0-23시)", dtick = 2),
  yaxis = list(title = "게시글 수")
)

p
```

# 데이터 테이블 {data-icon="fa-table"}

## Column

### 📋 전체 데이터 (필터링 가능)

```{r}
# Post-level table with Korean column headers for browsing and filtering.
display_data <- data %>%
  select(id, title, brand, item, price_numeric, user_id, created_at, category_name) %>%
  rename(
    게시글ID = id,
    제목 = title,
    브랜드 = brand,
    품목 = item,
    가격 = price_numeric,
    사용자ID = user_id,
    생성일 = created_at,
    카테고리 = category_name
  ) %>%
  mutate(
    # Positive prices become "<number>원"; anything else is shown as free.
    # trim = TRUE stops format() from left-padding every value to the width of
    # the largest price, and scientific = FALSE prevents output like "1e+06".
    가격 = ifelse(
      !is.na(가격) & 가격 > 0,
      paste0(format(round(가격), big.mark = ",", scientific = FALSE, trim = TRUE), "원"),
      "나눔/무료"
    ),
    생성일 = format(생성일, "%Y-%m-%d %H:%M")
  )

datatable(display_data,
          filter = 'top',
          options = list(
            pageLength = 25,
            scrollX = TRUE,
            autoWidth = TRUE
          ),
          rownames = FALSE)
```