상표권 위반 명품 가품 게시글 종합 분석 대시보드 v2.0

---
title: "상표권 위반 명품 가품 게시글 종합 분석 대시보드 v2.0"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    source_code: embed
---

```{r setup, include=FALSE}
# Install (when missing) and attach every package the dashboard uses.
# RColorBrewer is listed explicitly because later chunks call
# RColorBrewer::brewer.pal() directly.
required_packages <- c(
  "flexdashboard", "tidyverse", "plotly", "DT", "stringr", "scales",
  "RColorBrewer"
)

for (pkg in required_packages) {
  # requireNamespace() only checks availability; library() below then fails
  # loudly if the package still cannot be attached after installation.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
  library(pkg, character.only = TRUE)
}

# Load the exported CSV; text columns stay as character vectors
data <- read.csv(
  file = "bquxjob_5a586c0b_19a11c22e8b.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)

# Preprocessing: parse the timestamp and derive a numeric price
# (price values that are not numbers become NA)
data$created_at <- as.POSIXct(
  data$created_at,
  format = "%Y-%m-%d %H:%M:%S"
)
data$price_numeric <- as.numeric(data$price)

# Flag posts whose raw price text contains the give-away marker "나눔"
data$is_naum <- grepl(pattern = "나눔", x = data$price, ignore.case = TRUE)

# Brand dictionary: canonical brand name -> keywords searched for in the
# post text. Detection returns the FIRST brand (in list order) with a
# matching keyword, so order matters:
# "발렌티노" is listed before "발렌시아가" because the short keyword "발렌"
# is a prefix of "발렌티노"; with the opposite order every Korean
# "발렌티노" post would be labelled "발렌시아가".
# NOTE(review): several short keywords ("lv", "롤", "에르", "루이", "버버")
# are substrings of unrelated words and may cause false positives.
brand_mapping <- list(
  "샤넬" = c("샤넬", "chanel", "채널", "사넬"),
  "루이비통" = c("루이비통", "루이", "louis vuitton", "lv", "비통"),
  "구찌" = c("구찌", "gucci", "구씨"),
  "에르메스" = c("에르메스", "hermes", "에루메스", "에르"),
  "프라다" = c("프라다", "prada"),
  "디올" = c("디올", "dior"),
  "발렌티노" = c("발렌티노", "valentino"),
  "발렌시아가" = c("발렌시아가", "balenciaga", "발렌"),
  "셀린" = c("셀린", "celine", "셀리느"),
  "펜디" = c("펜디", "fendi"),
  "버버리" = c("버버리", "burberry", "버버"),
  "롤렉스" = c("롤렉스", "rolex", "롤"),
  "까르띠에" = c("까르띠에", "cartier", "까르띠", "카르티에"),
  "몽블랑" = c("몽블랑", "montblanc", "몽블"),
  "오메가" = c("오메가", "omega"),
  "보테가베네타" = c("보테가", "bottega", "베네타", "보떼가"),
  "생로랑" = c("생로랑", "saint laurent", "ysl"),
  "지방시" = c("지방시", "givenchy")
)

#' Detect the brand mentioned in a post.
#'
#' Title and content are pasted together, lower-cased, and searched for the
#' keywords of each entry of the global `brand_mapping`, in list order. The
#' first brand with a matching keyword wins.
#'
#' Keywords are matched as literal substrings (not regular expressions), so
#' a keyword containing characters such as "." cannot over-match.
#'
#' @param title Character vector of post titles (a single string also works).
#' @param content Character vector of post bodies, same length as `title`.
#' @return Character vector with one brand name per input element; "기타"
#'   where no keyword matches. Zero-length input gives `character(0)`.
detect_brand <- function(title, content) {
  text <- tolower(paste(title, content))
  detected <- rep("기타", length(text))
  pending <- rep(TRUE, length(text))

  for (brand in names(brand_mapping)) {
    # Stop early once every element has been assigned a brand.
    if (!any(pending)) {
      break
    }
    hit <- rep(FALSE, length(text))
    for (keyword in tolower(brand_mapping[[brand]])) {
      hit <- hit | grepl(keyword, text, fixed = TRUE)
    }
    # Only elements not claimed by an earlier brand may take this one.
    detected[pending & hit] <- brand
    pending <- pending & !hit
  }

  detected
}

# Add the brand column: one detect_brand() call per (title, content) pair
data$brand <- mapply(
  FUN = detect_brand,
  data$title,
  data$content
)

# Item dictionary: item category -> keywords searched for in the post text.
# Categories are tried in list order and the first match wins.
item_mapping <- list(
  "가방" = c(
    "가방", "백", "bag", "숄더백", "크로스백", "토트백", "클러치"
  ),
  "지갑" = c(
    "지갑", "월렛", "wallet", "카드지갑"
  ),
  "시계" = c(
    "시계", "watch", "워치"
  ),
  "신발" = c(
    "신발", "스니커즈", "로퍼", "슈즈", "shoes", "sneakers", "부츠"
  ),
  "벨트" = c(
    "벨트", "belt"
  ),
  "의류" = c(
    "티셔츠", "셔츠", "자켓", "코트", "후드", "니트", "원피스", "스커트",
    "팬츠"
  ),
  "액세서리" = c(
    "목걸이", "반지", "귀걸이", "팔찌", "브로치", "스카프", "선글라스"
  )
)

#' Detect the item category mentioned in a post.
#'
#' Title and content are pasted together, lower-cased, and searched for the
#' keywords of each entry of the global `item_mapping`, in list order. The
#' first category with a matching keyword wins.
#'
#' Keywords are matched as literal substrings (not regular expressions).
#'
#' @param title Character vector of post titles (a single string also works).
#' @param content Character vector of post bodies, same length as `title`.
#' @return Character vector with one category per input element; "기타"
#'   where no keyword matches. Zero-length input gives `character(0)`.
detect_item <- function(title, content) {
  text <- tolower(paste(title, content))
  detected <- rep("기타", length(text))
  pending <- rep(TRUE, length(text))

  for (item in names(item_mapping)) {
    # Stop early once every element has been assigned a category.
    if (!any(pending)) {
      break
    }
    hit <- rep(FALSE, length(text))
    for (keyword in tolower(item_mapping[[item]])) {
      hit <- hit | grepl(keyword, text, fixed = TRUE)
    }
    # Only elements not claimed by an earlier category may take this one.
    detected[pending & hit] <- item
    pending <- pending & !hit
  }

  detected
}

# Add the item column: one detect_item() call per (title, content) pair
data$item <- mapply(
  FUN = detect_item,
  data$title,
  data$content
)

# Bucket the numeric price into labelled ranges. Intervals are closed on
# the right, so prices <= 0 land in the free/give-away bucket.
data$price_range <- cut(
  x = data$price_numeric,
  breaks = c(-Inf, 0, 1e5, 3e5, 5e5, 1e6, Inf),
  labels = c(
    "나눔/무료",
    "10만원 이하",
    "10-30만원",
    "30-50만원",
    "50-100만원",
    "100만원 이상"
  ),
  include.lowest = TRUE
)

# Posts flagged as give-aways are forced into the free bucket even though
# their price text is not numeric
data$price_range <- replace(data$price_range, data$is_naum, "나눔/무료")
```

# 개요 {data-icon="fa-chart-line"}

## Column {data-width=350}

### 📊 데이터 기본 정보

```{r}
# Headline figures for the overview table
total_posts <- nrow(data)
total_users <- n_distinct(data$user_id)
date_range <- paste(
  format(min(data$created_at, na.rm = TRUE), "%Y-%m-%d"),
  "~",
  format(max(data$created_at, na.rm = TRUE), "%Y-%m-%d")
)
avg_price <- mean(data$price_numeric, na.rm = TRUE)
median_price <- median(data$price_numeric, na.rm = TRUE)

# Two-column (label, value) table; every value is pre-formatted text
info_df <- data.frame(
  항목 = c("총 게시글 수", "총 사용자 수", "데이터 기간", "평균 가격", "중앙 가격"),
  값 = c(
    format(total_posts, big.mark = ","),
    format(total_users, big.mark = ","),
    date_range,
    sprintf("%s원", format(round(avg_price), big.mark = ",")),
    sprintf("%s원", format(round(median_price), big.mark = ","))
  )
)

info_df %>%
  datatable(
    rownames = FALSE,
    options = list(dom = "t", pageLength = 10)
  )
```

### 🏆 상위 검출 브랜드 (Top 10)

```{r}
# Ten most frequently detected brands (unclassified posts excluded)
brand_counts <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(n = 10)

# Bar chart with the count printed above each bar
p <- brand_counts %>%
  plot_ly(
    x = ~reorder(brand, n),
    y = ~n,
    type = "bar",
    marker = list(color = "#3498db"),
    text = ~paste0(n, "건"),
    textposition = "outside"
  ) %>%
  layout(
    title = "",
    xaxis = list(title = ""),
    yaxis = list(title = "게시글 수"),
    margin = list(b = 100)
  )

p
```

## Column {data-width=350}

### 💰 가격대별 게시글 분포 (나눔 포함)

```{r}
# Post count and share per price bucket
price_dist <- data %>%
  count(price_range) %>%
  mutate(percentage = n / sum(n) * 100)

# Colour per bucket (the free/give-away bucket gets a distinct red)
colors <- c("나눔/무료" = "#e74c3c",
            "10만원 이하" = "#3498db",
            "10-30만원" = "#2ecc71",
            "30-50만원" = "#f39c12",
            "50-100만원" = "#9b59b6",
            "100만원 이상" = "#e67e22")

# price_range is a factor. Indexing `colors` with a factor would use the
# integer level codes, not the labels, so convert to character to look the
# colours up by name. Buckets without a colour (e.g. NA) get NA.
slice_colors <- unname(colors[as.character(price_dist$price_range)])

p <- plot_ly(price_dist,
             labels = ~price_range,
             values = ~n,
             type = 'pie',
             marker = list(colors = slice_colors),
             textinfo = 'label+percent',
             textposition = 'inside') %>%
  layout(title = "")

p
```

### 📂 카테고리별 분포 (Top 10)

```{r}
# Ten most common categories, labelled "카테고리 <id>"
category_dist <- data %>%
  count(category_id, sort = TRUE) %>%
  head(n = 10) %>%
  mutate(category_label = paste0("카테고리 ", category_id))

# Bars ordered from the most to the least frequent category
p <- category_dist %>%
  plot_ly(
    x = ~reorder(category_label, -n),
    y = ~n,
    type = "bar",
    marker = list(color = "#2ecc71"),
    text = ~paste0(n, "건"),
    textposition = "outside"
  ) %>%
  layout(
    title = "",
    xaxis = list(title = ""),
    yaxis = list(title = "게시글 수")
  )

p
```

# 브랜드 분석 {data-icon="fa-tags"}

## Column {data-width=500}

### 🏷️ 브랜드별 품목 분포

```{r}
# Most frequent brand/item pairs (rows unclassified on either axis dropped)
brand_item <- data %>%
  filter(brand != "기타", item != "기타") %>%
  count(brand, item, sort = TRUE) %>%
  head(n = 50)

# The ten most frequently detected brands form the heatmap rows
top_brands <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(n = 10) %>%
  pull(brand)

# Brand x item counts in wide form; pairs that never occur are set to 0
heatmap_data <- data %>%
  filter(brand %in% top_brands, item != "기타") %>%
  count(brand, item) %>%
  pivot_wider(names_from = item, values_from = n, values_fill = 0)

# Numeric matrix for plotly: rows are brands, columns are items
heatmap_matrix <- heatmap_data %>%
  select(-brand) %>%
  as.matrix()
rownames(heatmap_matrix) <- heatmap_data$brand

p <- layout(
  plot_ly(
    x = colnames(heatmap_matrix),
    y = rownames(heatmap_matrix),
    z = heatmap_matrix,
    type = "heatmap",
    colorscale = "Blues",
    text = heatmap_matrix,
    texttemplate = "%{z}",
    showscale = TRUE
  ),
  title = "브랜드 × 품목 히트맵",
  xaxis = list(title = "품목"),
  yaxis = list(title = "브랜드")
)

p
```

### 📊 브랜드별 평균 가격

```{r}
# Mean/median price and post count per brand, using positive prices only;
# the 15 brands with the highest mean price are kept
brand_price <- data %>%
  filter(brand != "기타", !is.na(price_numeric), price_numeric > 0) %>%
  group_by(brand) %>%
  summarise(
    평균가격 = mean(price_numeric, na.rm = TRUE),
    중앙가격 = median(price_numeric, na.rm = TRUE),
    게시글수 = n()
  ) %>%
  arrange(desc(평균가격)) %>%
  head(n = 15)

# Grouped bars: mean (red) next to median (blue) for each brand
p <- brand_price %>%
  plot_ly(
    x = ~reorder(brand, 평균가격),
    y = ~평균가격,
    type = "bar",
    name = "평균가격",
    marker = list(color = "#e74c3c")
  ) %>%
  add_trace(
    y = ~중앙가격,
    name = "중앙가격",
    marker = list(color = "#3498db")
  ) %>%
  layout(
    title = "",
    xaxis = list(title = ""),
    yaxis = list(title = "가격 (원)"),
    barmode = "group",
    margin = list(b = 100)
  )

p
```

## Column {data-width=500}

### 🔍 브랜드별 주요 상품명 (Top 30)

```{r}
#' Most frequent product names for one brand, taken from post titles.
#'
#' The "product name" is the leading run of 3 to 30 Korean letters, ASCII
#' letters, digits or whitespace at the start of the title, with surrounding
#' and repeated whitespace normalised. Titles with no such leading run
#' (for example titles starting with a bracket or emoji) are skipped rather
#' than being counted together as a missing product.
#'
#' @param df Data frame with at least `brand` and `title` columns.
#' @param target_brand Brand name to filter on.
#' @return Data frame with columns `product` and `n`: at most the five most
#'   frequent product names, most frequent first.
extract_product_name <- function(df, target_brand) {
  df %>%
    filter(brand == target_brand) %>%
    mutate(
      product = str_squish(str_extract(title, "^[가-힣a-zA-Z0-9\\s]{3,30}"))
    ) %>%
    # Drop titles without an extractable name (NA) or with only whitespace
    filter(!is.na(product), product != "") %>%
    count(product, sort = TRUE) %>%
    head(5)
}

# Six most frequently detected brands (unclassified posts excluded)
top_brands_for_products <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(6) %>%
  pull(brand)

# Collect the per-brand product tables and stack them once, instead of
# growing a data frame with rbind() inside a loop.
product_summary <- top_brands_for_products %>%
  lapply(function(brand_name) {
    products <- extract_product_name(data, brand_name)
    # rep() keeps the assignment valid when a brand has no products (0 rows)
    products$brand <- rep(brand_name, nrow(products))
    products
  }) %>%
  bind_rows()

# With no detected brand at all the stacked result has no columns; fall
# back to an empty table with the expected columns so select() below works.
if (ncol(product_summary) == 0) {
  product_summary <- data.frame(
    brand = character(),
    product = character(),
    n = integer()
  )
}

# Display table
product_display <- product_summary %>%
  select(brand, product, n) %>%
  rename(브랜드 = brand, 상품명 = product, 빈도 = n) %>%
  head(30)

datatable(product_display,
          options = list(pageLength = 15, dom = 'tip'),
          rownames = FALSE)
```

### 💎 고가 브랜드 분석 (50만원 이상)

```{r}
# Brand counts among posts priced at 500,000 won or more (top 10)
high_price_brand <- data %>%
  filter(!is.na(price_numeric), price_numeric >= 500000) %>%
  count(brand, sort = TRUE) %>%
  head(n = 10)

p <- high_price_brand %>%
  plot_ly(
    labels = ~brand,
    values = ~n,
    type = "pie",
    textinfo = "label+percent",
    marker = list(colors = RColorBrewer::brewer.pal(10, "Set3"))
  ) %>%
  layout(title = "")

p
```

# 신고자 분석 {data-icon="fa-user"}

## Column {data-width=600}

### 🚨 신고 다발 사용자 (Top 20)

```{r}
# Twenty users with the most posts in the data set
user_violations <- data %>%
  count(user_id, sort = TRUE) %>%
  head(n = 20) %>%
  mutate(user_label = paste0("User ", user_id))

# Bars coloured by count on a blue-to-red scale
p <- user_violations %>%
  plot_ly(
    x = ~reorder(user_label, n),
    y = ~n,
    type = "bar",
    marker = list(
      color = ~n,
      colorscale = list(c(0, '#3498db'), c(1, '#e74c3c')),
      showscale = TRUE
    ),
    text = ~paste0(n, "건"),
    textposition = "outside"
  ) %>%
  layout(
    title = "",
    xaxis = list(title = ""),
    yaxis = list(title = "신고 건수"),
    margin = list(b = 150)
  )

p
```

### 📈 신고 빈도 분포

```{r}
# Number of users per posts-per-user bucket.
# The per-user count is stored as `post_count` rather than the default `n`,
# so the second count() cannot be confused with (or weighted by) an existing
# `n` column. NOTE(review): older dplyr releases are believed to weight by
# an existing `n` column automatically - confirm against the installed
# version.
violation_freq <- data %>%
  count(user_id, name = "post_count") %>%
  mutate(frequency_group = case_when(
    post_count == 1 ~ "1회",
    post_count == 2 ~ "2회",
    post_count >= 3 & post_count < 5 ~ "3-4회",
    post_count >= 5 & post_count < 10 ~ "5-9회",
    post_count >= 10 & post_count < 20 ~ "10-19회",
    post_count >= 20 ~ "20회 이상"
  )) %>%
  count(frequency_group, name = "n") %>%
  # Explicit level order so the x axis runs from fewest to most posts
  mutate(frequency_group = factor(frequency_group,
                                  levels = c("1회", "2회", "3-4회", "5-9회", "10-19회", "20회 이상")))

p <- plot_ly(violation_freq,
             x = ~frequency_group,
             y = ~n,
             type = 'bar',
             marker = list(color = '#9b59b6'),
             text = ~paste0(n, "명"),
             textposition = 'outside') %>%
  layout(title = "사용자별 신고 횟수 분포",
         xaxis = list(title = "신고 횟수"),
         yaxis = list(title = "사용자 수"))

p
```

## Column {data-width=400}

### 👤 신고 통계 요약

```{r}
# How many users reach each posts-per-user threshold, as a two-column
# (label, head count) table
user_stats <- data %>%
  count(user_id, name = "posts") %>%
  summarise(
    `총 사용자 수` = n(),
    `1회 신고` = sum(posts == 1),
    `2회 이상` = sum(posts >= 2),
    `5회 이상` = sum(posts >= 5),
    `10회 이상` = sum(posts >= 10),
    `20회 이상` = sum(posts >= 20)
  ) %>%
  pivot_longer(everything(), names_to = "구분", values_to = "인원수")

user_stats %>%
  datatable(
    rownames = FALSE,
    options = list(dom = "t", pageLength = 10)
  )
```

### 🎯 다발 신고자의 주요 브랜드

```{r}
# Users appearing in at least five posts
frequent_violators <- data %>%
  count(user_id) %>%
  filter(n >= 5) %>%
  pull(user_id)

# Brand distribution of those users' posts (unclassified excluded, top 10)
frequent_brands <- data %>%
  filter(user_id %in% frequent_violators, brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(n = 10)

p <- frequent_brands %>%
  plot_ly(
    labels = ~brand,
    values = ~n,
    type = "pie",
    textinfo = "label+value",
    marker = list(colors = RColorBrewer::brewer.pal(10, "Paired"))
  ) %>%
  layout(title = "다발 신고자(5회+)의 브랜드 분포")

p
```

### ⚠️ 고위험 사용자 상세 (Top 10)

```{r}
# Per-user summary for the ten users with the most posts.
# `user_violations` is built in an earlier chunk, sorted by post count.
high_risk_users <- data %>%
  filter(user_id %in% head(user_violations$user_id, 10)) %>%
  group_by(user_id) %>%
  summarise(
    신고건수 = n(),
    # Most frequent brand among the user's posts
    주요브랜드 = names(sort(table(brand), decreasing = TRUE))[1],
    # A user with no numeric price would otherwise yield NaN / -Inf;
    # report NA instead so it can be shown as "N/A".
    평균가격 = if (all(is.na(price_numeric))) {
      NA_real_
    } else {
      mean(price_numeric, na.rm = TRUE)
    },
    최고가격 = if (all(is.na(price_numeric))) {
      NA_real_
    } else {
      max(price_numeric, na.rm = TRUE)
    }
  ) %>%
  arrange(desc(신고건수)) %>%
  mutate(
    # trim = TRUE avoids the space padding format() adds to equalise widths
    평균가격 = ifelse(
      is.na(평균가격),
      "N/A",
      paste0(format(round(평균가격), big.mark = ",", trim = TRUE,
                    scientific = FALSE), "원")
    ),
    최고가격 = ifelse(
      is.na(최고가격),
      "N/A",
      paste0(format(round(최고가격), big.mark = ",", trim = TRUE,
                    scientific = FALSE), "원")
    )
  ) %>%
  rename(사용자ID = user_id)

datatable(high_risk_users,
          options = list(pageLength = 10, dom = 'tip'),
          rownames = FALSE)
```

# 시계열 분석 {data-icon="fa-calendar"}

## Column {data-width=600}

### 📅 월별 신고 건수 추이

```{r}
# Posts per calendar month ("YYYY-MM"), in chronological order
monthly_trend <- data %>%
  mutate(year_month = format(created_at, "%Y-%m")) %>%
  count(year_month) %>%
  arrange(year_month)

p <- monthly_trend %>%
  plot_ly(
    x = ~year_month,
    y = ~n,
    type = "scatter",
    mode = "lines+markers",
    line = list(color = "#3498db", width = 2),
    marker = list(size = 6)
  ) %>%
  layout(
    title = "",
    xaxis = list(title = "월", tickangle = -45),
    yaxis = list(title = "신고 건수")
  )

p
```

### 🔥 브랜드별 시계열 트렌드 (Top 5)

```{r}
# Five most frequently detected brands (unclassified posts excluded)
top5_brands <- data %>%
  filter(brand != "기타") %>%
  count(brand, sort = TRUE) %>%
  head(n = 5) %>%
  pull(brand)

# Monthly post counts for those brands
brand_monthly <- data %>%
  filter(brand %in% top5_brands) %>%
  mutate(year_month = format(created_at, "%Y-%m")) %>%
  count(year_month, brand) %>%
  arrange(year_month)

# Start from an empty figure and add one line per brand, in ranking order
p <- Reduce(
  function(fig, brand_name) {
    add_trace(
      fig,
      data = filter(brand_monthly, brand == brand_name),
      x = ~year_month,
      y = ~n,
      name = brand_name,
      type = "scatter",
      mode = "lines+markers"
    )
  },
  top5_brands,
  plot_ly()
)

p <- layout(
  p,
  title = "",
  xaxis = list(title = "월", tickangle = -45),
  yaxis = list(title = "신고 건수")
)

p
```

## Column {data-width=400}

### 📊 요일별 게시 패턴

```{r}
# Korean weekday labels indexed by POSIXlt `wday` + 1 (wday: 0 = Sunday,
# ..., 6 = Saturday). weekdays() returns names in the session locale, so on
# a non-Korean locale none of them would match the Korean factor levels;
# deriving the label from the numeric weekday works in every locale.
weekday_by_wday <- c("일요일", "월요일", "화요일", "수요일", "목요일", "금요일", "토요일")

# Posts per weekday, ordered Monday to Sunday
weekday_pattern <- data %>%
  mutate(weekday = weekday_by_wday[as.POSIXlt(created_at)$wday + 1]) %>%
  mutate(weekday = factor(weekday,
                         levels = c("월요일", "화요일", "수요일", "목요일", "금요일", "토요일", "일요일"))) %>%
  count(weekday)

p <- plot_ly(weekday_pattern,
             x = ~weekday,
             y = ~n,
             type = 'bar',
             marker = list(color = '#f39c12')) %>%
  layout(title = "",
         xaxis = list(title = ""),
         yaxis = list(title = "게시글 수"))

p
```

### ⏰ 시간대별 게시 패턴

```{r}
# Posts per hour of day (0-23), in ascending hour order
hourly_pattern <- data %>%
  mutate(hour = as.numeric(format(created_at, "%H"))) %>%
  count(hour) %>%
  arrange(hour)

# Filled line chart in a single purple tone
p <- hourly_pattern %>%
  plot_ly(
    x = ~hour,
    y = ~n,
    type = "scatter",
    mode = "lines+markers",
    fill = "tozeroy",
    line = list(color = "#9b59b6"),
    marker = list(color = "#9b59b6")
  ) %>%
  layout(
    title = "",
    xaxis = list(title = "시간 (0-23시)", dtick = 2),
    yaxis = list(title = "게시글 수")
  )

p
```

# 데이터 테이블 {data-icon="fa-table"}

## Column

### 📋 전체 데이터 (필터링 가능)

```{r}
# Columns shown in the full data table, renamed to Korean headers.
# The price stays numeric so the table sorts it numerically and the column
# filter is a range filter; the thousands separator and "원" suffix are
# applied by formatCurrency() at display time. Missing prices render blank.
display_data <- data %>%
  select(id, title, brand, item, price_numeric, user_id, created_at, category_id) %>%
  rename(
    게시글ID = id,
    제목 = title,
    브랜드 = brand,
    품목 = item,
    가격 = price_numeric,
    사용자ID = user_id,
    생성일 = created_at,
    카테고리 = category_id
  ) %>%
  mutate(
    # "YYYY-MM-DD HH:MM" text still sorts chronologically
    생성일 = format(생성일, "%Y-%m-%d %H:%M")
  )

datatable(display_data,
          filter = 'top',
          options = list(
            pageLength = 25,
            scrollX = TRUE,
            autoWidth = TRUE
          ),
          rownames = FALSE) %>%
  formatCurrency("가격", currency = "원", before = FALSE, digits = 0)
```