---
title: "상표권 위반 명품 가품 게시글 종합 분석 대시보드"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    theme: cosmo
    source_code: embed
---
```{r setup, include=FALSE}
# Install any missing dashboard dependency, then attach all of them.
# requireNamespace() only checks availability: unlike require(), it neither
# attaches the package nor warns when the package is absent.
required_packages <- c("flexdashboard", "tidyverse", "plotly", "DT", "stringr")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# library() stops with an error if a package is still unavailable.
library(flexdashboard)
library(tidyverse)
library(plotly)
library(DT)
library(stringr)
# Load the exported post listing. Text columns stay character (no factors)
# and strings are marked as UTF-8.
data <- read.csv(
  file = "bquxjob_4539bf34_19a07a988b2.csv",
  stringsAsFactors = FALSE,
  encoding = "UTF-8"
)
# Brand alias table: canonical Korean brand name -> spellings (Korean,
# English, typos, abbreviations) that may appear in a post title.
# List order matters: detect_brand() returns the first brand that matches.
# NOTE(review): matching is by substring, so very short aliases ("lv", "롤",
# "에르", "루이", "발렌", ...) can hit unrelated words (e.g. "lv" occurs
# inside "silver") - confirm these aliases are intended.
brand_mapping <- list(
  "샤넬" = c("샤넬", "chanel", "채널", "사넬"),
  "루이비통" = c("루이비통", "루이", "louis vuitton", "lv", "비통"),
  "구찌" = c("구찌", "gucci", "구씨"),
  "에르메스" = c("에르메스", "hermes", "에루메스", "에르"),
  "프라다" = c("프라다", "prada"),
  "디올" = c("디올", "dior"),
  "발렌시아가" = c("발렌시아가", "balenciaga", "발렌"),
  "셀린" = c("셀린", "celine", "셀리느"),
  "펜디" = c("펜디", "fendi"),
  "버버리" = c("버버리", "burberry", "버버"),
  "롤렉스" = c("롤렉스", "rolex", "롤"),
  "까르띠에" = c("까르띠에", "cartier", "까르띠", "카르티에", "칼리브"),
  "몽블랑" = c("몽블랑", "montblanc", "몽블"),
  "오메가" = c("오메가", "omega"),
  "보테가베네타" = c("보테가", "bottega", "베네타", "보떼가")
)

# Classify a single post title by brand.
#
# title: one title string (character scalar); may be NA.
# Returns the name of the first brand in brand_mapping that has an alias
# occurring, case-insensitively, as a literal substring of the title, or
# "기타" when nothing matches or the title is missing.
detect_brand <- function(title) {
  # A missing title has no text to match; classify it explicitly.
  if (is.na(title)) {
    return("기타")
  }
  title_lower <- tolower(title)
  for (brand in names(brand_mapping)) {
    aliases <- tolower(brand_mapping[[brand]])
    # fixed = TRUE: aliases are literal text, not regular expressions.
    matched <- vapply(
      aliases,
      function(alias) grepl(alias, title_lower, fixed = TRUE),
      logical(1)
    )
    if (any(matched)) {
      return(brand)
    }
  }
  "기타"
}
# Item-type keyword table: category name -> words that may appear in a post
# title. List order matters: detect_item() returns the first category that
# matches.
# NOTE(review): matching is by substring, so short keywords ("백", "캡",
# "hat", ...) can hit unrelated words (e.g. "hat" occurs inside "chat") -
# confirm these keywords are intended.
item_keywords <- list(
  "가방" = c("가방", "백", "bag", "크로스백", "숄더백", "토트백", "호보백", "클러치", "보스턴백", "백팩"),
  "시계" = c("시계", "watch", "워치"),
  "지갑" = c("지갑", "wallet", "월렛", "반지갑", "장지갑"),
  "신발" = c("신발", "슈즈", "shoes", "스니커즈", "구두", "샌들", "슬리퍼", "운동화"),
  "의류" = c("티셔츠", "셔츠", "후드", "자켓", "코트", "니트", "맨투맨", "원피스", "스커트", "바지", "청바지"),
  "악세서리" = c("목걸이", "귀걸이", "팔찌", "반지", "머리핀", "헤어핀", "브로치", "키링"),
  "선글라스" = c("선글라스", "안경", "썬글라스"),
  "벨트" = c("벨트", "belt"),
  "모자" = c("모자", "캡", "hat", "비니"),
  # The duplicated "스카프" entry was dropped; it never changed the result.
  "스카프" = c("스카프", "쁘띠", "쁘띠에스카")
)

# Classify a single post title by item type.
#
# title: one title string (character scalar); may be NA.
# Returns the name of the first category in item_keywords that has a keyword
# occurring, case-insensitively, as a literal substring of the title, or
# "기타" when nothing matches or the title is missing.
detect_item <- function(title) {
  # A missing title has no text to match; classify it explicitly.
  if (is.na(title)) {
    return("기타")
  }
  title_lower <- tolower(title)
  for (item in names(item_keywords)) {
    keywords <- tolower(item_keywords[[item]])
    # fixed = TRUE: keywords are literal text, not regular expressions.
    matched <- vapply(
      keywords,
      function(keyword) grepl(keyword, title_lower, fixed = TRUE),
      logical(1)
    )
    if (any(matched)) {
      return(item)
    }
  }
  "기타"
}
# Data preprocessing
# Tag every post with the brand and item type detected from its title.
# vapply() always yields a plain character vector (sapply() returns a list
# for zero rows), and USE.NAMES = FALSE avoids storing each title a second
# time as an element name.
data$brand <- vapply(
  data$title,
  detect_brand,
  character(1),
  USE.NAMES = FALSE
)
data$item_type <- vapply(
  data$title,
  detect_item,
  character(1),
  USE.NAMES = FALSE
)

# Analysis subset: prices strictly between 1,000 and 10,000,000 with both a
# brand and an item type identified. filter() also drops rows whose price
# is NA.
data_clean <- data %>%
  filter(
    price > 1000 & price < 10000000,
    brand != "기타",
    item_type != "기타"
  )

# Bucket prices into six labelled ranges; cut() intervals are right-closed,
# so a price of exactly 50000 falls in the first range.
data_clean$price_range <- cut(
  data_clean$price,
  breaks = c(0, 50000, 100000, 200000, 300000, 500000, Inf),
  labels = c(
    "5만원 이하", "5-10만원", "10-20만원",
    "20-30만원", "30-50만원", "50만원 이상"
  )
)
# Headline statistics shown in the value boxes.
# Post totals use the full data set; the average price uses the cleaned
# subset only.
total_posts <- nrow(data)
brand_identified <- sum(data[["brand"]] != "기타")
avg_price <- mean(data_clean[["price"]], na.rm = TRUE)

# Posts per identified brand, most frequent first.
brand_count <- count(filter(data, brand != "기타"), brand, sort = TRUE)

# First row of the sorted table (NA when no brand was identified).
top_brand <- brand_count[["brand"]][1]
```
## Row {data-height="150"}
### 총 게시글 {.value-box}
```{r}
# Total number of posts in the raw data, with thousands separators.
valueBox(
  color = "danger",
  icon = "fa-list",
  value = format(total_posts, big.mark = ",")
)
```
### 브랜드 식별 {.value-box}
```{r}
# Number of posts whose title matched a known brand.
valueBox(
  color = "primary",
  icon = "fa-tag",
  value = format(brand_identified, big.mark = ",")
)
```
### 평균 가격 {.value-box}
```{r}
# Mean price of the cleaned subset, shown in units of 10,000 won with one
# decimal place.
valueBox(
  color = "success",
  icon = "fa-won",
  value = paste0(round(avg_price / 10000, 1), "만원")
)
```
### TOP 브랜드 {.value-box}
```{r}
# Most frequently detected brand.
valueBox(
  color = "warning",
  icon = "fa-crown",
  value = top_brand
)
```
## Row {data-height="425"}
### 브랜드별 검출 빈도 TOP 8
```{r}
# Horizontal bar chart of the eight most frequently detected brands.
brand_top8 <- head(brand_count, 8)

p <- plot_ly(
  data = brand_top8,
  # reorder() orders the brand axis by post count.
  y = ~reorder(brand, n),
  x = ~n,
  type = "bar",
  orientation = "h",
  # One colour per row of brand_top8 (most frequent brand first).
  marker = list(
    color = c(
      "#FF6384", "#36A2EB", "#FFCE56", "#4BC0C0",
      "#9966FF", "#FF9F40", "#FF6384", "#C9CBCF"
    )
  ),
  # trim = TRUE: format() on a vector otherwise left-pads every count to a
  # common width, which puts stray spaces in front of the smaller labels.
  text = ~paste(format(n, big.mark = ",", trim = TRUE), "건"),
  textposition = "outside",
  hoverinfo = "text",
  hovertext = ~paste(
    brand, ":", format(n, big.mark = ",", trim = TRUE), "건"
  )
) %>%
  layout(
    xaxis = list(title = "게시글 수", showgrid = TRUE),
    yaxis = list(title = ""),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    margin = list(l = 100, r = 50, t = 20, b = 50)
  )
p
```
### 품목별 검출 빈도
```{r}
# Horizontal bar chart of post counts per detected item type
# (unclassified posts excluded).
item_count <- data %>%
  filter(item_type != "기타") %>%
  count(item_type, sort = TRUE)

p <- plot_ly(
  data = item_count,
  # reorder() orders the item axis by post count.
  y = ~reorder(item_type, n),
  x = ~n,
  type = "bar",
  orientation = "h",
  marker = list(color = "#3498db"),
  # trim = TRUE: format() on a vector otherwise left-pads every count to a
  # common width, which puts stray spaces in front of the smaller labels.
  text = ~paste(format(n, big.mark = ",", trim = TRUE), "건"),
  textposition = "outside",
  hoverinfo = "text",
  hovertext = ~paste(
    item_type, ":", format(n, big.mark = ",", trim = TRUE), "건"
  )
) %>%
  layout(
    xaxis = list(title = "게시글 수", showgrid = TRUE),
    yaxis = list(title = ""),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    margin = list(l = 100, r = 50, t = 20, b = 50)
  )
p
```
## Row {data-height="425"}
### 가격대별 게시글 분포
```{r}
# Vertical bar chart of post counts per price range (cleaned subset).
# .drop = FALSE keeps price ranges with no posts as zero-count rows. The six
# marker colours below are positional, so dropping an empty range would
# shift every later colour onto the wrong range.
price_dist <- data_clean %>%
  count(price_range, .drop = FALSE)

p <- plot_ly(
  data = price_dist,
  x = ~price_range,
  y = ~n,
  type = "bar",
  # Light-to-dark gradient, one colour per price range in level order.
  marker = list(
    color = c(
      "#FFF3CD", "#FFE5A1", "#FFCE56", "#FFA500", "#FF8C00", "#FF4500"
    )
  ),
  # trim = TRUE: format() on a vector otherwise left-pads every count to a
  # common width, which puts stray spaces in front of the smaller labels.
  text = ~paste(format(n, big.mark = ",", trim = TRUE), "건"),
  textposition = "outside",
  hoverinfo = "text",
  hovertext = ~paste(
    price_range, ":", format(n, big.mark = ",", trim = TRUE), "건"
  )
) %>%
  layout(
    xaxis = list(title = "가격대"),
    yaxis = list(title = "게시글 수"),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    margin = list(l = 50, r = 50, t = 20, b = 80)
  )
p
```
### 품목별 평균 가격
```{r}
# Horizontal bar chart of the mean price per item type (cleaned subset),
# highest average first.
item_avg_price <- data_clean %>%
  group_by(item_type) %>%
  summarise(avg_price = mean(price, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_price))

p <- plot_ly(
  data = item_avg_price,
  # reorder() orders the item axis by average price.
  y = ~reorder(item_type, avg_price),
  x = ~avg_price,
  type = "bar",
  orientation = "h",
  marker = list(color = "#2ecc71"),
  # Bar label in units of 10,000 won; hover text shows the full amount.
  text = ~paste(round(avg_price / 10000, 1), "만원"),
  textposition = "outside",
  hoverinfo = "text",
  # trim = TRUE: format() on a vector otherwise left-pads every amount to a
  # common width, which puts stray spaces in front of the smaller values.
  hovertext = ~paste(
    item_type, ":",
    format(round(avg_price), big.mark = ",", trim = TRUE), "원"
  )
) %>%
  layout(
    xaxis = list(title = "평균 가격 (원)", showgrid = TRUE),
    yaxis = list(title = ""),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    margin = list(l = 100, r = 50, t = 20, b = 50)
  )
p
```
## Row {data-height="425"}
### 가품 의심 키워드 TOP 8
```{r}
# Horizontal bar chart of the eight most frequent counterfeit-suspect
# keywords, counted as the number of titles containing each keyword.
suspect_keywords <- c(
  "s급", "1대1", "미러급", "정품급", "sa급", "aa급",
  "최상급", "오리지널", "명품급", "퀄리티"
)

# Lower-case the titles once instead of once per keyword.
titles_lower <- tolower(data$title)

# vapply() pins the result to one integer per keyword. grepl() with
# fixed = TRUE matches the keyword literally and reports FALSE for NA
# titles, so a missing title cannot turn a count into NA.
keyword_counts <- vapply(
  suspect_keywords,
  function(keyword) {
    sum(grepl(tolower(keyword), titles_lower, fixed = TRUE))
  },
  integer(1)
)

keyword_df <- data.frame(
  keyword = suspect_keywords,
  count = keyword_counts
) %>%
  arrange(desc(count)) %>%
  head(8)

p <- plot_ly(
  data = keyword_df,
  # reorder() orders the keyword axis by occurrence count.
  y = ~reorder(keyword, count),
  x = ~count,
  type = "bar",
  orientation = "h",
  # One colour per row of keyword_df (most frequent keyword first).
  marker = list(
    color = c(
      "#DC143C", "#E23744", "#E8574E", "#EE7758",
      "#F49762", "#FAB76C", "#FFD776", "#FFF380"
    )
  ),
  # trim = TRUE: format() on a vector otherwise left-pads every count to a
  # common width, which puts stray spaces in front of the smaller labels.
  text = ~paste(format(count, big.mark = ",", trim = TRUE), "회"),
  textposition = "outside",
  hoverinfo = "text",
  hovertext = ~paste(
    keyword, ":", format(count, big.mark = ",", trim = TRUE), "회"
  )
) %>%
  layout(
    xaxis = list(title = "출현 횟수", showgrid = TRUE),
    yaxis = list(title = ""),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    margin = list(l = 100, r = 50, t = 20, b = 50)
  )
p
```
### 브랜드 × 품목 조합 TOP 10
```{r}
# Horizontal bar chart of the ten most frequent brand / item-type pairs
# among posts where both were identified.
brand_item_combo <- data %>%
  filter(brand != "기타", item_type != "기타") %>%
  count(brand, item_type, sort = TRUE) %>%
  head(10) %>%
  mutate(combo = paste(brand, "-", item_type))

p <- plot_ly(
  data = brand_item_combo,
  # reorder() orders the combination axis by post count.
  y = ~reorder(combo, n),
  x = ~n,
  type = "bar",
  orientation = "h",
  marker = list(color = "#e74c3c"),
  # trim = TRUE: format() on a vector otherwise left-pads every count to a
  # common width, which puts stray spaces in front of the smaller labels.
  text = ~paste(format(n, big.mark = ",", trim = TRUE), "건"),
  textposition = "outside",
  hoverinfo = "text",
  hovertext = ~paste(
    combo, ":", format(n, big.mark = ",", trim = TRUE), "건"
  )
) %>%
  layout(
    xaxis = list(title = "게시글 수", showgrid = TRUE),
    yaxis = list(title = ""),
    font = list(family = "Apple SD Gothic Neo, Malgun Gothic", size = 13),
    # Wider left margin than the other charts for the longer labels.
    margin = list(l = 150, r = 50, t = 20, b = 50)
  )
p
```
\`\`\`