library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(pander)

# Load the product listing and give `category` an explicit level order.
# ggplot2 lays out a discrete axis in factor-level order, so this order is
# the one the category plots below use on their x axis.
df <- read.csv("Small_Appliance_2018-04-09") %>%
  mutate(category = factor(category, levels = c(
    "Coffee Makers",
    "Espresso Machines",
    "Toasters & Ovens",
    "Blenders",
    "Immersion Blenders",
    "Hand Mixers",
    "Food Processors",
    "Multi Cookers & Slow Cookers"
  )))

1. 카테고리별 상품 수 및 가격대

# Jittered scatter of price per category. coord_cartesian() only zooms the
# y axis to 0-2700; it does not remove points outside that window.
ggplot(data = df, mapping = aes(x = category, y = price)) +
  geom_jitter(colour = "steelblue", size = 3, alpha = 0.6, width = 0.25) +
  coord_cartesian(ylim = c(0, 2700))

# Number of product rows in each category, rendered as a table by pander.
s <- df %>%
  group_by(category) %>%
  summarise(count = n())
pander(s)
category count
Coffee Makers 26
Espresso Machines 70
Toasters & Ovens 58
Blenders 23
Immersion Blenders 4
Hand Mixers 5
Food Processors 15
Multi Cookers & Slow Cookers 10

2. 브랜드별 플레이 현황 분석

브랜드별

# Same category/price scatter, with each point coloured by its brand.
ggplot(data = df, mapping = aes(x = category, y = price, colour = brand)) +
  geom_jitter(size = 3, alpha = 0.6, width = 0.25) +
  coord_cartesian(ylim = c(0, 2700))

북미 로컬 브랜드 vs. 유러피언 브랜드 플레이 현황

# Category/price scatter where `region` drives both the colour and the
# point shape, so the two brand groups stay distinguishable.
ggplot(
  data = df,
  mapping = aes(x = category, y = price, colour = region, shape = region)
) +
  geom_jitter(size = 3, alpha = 0.6, width = 0.25) +
  coord_cartesian(ylim = c(0, 2700))

3. 주요 프리미엄 브랜드의 주력 카테고리

# Plot one brand's products by category and price.
#
# data:       product listing with `brand`, `category` and `price` columns.
# brand_name: exact brand string to keep (compared against `brand`).
#
# Returns a ggplot object. Called at top level, the result is auto-printed
# just like a bare ggplot() expression.
#
# The title is derived from `brand_name`; the previous copy-pasted blocks
# labelled every brand's plot "KitchenAid product line-up".
plot_brand_lineup <- function(data, brand_name) {
  # `!!` injects the argument's value, so a column that happens to be called
  # `brand_name` cannot shadow it inside filter().
  brand_products <- filter(data, brand == !!brand_name)

  ggplot(brand_products, aes(category, price, col = brand)) +
    geom_jitter(width = 0.1, alpha = 0.6, size = 3) +
    # Keep unused factor levels on the x axis so categories in which the
    # brand has no product are still shown and all brand plots line up.
    scale_x_discrete(drop = FALSE) +
    coord_cartesian(ylim = c(0, 2700)) +
    ggtitle(paste(brand_name, "product line-up"))
}

# KitchenAid
plot_brand_lineup(df, "KitchenAid")

# Breville
plot_brand_lineup(df, "Breville")

# Wolf
plot_brand_lineup(df, "Wolf")

# De’Longhi
plot_brand_lineup(df, "De’Longhi")

4. 브랜드 별 요약표

# One row per brand: how many distinct categories it appears in, how many
# product rows it has, and the mean of `price` over those rows.
brand_summary <- summarise(
  group_by(df, brand),
  num_category = n_distinct(category),
  num_product = n(),
  mean_price = mean(price)
)

# Brand positioning: number of categories covered vs. mean price, with each
# point labelled by its brand just to the right of the marker.
ggplot(
  data = brand_summary,
  mapping = aes(x = num_category, y = mean_price, label = brand)
) +
  geom_point(size = 3) +
  geom_text(size = 5, hjust = 0, nudge_x = 0.1)

# Second take on the brand plot: point size now encodes the product count.
# The y scale is limited to 0-1000, so rows whose mean_price falls outside
# that range are removed from both layers (ggplot2 warns about them).
ggplot(
  data = brand_summary,
  mapping = aes(
    x = num_category,
    y = mean_price,
    label = brand,
    size = num_product
  )
) +
  geom_point(alpha = 0.6) +
  geom_text(size = 5, hjust = 0, nudge_x = 0.1) +
  scale_x_continuous(breaks = seq_len(10)) +
  scale_y_continuous(limits = c(0, 1000))
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).

To Do: 브랜드 서머리에 제품을 보유한 카테고리 리스트 추가하기.