# 전국 커피숍 규모 파악하기

# Step 1. Load data.table so the CSV can be read quickly with fread().
library("data.table")

# Step 2. Load the data. data.table = FALSE makes fread() return a plain
# data.frame; stringsAsFactors = TRUE converts character columns to factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
DF <- fread(
  "example_coffee.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  data.table = FALSE
)

# Step 3. Keep the site-size column as its own vector. `[[` is used instead of
# `$` so the name must match exactly, and the explicit check fails loudly
# instead of silently producing NULL when the column is absent.
stopifnot("sizeOfsite" %in% names(DF))
Size <- DF[["sizeOfsite"]]

# Step 4. Inspect the distribution: summary() reports the minimum, quartiles,
# median, mean, maximum and the number of NAs.
summary(Size)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##     0.00    28.12    50.00    75.53    93.75 24080.00       19
# Step 5. Plot the raw values.
plot(Size)

# Step 6. Remove outliers: sizes above 10000 are recoded as missing.
Size <- replace(Size, which(Size > 10000), NA)
summary(Size)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   28.12   50.00   75.02   93.75 1406.00      20
# Step 7. Remove missing values: a size of 0 is also treated as missing, then
# every NA is dropped from the vector.
Size <- replace(Size, which(Size == 0), NA)
Size <- Size[!is.na(Size)]
summary(Size)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.25   30.00   51.92   77.23   95.30 1406.00
# Step 8. Tabulate the sizes into classes of width 20: (0,20], (20,40], ...
# The top break is derived from the data: the outlier cutoff used earlier is
# 10000, so values above 1440 can survive cleaning, and cut() would turn
# anything beyond the last break into NA and drop it from the table without
# warning. The top break never goes below 1440 (72 classes), so data that fit
# the original fixed breaks are binned exactly as before.
# dig.lab = 5 keeps breaks up to five digits out of scientific notation in the
# labels, e.g. "(980,1000]" instead of "(980,1e+03]".
bin_width <- 20
top_break <- max(72 * bin_width, ceiling(max(Size) / bin_width) * bin_width)
DegreeOfSize <- table(
  cut(Size, breaks = seq(0, top_break, by = bin_width), dig.lab = 5)
)
DegreeOfSize
## 
##      (0,20]     (20,40]     (40,60]     (60,80]    (80,100]   (100,120]
##        6026       11303        8293        5283        4239        2246
##   (120,140]   (140,160]   (160,180]   (180,200]   (200,220]   (220,240]
##        1751        1297         959         882         568         512
##   (240,260]   (260,280]   (280,300]   (300,320]   (320,340]   (340,360]
##         394         331         347         179         191         112
##   (360,380]   (380,400]   (400,420]   (420,440]   (440,460]   (460,480]
##         105          94          81          60          42          34
##   (480,500]   (500,520]   (520,540]   (540,560]   (560,580]   (580,600]
##          32          18          16          14          12           9
##   (600,620]   (620,640]   (640,660]   (660,680]   (680,700]   (700,720]
##           9           5           4           4           0           1
##   (720,740]   (740,760]   (760,780]   (780,800]   (800,820]   (820,840]
##           1           2           4           1           1           1
##   (840,860]   (860,880]   (880,900]   (900,920]   (920,940]   (940,960]
##           0           0           0           2           1           0
##   (960,980]  (980,1000] (1000,1020] (1020,1040] (1040,1060] (1060,1080]
##           0           1           0           0           2           0
## (1080,1100] (1100,1120] (1120,1140] (1140,1160] (1160,1180] (1180,1200]
##           0           0           0           0           0           0
## (1200,1220] (1220,1240] (1240,1260] (1260,1280] (1280,1300] (1300,1320]
##           1           0           0           0           0           0
## (1320,1340] (1340,1360] (1360,1380] (1380,1400] (1400,1420] (1420,1440]
##           0           0           0           0           1           0
# Step 9. Draw the size distribution as a frequency polygon.
library(ggplot2)

library(ggthemes)

# Plot the cleaned Size vector rather than the raw DF column, so the values
# recoded as missing during cleaning (zero-sized sites and sizes above 10000)
# do not contribute to the curve. The x axis is limited to 0-300 with a tick
# every 20; observations outside that range are dropped by the scale.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; switch once the minimum ggplot2 version is confirmed.
ggplot(data = data.frame(sizeOfsite = Size), aes(x = sizeOfsite)) +
  geom_freqpoly(binwidth = 10, size = 1.2, colour = "orange") +
  scale_x_continuous(limits = c(0, 300), breaks = seq(0, 300, 20)) +
  theme_wsj()