# Exploring the size of coffee shops nationwide
# Step 1. Load the data.table package: its fread() reader is used below
# because it is faster than the plain data.frame readers.
library("data.table")
# Step 2. Load the data.
# data.table = FALSE makes fread() hand back a plain data.frame, and
# stringsAsFactors = TRUE converts character columns to factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
DF <- fread(
  "example_coffee.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  data.table = FALSE
)
# Step 3. Keep the site-size column on its own as a plain vector.
Size <- DF[["sizeOfsite"]]
# Step 4. Get a feel for the data: summary() reports the minimum, quartiles,
# median, mean, maximum and the number of NAs.
summary(Size)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 28.12 50.00 75.53 93.75 24080.00 19
# Step 5. Plot the raw values against their index to spot outliers.
plot(Size)
# Step 6. Remove outliers: every value above 10000 is turned into NA.
# which() skips the positions that are already NA.
Size <- replace(Size, which(Size > 10000), NA)
summary(Size)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 28.12 50.00 75.02 93.75 1406.00 20
# Step 7. Remove missing values: a size of 0 is treated as missing too,
# then every NA is dropped from the vector.
Size[which(Size == 0)] <- NA
Size <- Size[!is.na(Size)]
summary(Size)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.25 30.00 51.92 77.23 95.30 1406.00
# Step 8. Group the sizes into classes of width 20 and count each class.
# The breaks run 0, 20, ..., 1440 (72 intervals); cut() labels each value
# with its right-closed interval (a, b] and table() tallies the labels.
DegreeOfSize <- table(cut(Size, breaks = seq(0, 1440, by = 20)))
DegreeOfSize
##
## (0,20] (20,40] (40,60]
## 6026 11303 8293
## (60,80] (80,100] (100,120]
## 5283 4239 2246
## (120,140] (140,160] (160,180]
## 1751 1297 959
## (180,200] (200,220] (220,240]
## 882 568 512
## (240,260] (260,280] (280,300]
## 394 331 347
## (300,320] (320,340] (340,360]
## 179 191 112
## (360,380] (380,400] (400,420]
## 105 94 81
## (420,440] (440,460] (460,480]
## 60 42 34
## (480,500] (500,520] (520,540]
## 32 18 16
## (540,560] (560,580] (580,600]
## 14 12 9
## (600,620] (620,640] (640,660]
## 9 5 4
## (660,680] (680,700] (700,720]
## 4 0 1
## (720,740] (740,760] (760,780]
## 1 2 4
## (780,800] (800,820] (820,840]
## 1 1 1
## (840,860] (860,880] (880,900]
## 0 0 0
## (900,920] (920,940] (940,960]
## 2 1 0
## (960,980] (980,1e+03] (1e+03,1.02e+03]
## 0 1 0
## (1.02e+03,1.04e+03] (1.04e+03,1.06e+03] (1.06e+03,1.08e+03]
## 0 2 0
## (1.08e+03,1.1e+03] (1.1e+03,1.12e+03] (1.12e+03,1.14e+03]
## 0 0 0
## (1.14e+03,1.16e+03] (1.16e+03,1.18e+03] (1.18e+03,1.2e+03]
## 0 0 0
## (1.2e+03,1.22e+03] (1.22e+03,1.24e+03] (1.24e+03,1.26e+03]
## 1 0 0
## (1.26e+03,1.28e+03] (1.28e+03,1.3e+03] (1.3e+03,1.32e+03]
## 0 0 0
## (1.32e+03,1.34e+03] (1.34e+03,1.36e+03] (1.36e+03,1.38e+03]
## 0 0 0
## (1.38e+03,1.4e+03] (1.4e+03,1.42e+03] (1.42e+03,1.44e+03]
## 0 1 0
# Step 9. Draw the distribution as a frequency polygon.
library(ggplot2)

library(ggthemes)
# Bins are 10 units wide; the x axis is limited to 0-300 with a tick every 20,
# and the ggthemes WSJ theme is applied.
# NOTE(review): this plots DF$sizeOfsite, not the cleaned Size vector, so the
# zero-size rows dropped in step 7 are still counted here — confirm intended.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# line thickness — check the installed version before switching.
ggplot(DF, mapping = aes(x = sizeOfsite)) +
  geom_freqpoly(binwidth = 10, size = 1.2, colour = "orange") +
  scale_x_continuous(limits = c(0, 300), breaks = seq(0, 300, by = 20)) +
  theme_wsj()
