선형회귀(linear regression)를 통한 inven 예측 R code2 source = 1.Btv_Preroll_인벤예측_요약본_20190705_2-확장테스트판(중요).R

if(!require(ggplot2)){install.packages("ggplot2"); library(ggplot2)}
## Loading required package: ggplot2
if(!require(dplyr)){install.packages("dplyr"); library(dplyr)}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
###  ★★★★★★ 모든 변수명 지우기
rm(list =  ls())
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  588646 31.5    1212601 64.8  1212601 64.8
## Vcells 1068755  8.2    8388608 64.0  1753441 13.4
#getwd()
setwd("/home/mjs0428/inven_forcast")

btv_pre_roll <- read.csv("./Btv_Preroll-Req_Inv_Res_Imp-data_20190702.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE, fileEncoding = "euc-kr")
glimpse(btv_pre_roll)
## Observations: 548
## Variables: 7
## $ Date            <chr> "2018-01-01", "2018-01-02", "2018-01-03", "2018-…
## $ VOD_시청시간_초 <chr> "8,421,536,616", "7,878,348,464", "7,498,034,394", "7…
## $ VOD_시청건수    <chr> "4,496,473", "4,658,646", "4,535,338", "4,500,479", …
## $ REQ             <chr> "4,567,997", "4,630,415", "4,533,702", "4,490,70…
## $ INV             <chr> "7,377,096", "7,110,607", "6,932,244", "6,839,95…
## $ Res             <chr> "4,787,165", "4,391,051", "4,252,549", "4,101,29…
## $ Imp             <chr> "4,498,714", "4,141,349", "4,011,362", "3,872,03…
# 숫자형 데이터 변환
btv_pre_roll$REQ  <- as.numeric(gsub(",", "",  btv_pre_roll$REQ))
btv_pre_roll$INV <- as.numeric(gsub(",", "",  btv_pre_roll$INV))
btv_pre_roll$Res <- as.numeric(gsub(",", "",  btv_pre_roll$Res))
btv_pre_roll$Imp <- as.numeric(gsub(",", "",  btv_pre_roll$Imp))

btv_pre_roll$VOD_시청시간_초  <- round(as.numeric(gsub(",", "", btv_pre_roll$VOD_시청시간_초))/3600,0)
# 변수명 바꾸기
btv_pre_roll <- rename(btv_pre_roll, VOD_시청시간 = VOD_시청시간_초)
btv_pre_roll$VOD_시청건수  <- as.numeric(gsub(",", "",  btv_pre_roll$VOD_시청건수))
# 날짜형 타입으로 변환
btv_pre_roll$Date  <- as.Date(btv_pre_roll$Date)

### VOD 시청건수와 큰 차이 데이터 대체처리, 260만 미만 8개
btv_pre_roll$REQ_chg_flag = FALSE

arrange(btv_pre_roll %>% filter(REQ < 2650000), REQ)
##          Date VOD_시청시간 VOD_시청건수     REQ     INV     Res     Imp
## 1  2018-09-30      2135526      3896865  221664  297700  209140  554894
## 2  2018-07-02      1775243      3532602  736092 1189102  705927  709272
## 3  2018-10-01      1755646      3356570 2324932 2846820 2099119 1355917
## 4  2018-12-07      1807782      3567662 2466112 2872066 2032149 2852099
## 5  2018-02-16      1448428      2751726 2492996 4050293 3108664 2858587
## 6  2019-06-11      1328498      2600687 2577232 4380563 2897582 2809274
## 7  2018-12-06      1725200      3412828 2589182 2954937 2111337 2770486
## 8  2018-07-10      1709360      3476201 2590101 4002227 2311666 2309964
## 9  2019-05-31      1369062      2656316 2618025 4496329 2779988 2697892
## 10 2019-06-12      1340062      2657927 2628286 4491931 2957502 2867923
## 11 2019-05-30      1397194      2680261 2642520 4534099 2751991 2674374
##    REQ_chg_flag
## 1         FALSE
## 2         FALSE
## 3         FALSE
## 4         FALSE
## 5         FALSE
## 6         FALSE
## 7         FALSE
## 8         FALSE
## 9         FALSE
## 10        FALSE
## 11        FALSE
btv_pre_roll$REQ_chg_flag [btv_pre_roll$REQ < 2600000] <- TRUE
#btv_pre_roll$REQ [REQ < 2600000] <- round(btv_pre_roll$VOD_시청건수 [REQ < 2600000] * 0.98, 0)
btv_pre_roll$REQ [btv_pre_roll$REQ < 2600000] <- btv_pre_roll$VOD_시청건수 [btv_pre_roll$REQ < 2600000]
filter(btv_pre_roll, REQ_chg_flag == TRUE)
##         Date VOD_시청시간 VOD_시청건수     REQ     INV     Res     Imp
## 1 2018-02-16      1448428      2751726 2751726 4050293 3108664 2858587
## 2 2018-07-02      1775243      3532602 3532602 1189102  705927  709272
## 3 2018-07-10      1709360      3476201 3476201 4002227 2311666 2309964
## 4 2018-09-30      2135526      3896865 3896865  297700  209140  554894
## 5 2018-10-01      1755646      3356570 3356570 2846820 2099119 1355917
## 6 2018-12-06      1725200      3412828 3412828 2954937 2111337 2770486
## 7 2018-12-07      1807782      3567662 3567662 2872066 2032149 2852099
## 8 2019-06-11      1328498      2600687 2600687 4380563 2897582 2809274
##   REQ_chg_flag
## 1         TRUE
## 2         TRUE
## 3         TRUE
## 4         TRUE
## 5         TRUE
## 6         TRUE
## 7         TRUE
## 8         TRUE
###2018-05-15 ~ 2018-06-12까지 이상 데이터 대체 처리 >> 29개
btv_pre_roll$REQ_chg_flag [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12'] <- TRUE
btv_pre_roll$REQ [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12'] <- 
  btv_pre_roll$VOD_시청건수 [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12']

## btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수 ## >>> VOD_시청건수를 가지고 예측 후에 비교분석하여 나중에 검토할 것...
## boxplot(btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수)
## summary(btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수)
## filter(btv_pre_roll, btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수 < 0.9 | btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수  > 1.2)

ggplot(btv_pre_roll, aes(x=Date, y=REQ)) +
  geom_point() + geom_smooth(method = 'lm', color = 'red', linetype =2) + # 직선 추세선 추가
  geom_smooth() + # 곡선 추세선 추가
  ggtitle("Raw Data - after correction")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# 요일  구하기
btv_pre_roll$weekday = factor(weekdays(btv_pre_roll$Date), 
                              levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(btv_pre_roll$weekday)
##    Monday   Tuesday Wednesday  Thursday    Friday  Saturday    Sunday 
##        79        79        78        78        78        78        78
# 월(month)  구하기
btv_pre_roll$month <-factor(substr(btv_pre_roll$Date,6,7),
                            levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(btv_pre_roll$month)
## 01 02 03 04 05 06 07 08 09 10 11 12 
## 62 56 62 60 62 60 33 31 30 31 30 31
length(btv_pre_roll$month)
## [1] 548
# 특일 구하기
# 참고 페이지 : https://m.blog.naver.com/hancury/221057426711
# 사이트 : https://www.data.go.kr/
# 메뉴명 : 마이페이지 > OPEN API > 개발계정 상세보기
#인증키 "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"
if(!require(glue)){install.packages("glue"); library(glue)}
## Loading required package: glue
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
if(!require(XML)){install.packages("XML"); library(XML)}
## Loading required package: XML
if(!require(stringr)){install.packages("stringr"); library(stringr)}
## Loading required package: stringr
api.key <- "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"

url.format <- 
  'http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo?ServiceKey={key}&solYear={year}&solMonth={month}'

holiday.request <- function(key, year, month) glue(url.format)

df_holiday <- data.frame(dateName=NULL, Date=NULL)

for(m in 1:12){
  holiday_2018 <- xmlToList(holiday.request(api.key, 2018, str_pad(m, 2, pad=0)))
  holiday_2019 <- xmlToList(holiday.request(api.key, 2019, str_pad(m, 2, pad=0)))
  
  items_2018 <- holiday_2018$body$items
  items_2019 <- holiday_2019$body$items
  items_test <- holiday_2018$body$items
  items_test <- holiday_2019$body$items
  
  for(item_2018 in items_2018){
    if(item_2018$isHoliday == 'Y') {
      #print(paste(item_2018$dateName, item_2018$locdate, sep=' : '))
      df_holiday <- rbind(df_holiday, 
                          data.frame(dateName = item_2018$dateName, 
                                     Date = (paste(substr(item_2018$locdate,1,4),
                                                   substr(item_2018$locdate,5,6), 
                                                   substr(item_2018$locdate,7,8), sep = '-')),
                                     stringsAsFactors = FALSE))
    }
  }
  
  for(item_2019 in items_2019){
    if(item_2019$isHoliday == 'Y') {
      #print(paste(item_2019$dateName, item_2019$locdate, sep=' : '))
      df_holiday <- rbind(df_holiday, 
                          data.frame(dateName = item_2019$dateName, 
                                     Date = (paste(substr(item_2019$locdate,1,4),
                                                   substr(item_2019$locdate,5,6), 
                                                   substr(item_2019$locdate,7,8), sep = '-')),
                                     stringsAsFactors = FALSE))
    }
  }
  
}

# 날짜 데이터의 데이터 타입 변환
df_holiday$Date <- as.Date(df_holiday$Date)

### Left 조인하기 : 광고 데이터 + 특일 데이터..
btv_pre_roll <- left_join(btv_pre_roll, df_holiday)
## Joining, by = "Date"
# 특일 데이터 범주화
btv_pre_roll <- mutate(btv_pre_roll, isHolyday = as.numeric(!is.na(dateName)))

# 필요한 데이터 잘라내기
#head(btv_pre_roll); tail(btv_pre_roll)
btv_pre_roll2<- filter(btv_pre_roll, Date <= '2019-04-30')
btv_pre_roll3 <- filter(btv_pre_roll, Date <= '2019-05-31')
#tail(btv_pre_roll2); tail(btv_pre_roll3)

test_btv_pre_roll2 <- select(btv_pre_roll2, Date, REQ, weekday, month, isHolyday)
test_btv_pre_roll3 <- select(btv_pre_roll3, Date, REQ, weekday, month, isHolyday)
#tail(test_btv_pre_roll2); tail(test_btv_pre_roll3)

# 예측 Dataset 생성
# 예측할 날짜로 된 수열 생성하기

s_date2 <- as.Date("2019-05-01")
e_date2 <- as.Date("2019-05-31")
s_date3 <- as.Date("2019-06-01")
e_date3 <- as.Date("2019-06-30")
add_df2 <- data.frame(Date = seq(from = s_date2, to=e_date2, by=1))
add_df2$Date <- as.Date(add_df2$Date); add_df2$REQ <- NA; add_df2$weekday <- NA; add_df2$month <- NA; add_df2$isHolyday <- NA; 
add_df3 <- data.frame(Date = seq(from = s_date3, to=e_date3, by=1))
add_df3$Date <- as.Date(add_df3$Date); add_df3$REQ <- NA; add_df3$weekday <- NA; add_df3$month <- NA; add_df3$isHolyday <- NA;

# 요일  구하기
#Sys.setlocale("LC_TIME", "English")

#as.character.Date(weekdays(as.Date(add_df2$Date)))
#as.character(weekdays(as.Date('2019-10-01')))
#localeToCharset()
#as.character(weekdays(as.Date(add_df2$Date)))


add_df2$weekday = factor(weekdays(add_df2$Date), 
                         levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(add_df2$weekday)
##    Monday   Tuesday Wednesday  Thursday    Friday  Saturday    Sunday 
##         4         4         5         5         5         4         4
add_df3$weekday = factor(weekdays(add_df3$Date), 
                         levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(add_df3$weekday)
##    Monday   Tuesday Wednesday  Thursday    Friday  Saturday    Sunday 
##         4         4         4         4         4         5         5
# 월(month)  구하기
add_df2$month <-factor(substr(add_df2$Date,6,7),
                       levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(add_df2$month)
## 01 02 03 04 05 06 07 08 09 10 11 12 
##  0  0  0  0 31  0  0  0  0  0  0  0
add_df3$month <-factor(substr(add_df3$Date,6,7),
                       levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(add_df3$month)
## 01 02 03 04 05 06 07 08 09 10 11 12 
##  0  0  0  0  0 30  0  0  0  0  0  0
# 특일 데이터 범주화 추가
add_df2$isHolyday <- as.numeric(!is.na(left_join(add_df2, df_holiday)$dateName))
## Joining, by = "Date"
add_df3$isHolyday <- as.numeric(!is.na(left_join(add_df3, df_holiday)$dateName))
## Joining, by = "Date"
### 숙제 :: 특일 데이터가 몇개인지 세어볼 것???

# 기존 데이터와 예측치 결합
test_btv_pre_roll2 <- rbind(test_btv_pre_roll2, add_df2)
test_btv_pre_roll3 <- rbind(test_btv_pre_roll3, add_df3)

#View(filter(test_btv_pre_roll2, Date >= '2019-04-01'))
#View(filter(test_btv_pre_roll3, Date >= '2019-05-01'))

### 모델 평가하기as.numeric()
## 원래는 training dataset와 test dataset를 나눠서 테스트 해야 하지만...
## 표본이 작은 관계로..  관측치 데이터 전체를 사용하여 평가함...

# several models
model1 = lm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
              weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
            data = btv_pre_roll2)

model2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
               weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
             family = gaussian(link =  'identity'), data = btv_pre_roll2)

model3 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
               weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
             family = Gamma(link =  'inverse'), data = btv_pre_roll2)

model4 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
               weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
             family = inverse.gaussian(link =  '1/mu^2'), data = btv_pre_roll2)

#test_btv_pre_roll3
model1_2 = lm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
                weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
              data = btv_pre_roll3)

model2_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
                 weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
               family = gaussian(link =  'identity'), data = btv_pre_roll3)

model3_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
                 weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
               family = Gamma(link =  'identity'), data = btv_pre_roll3)

model4_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) + 
                 weekday:month + weekday:isHolyday + weekday:month:isHolyday, 
               family = inverse.gaussian(link =  '1/mu^2'), data = btv_pre_roll3)

# 점/구간 추정 예측
m1_c_interval <- predict(model1, newdata=test_btv_pre_roll2, type='response', interval="confidence", level = 0.99)
## Warning in predict.lm(model1, newdata = test_btv_pre_roll2, type =
## "response", : prediction from a rank-deficient fit may be misleading
m1_p_interval <- predict(model1, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(model1, newdata = test_btv_pre_roll2, type =
## "response", : prediction from a rank-deficient fit may be misleading
m2_c_interval <- predict(model2, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m2_p_interval <- predict(model2, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_c_interval <- predict(model3, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_p_interval <- predict(model3, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_c_interval <- predict(model4, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_p_interval <- predict(model4, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
test_btv_pre_roll2$m1_fit <- m1_c_interval[,1] # fit
test_btv_pre_roll2$m1_cnf_lwr <- m1_c_interval[,2] # confience lower
test_btv_pre_roll2$m1_cnf_upr <- m1_c_interval[,3] # confience upper
test_btv_pre_roll2$m1_prd_lwr <- m1_p_interval[,2] # prediction lower
test_btv_pre_roll2$m1_prd_upr <- m1_p_interval[,3] # prediction upper

test_btv_pre_roll2$m2_fit <- m2_c_interval # fit

test_btv_pre_roll2$m3_fit <- m3_c_interval # fit

test_btv_pre_roll2$m4_fit <- m4_c_interval # fit

#test_btv_pre_roll2 %>% filter(Date >= '2019-05-01' & Date <= '2019-05-31') %>%
#  summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_cnf_lwr), sum(m2_fit), sum(m2_cnf_upr), sum(m3_cnf_lwr), sum(m3_fit), sum(m3_cnf_upr), sum(m4_cnf_lwr), sum(m4_fit), sum(m4_cnf_upr))

test_btv_pre_roll2 %>% filter(Date >= '2019-05-01' & Date <= '2019-05-31') %>%
  summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_fit), sum(m3_fit), sum(m4_fit))  
##   sum(m1_cnf_lwr) sum(m1_fit) sum(m1_cnf_upr) sum(m2_fit) sum(m3_fit)
## 1        94388014   105831672       117275329   105831672   105543903
##   sum(m4_fit)
## 1   105504004
pp <- ggplot(test_btv_pre_roll2, aes(x=Date))
pp2 <- pp + geom_point(aes(y = REQ), color = "black", alpha = 0.3)
pp3 <- pp2 + geom_point(aes(y = m1_fit), color = "red", alpha = 0.3)
pp4 <- pp2 + geom_point(aes(y = m2_c_interval), color = "red", alpha = 0.3)
pp5 <- pp2 + geom_point(aes(y = m3_c_interval), color = "red", alpha = 0.3)
pp6 <- pp2 + geom_point(aes(y = m4_c_interval), color = "red", alpha = 0.3)




#test_btv_pre_roll3
# 점/구간 추정 예측
m1_c_interval2 <- predict(model1_2, newdata=test_btv_pre_roll3, type='response', interval="confidence", level = 0.99)
## Warning in predict.lm(model1_2, newdata = test_btv_pre_roll3, type =
## "response", : prediction from a rank-deficient fit may be misleading
m1_p_interval2 <- predict(model1_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(model1_2, newdata = test_btv_pre_roll3, type =
## "response", : prediction from a rank-deficient fit may be misleading
m2_c_interval2 <- predict(model2_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m2_p_interval2 <- predict(model2_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_c_interval2 <- predict(model3_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_p_interval2 <- predict(model3_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_c_interval2 <- predict(model4_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_p_interval2 <- predict(model4_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
test_btv_pre_roll3$m1_fit <- m1_c_interval2[,1] # fit
test_btv_pre_roll3$m1_cnf_lwr <- m1_c_interval2[,2] # confience lower
test_btv_pre_roll3$m1_cnf_upr <- m1_c_interval2[,3] # confience upper
test_btv_pre_roll3$m1_prd_lwr <- m1_p_interval2[,2] # prediction lower
test_btv_pre_roll3$m1_prd_upr <- m1_p_interval2[,3] # prediction upper

test_btv_pre_roll3$m2_fit <- m2_c_interval2 # fit

test_btv_pre_roll3$m3_fit <- m3_c_interval2 # fit

test_btv_pre_roll3$m4_fit <- m4_c_interval2 # fit

#test_btv_pre_roll3 %>% filter(Date >= '2019-06-01' & Date <= '2019-06-30') %>%
#  summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_cnf_lwr), sum(m2_fit), sum(m2_cnf_upr), sum(m3_cnf_lwr), sum(m3_fit), sum(m3_cnf_upr), sum(m4_cnf_lwr), sum(m4_fit), sum(m4_cnf_upr))

test_btv_pre_roll3 %>% filter(Date >= '2019-06-01' & Date <= '2019-06-30') %>%
  summarise(sum(m1_prd_lwr), sum(m1_fit), sum(m1_prd_upr), sum(m2_fit), sum(m3_fit), sum(m4_fit))  
##   sum(m1_prd_lwr) sum(m1_fit) sum(m1_prd_upr) sum(m2_fit) sum(m3_fit)
## 1        73443787    88901594       104359400    88901594    86832119
##   sum(m4_fit)
## 1    90640824
pp <- ggplot(test_btv_pre_roll3, aes(x=Date))
pp2 <- pp + geom_point(aes(y = REQ), color = "black", alpha = 0.3)
pp3 <- pp2 + geom_point(aes(y = m1_fit), color = "blue", alpha = 0.3)
pp4 <- pp2 + geom_point(aes(y = m2_c_interval2), color = "blue", alpha = 0.3)
pp5 <- pp2 + geom_point(aes(y = m3_c_interval2), color = "blue", alpha = 0.3)
pp6 <- pp2 + geom_point(aes(y = m4_c_interval2), color = "blue", alpha = 0.3)


filter(test_btv_pre_roll2, Date >= '2019-04-01')
##          Date     REQ   weekday month isHolyday  m1_fit m1_cnf_lwr
## 1  2019-04-01 3282152    Monday    04         0 3289837    3104535
## 2  2019-04-02 3184513   Tuesday    04         0 3211098    3017734
## 3  2019-04-03 3160823 Wednesday    04         0 3203076    2996555
## 4  2019-04-04 3087324  Thursday    04         0 3209590    3002911
## 5  2019-04-05 3107286    Friday    04         0 3239256    3032415
## 6  2019-04-06 3981096  Saturday    04         0 3908532    3701523
## 7  2019-04-07 3946311    Sunday    04         0 3914475    3716812
## 8  2019-04-08 3210087    Monday    04         0 3284184    3097018
## 9  2019-04-09 3292649   Tuesday    04         0 3205430    3010556
## 10 2019-04-10 3232678 Wednesday    04         0 3197391    2989044
## 11 2019-04-11 3185824  Thursday    04         0 3203888    2995332
## 12 2019-04-12 3153292    Friday    04         0 3233538    3024769
## 13 2019-04-13 3815088  Saturday    04         0 3902798    3693809
## 14 2019-04-14 4119517    Sunday    04         0 3908725    3708720
## 15 2019-04-15 3175054    Monday    04         0 3278418    3088899
## 16 2019-04-16 3068774   Tuesday    04         0 3199647    3002791
## 17 2019-04-17 3041754 Wednesday    04         0 3191592    2980966
## 18 2019-04-18 3101583  Thursday    04         0 3198073    2987184
## 19 2019-04-19 3117815    Friday    04         0 3227707    3016549
## 20 2019-04-20 3761864  Saturday    04         0 3896950    3685518
## 21 2019-04-21 3846860    Sunday    04         0 3902861    3700029
## 22 2019-04-22 3138914    Monday    04         0 3272538    3080151
## 23 2019-04-23 3122087   Tuesday    04         0 3193750    2994413
## 24 2019-04-24 3025289 Wednesday    04         0 3185679    2972299
## 25 2019-04-25 3144534  Thursday    04         0 3192144    2978442
## 26 2019-04-26 3217561    Friday    04         0 3221762    3007733
## 27 2019-04-27 3636698  Saturday    04         0 3890988    3676627
## 28 2019-04-28 3714765    Sunday    04         0 3896883    3690716
## 29 2019-04-29 3187055    Monday    04         0 3266544    3070756
## 30 2019-04-30 3024436   Tuesday    04         0 3187740    2985397
## 31 2019-05-01      NA Wednesday    05         0 3290099    2992034
## 32 2019-05-02      NA  Thursday    05         0 3214975    2915944
## 33 2019-05-03      NA    Friday    05         0 3150689    2825612
## 34 2019-05-04      NA  Saturday    05         0 3987377    3621653
## 35 2019-05-05      NA    Sunday    05         1 3497715    2783210
## 36 2019-05-06      NA    Monday    05         1 3942262    3352082
## 37 2019-05-07      NA   Tuesday    05         0 3395649    3068187
## 38 2019-05-08      NA Wednesday    05         0 3283958    2980684
## 39 2019-05-09      NA  Thursday    05         0 3208818    2904503
## 40 2019-05-10      NA    Friday    05         0 3144516    2814550
## 41 2019-05-11      NA  Saturday    05         0 3981187    3610974
## 42 2019-05-12      NA    Sunday    05         1 3491509    2774700
## 43 2019-05-13      NA    Monday    05         0 3297765    2925762
## 44 2019-05-14      NA   Tuesday    05         0 3389411    3056860
## 45 2019-05-15      NA Wednesday    05         0 3277703    2968809
## 46 2019-05-16      NA  Thursday    05         0 3202547    2892538
## 47 2019-05-17      NA    Friday    05         0 3138228    2802981
## 48 2019-05-18      NA  Saturday    05         0 3974883    3599814
## 49 2019-05-19      NA    Sunday    05         0 3742575    3405213
## 50 2019-05-20      NA    Monday    05         0 3291428    2914434
## 51 2019-05-21      NA   Tuesday    05         0 3383058    3045018
## 52 2019-05-22      NA Wednesday    05         0 3271334    2956404
## 53 2019-05-23      NA  Thursday    05         0 3196161    2880040
## 54 2019-05-24      NA    Friday    05         0 3131827    2790893
## 55 2019-05-25      NA  Saturday    05         0 3968465    3588160
## 56 2019-05-26      NA    Sunday    05         0 3736141    3392946
## 57 2019-05-27      NA    Monday    05         0 3284977    2902611
## 58 2019-05-28      NA   Tuesday    05         0 3376591    3032653
## 59 2019-05-29      NA Wednesday    05         0 3264851    2943460
## 60 2019-05-30      NA  Thursday    05         0 3189662    2867004
## 61 2019-05-31      NA    Friday    05         0 3125311    2778279
##    m1_cnf_upr m1_prd_lwr m1_prd_upr  m2_fit  m3_fit  m4_fit
## 1     3475138    2835668    3744006 3289837 3297556 3301685
## 2     3404463    2754995    3667202 3211098 3220428 3224994
## 3     3409597    2743658    3662494 3203076 3213765 3219095
## 4     3416268    2750130    3669049 3209590 3219968 3225213
## 5     3446097    2779755    3698757 3239256 3248511 3253367
## 6     4115540    3448987    4368076 3908532 3891653 3882855
## 7     4112138    3457310    4371640 3914475 3895171 3885206
## 8     3471350    2829575    3738794 3284184 3291730 3296027
## 9     3400303    2748956    3661903 3205430 3214852 3219702
## 10    3405738    2737497    3657284 3197391 3208194 3213814
## 11    3412444    2743940    3663836 3203888 3214357 3219883
## 12    3442308    2773534    3693542 3233538 3242782 3247879
## 13    4111786    3442736    4362859 3902798 3883406 3873502
## 14    4108729    3450973    4366477 3908725 3886882 3875804
## 15    3467937    2823246    3733589 3278418 3285788 3290262
## 16    3396503    2742683    3656611 3199647 3209166 3214309
## 17    3402217    2731100    3652084 3191592 3202513 3208433
## 18    3408962    2737512    3658634 3198073 3208636 3214453
## 19    3438864    2767074    3688339 3227707 3236940 3242287
## 20    4108382    3436245    4357655 3896950 3875005 3863996
## 21    4105693    3444391    4361330 3902861 3878438 3866249
## 22    3464924    2816673    3728403 3272538 3279733 3284391
## 23    3393088    2736166    3651335 3193750 3203372 3208816
## 24    3399060    2724456    3646902 3185679 3196724 3202952
## 25    3405846    2730835    3653453 3192144 3202806 3208923
## 26    3435791    2760365    3683158 3221762 3230988 3236594
## 27    4105350    3429503    4352474 3890988 3866452 3854339
## 28    4103050    3437556    4356209 3896883 3869843 3856544
## 29    3462332    2809844    3723243 3266544 3273566 3278415
## 30    3390083    2729396    3646084 3187740 3197469 3203225
## 31    3588164    2802544    3777654 3290099 3297857 3304662
## 32    3514005    2727079    3702871 3214975 3229092 3238443
## 33    3475766    2653277    3648102 3150689 3170566 3182036
## 34    4353101    3473941    4500812 3987377 3925994 3896607
## 35    4212219    2804214    4191216 3497715 3452166 3436462
## 36    4532441    3319864    4564659 3942262 3886326 3860226
## 37    3723111    2897335    3893963 3395649 3393262 3395940
## 38    3587233    2794553    3773363 3283958 3291446 3298398
## 39    3513132    2719041    3698595 3208818 3222926 3232529
## 40    3474481    2645250    3643782 3144516 3164603 3176408
## 41    4351399    3465899    4496475 3981187 3916828 3886253
## 42    4208318    2796638    4186380 3491509 3445055 3429332
## 43    3669767    2781734    3813795 3297765 3302745 3308953
## 44    3721962    2889156    3889665 3389411 3386351 3389015
## 45    3586597    2786276    3769131 3277703 3284923 3292032
## 46    3512555    2710714    3694379 3202547 3216653 3226519
## 47    3473476    2636937    3639520 3138228 3158537 3170686
## 48    4349952    3457573    4492193 3974883 3907512 3875757
## 49    4079938    3240466    4244685 3742575 3701131 3684844
## 50    3668422    2773312    3809544 3291428 3296080 3302427
## 51    3721098    2880686    3885430 3383058 3379324 3381984
## 52    3586265    2777701    3764967 3271334 3278291 3285567
## 53    3512282    2702090    3690233 3196161 3210275 3220413
## 54    3472761    2628328    3635326 3131827 3152369 3164874
## 55    4348770    3448955    4487975 3968465 3898049 3865123
## 56    4079336    3231757    4240525 3736141 3692615 3675675
## 57    3667344    2764595    3805360 3284977 3289305 3295802
## 58    3720529    2871915    3881267 3376591 3372182 3374848
## 59    3586242    2768822    3760880 3264851 3271550 3279004
## 60    3512319    2693159    3686165 3189662 3203792 3214213
## 61    3472342    2619415    3631207 3125311 3146100 3158972
filter(test_btv_pre_roll3, Date >= '2019-05-01')
##          Date     REQ   weekday month isHolyday  m1_fit m1_cnf_lwr
## 1  2019-05-01 3616125 Wednesday    05         0 3006488    2818096
## 2  2019-05-02 2955011  Thursday    05         0 2901835    2713319
## 3  2019-05-03 2907704    Friday    05         0 2870009    2673210
## 4  2019-05-04 3236108  Saturday    05         0 3570458    3348141
## 5  2019-05-05 3036148    Sunday    05         1 3269726    2860513
## 6  2019-05-06 3660025    Monday    05         1 3690200    3277719
## 7  2019-05-07 2868780   Tuesday    05         0 2982781    2772528
## 8  2019-05-08 2703155 Wednesday    05         0 2986413    2796487
## 9  2019-05-09 2747889  Thursday    05         0 2881690    2691607
## 10 2019-05-10 2807466    Friday    05         0 2849793    2651702
## 11 2019-05-11 3451599  Saturday    05         0 3550172    3326644
## 12 2019-05-12 3482948    Sunday    05         1 3249370    2840156
## 13 2019-05-13 2890072    Monday    05         0 2999994    2757674
## 14 2019-05-14 2775220   Tuesday    05         0 2962283    2750477
## 15 2019-05-15 2806413 Wednesday    05         0 2965844    2774077
## 16 2019-05-16 2800079  Thursday    05         0 2861050    2669092
## 17 2019-05-17 2819470    Friday    05         0 2829083    2629403
## 18 2019-05-18 3544076  Saturday    05         0 3529391    3304385
## 19 2019-05-19 3722838    Sunday    05         0 3508866    3260035
## 20 2019-05-20 2884655    Monday    05         0 2979072    2735158
## 21 2019-05-21 2742861   Tuesday    05         0 2941290    2727641
## 22 2019-05-22 2691009 Wednesday    05         0 2944781    2750852
## 23 2019-05-23 2695802  Thursday    05         0 2839916    2645759
## 24 2019-05-24 2752395    Friday    05         0 2807879    2606296
## 25 2019-05-25 3354832  Saturday    05         0 3508116    3281347
## 26 2019-05-26 3359779    Sunday    05         0 3487520    3236409
## 27 2019-05-27 3006505    Monday    05         0 2957656    2711878
## 28 2019-05-28 2680322   Tuesday    05         0 2919803    2704003
## 29 2019-05-29 2682910 Wednesday    05         0 2923222    2726796
## 30 2019-05-30 2642520  Thursday    05         0 2818287    2621594
## 31 2019-05-31 2618025    Friday    05         0 2786179    2582366
## 32 2019-06-01      NA  Saturday    06         0 3441481    3146587
## 33 2019-06-02      NA    Sunday    06         0 3366607    3044361
## 34 2019-06-03      NA    Monday    06         0 2782393    2459545
## 35 2019-06-04      NA   Tuesday    06         0 2787854    2464398
## 36 2019-06-05      NA Wednesday    06         0 2665811    2230725
## 37 2019-06-06      NA  Thursday    06         1 3783893    3087435
## 38 2019-06-07      NA    Friday    06         0 2779416    2481444
## 39 2019-06-08      NA  Saturday    06         0 3419215    3120553
## 40 2019-06-09      NA    Sunday    06         0 3344271    3018552
## 41 2019-06-10      NA    Monday    06         0 2759985    2433621
## 42 2019-06-11      NA   Tuesday    06         0 2765376    2438359
## 43 2019-06-12      NA Wednesday    06         0 2643261    2205437
## 44 2019-06-13      NA  Thursday    06         0 2696619    2368279
## 45 2019-06-14      NA    Friday    06         0 2756725    2454761
## 46 2019-06-15      NA  Saturday    06         0 3396453    3093752
## 47 2019-06-16      NA    Sunday    06         0 3321438    2991990
## 48 2019-06-17      NA    Monday    06         0 2737082    2406943
## 49 2019-06-18      NA   Tuesday    06         0 2742401    2411565
## 50 2019-06-19      NA Wednesday    06         0 2620216    2179443
## 51 2019-06-20      NA  Thursday    06         0 2673502    2341252
## 52 2019-06-21      NA    Friday    06         0 2733538    2427305
## 53 2019-06-22      NA  Saturday    06         0 3373195    3066177
## 54 2019-06-23      NA    Sunday    06         0 3298109    2964668
## 55 2019-06-24      NA    Monday    06         0 2713682    2379503
## 56 2019-06-25      NA   Tuesday    06         0 2718930    2384008
## 57 2019-06-26      NA Wednesday    06         0 2596674    2152735
## 58 2019-06-27      NA  Thursday    06         0 2649890    2313460
## 59 2019-06-28      NA    Friday    06         0 2709854    2399070
## 60 2019-06-29      NA  Saturday    06         0 3349440    3037822
## 61 2019-06-30      NA    Sunday    06         0 3274283    2936577
##    m1_cnf_upr m1_prd_lwr m1_prd_upr  m2_fit  m3_fit  m4_fit
## 1     3194879    2544207    3468768 3006488 2980956 3037399
## 2     3090351    2439525    3364145 2901835 2873765 2946387
## 3     3066809    2405712    3334306 2870009 2850409 2912311
## 4     3792776    3099563    4041354 3570458 3539154 3516246
## 5     3678940    2731354    3808099 3269726 3273133 3268598
## 6     4102682    3150391    4230010 3690200 3669246 3603377
## 7     3193034    2515094    3450469 2982781 2940377 3018634
## 8     3176339    2523770    3449056 2986413 2957552 3023061
## 9     3071772    2419010    3344369 2881690 2850276 2933248
## 10    3047885    2385179    3314408 2849793 2826835 2899575
## 11    3773700    3078946    4021398 3550172 3515495 3493819
## 12    3658583    2710997    3787742 3249370 3249389 3250498
## 13    3242315    2523439    3476550 2999994 2974950 3034724
## 14    3174089    2494191    3430375 2962283 2916463 3004259
## 15    3157611    2502764    3428925 2965844 2933553 3008573
## 16    3053008    2397924    3324176 2861050 2826192 2919961
## 17    3028764    2364077    3294090 2829083 2802666 2886695
## 18    3754398    3057759    4001023 3529391 3491241 3471276
## 19    3757698    3030379    3987354 3508866 3483664 3433415
## 20    3222987    2502048    3456097 2979072 2950526 3019814
## 21    3154940    2472715    3409865 2941290 2891954 2989742
## 22    3138709    2481181    3408380 2944781 2908958 2993944
## 23    3034073    2376261    3303571 2839916 2801513 2906536
## 24    3009461    2342399    3273358 2807879 2777901 2873678
## 25    3734885    3035997    3980235 3508116 3466391 3448633
## 26    3738631    3008346    3966695 3487520 3458729 3411427
## 27    3203433    2480079    3435232 2957656 2925505 3004771
## 28    3135602    2450660    3388946 2919803 2866848 2975093
## 29    3119649    2459017    3387428 2923222 2883768 2979185
## 30    3014980    2354016    3282558 2818287 2776237 2892982
## 31    2989992    2320139    3252219 2786179 2752540 2860532
## 32    3736375    2948124    3934838 3441481 3380332 3345242
## 33    3688854    2863476    3869739 3366607 3305518 3298108
## 34    3105241    2279039    3285747 2782393 2720565 2910698
## 35    3111310    2284275    3291433 2787854 2725462 2914704
## 36    3100896    2115857    3215764 2665811 2600948 2829033
## 37    4480351    3096073    4471712 3783893 3725935 3402525
## 38    3077389    2284993    3273840 2779416 2715224 2909411
## 39    3717877    2924552    3913879 3419215 3354289 3324012
## 40    3669989    2839851    3848690 3344271 3279389 3277693
## 41    3086350    2255325    3264646 2759985 2694351 2896591
## 42    3092392    2260472    3270280 2765376 2699163 2900493
## 43    3081085    2092055    3194467 2643261 2574564 2815992
## 44    3024959    2191219    3202018 2696619 2628620 2853523
## 45    3058690    2260906    3252544 2756725 2688669 2895143
## 46    3699155    2900375    3892531 3396453 3327649 3302714
## 47    3650885    2815623    3827253 3321438 3252664 3257203
## 48    3067220    2231007    3243157 2737082 2667541 2882377
## 49    3073237    2236063    3248739 2742401 2672267 2886177
## 50    3060989    2067656    3172776 2620216 2547583 2802845
## 51    3005753    2166631    3180374 2673502 2601554 2839803
## 52    3039771    2236211    3230865 2733538 2661517 2880773
## 53    3680213    2875589    3870801 3373195 3300412 3281362
## 54    3631550    2790786    3805432 3298109 3225341 3236654
## 55    3047860    2206079    3221284 2713682 2640132 2868063
## 56    3053853    2211045    3226816 2718930 2644774 2871763
## 57    3040613    2042653    3150695 2596674 2520004 2789598
## 58    2986319    2141430    3158349 2649890 2573889 2825986
## 59    3020638    2210901    3208808 2709854 2633767 2866309
## 60    3661059    2850187    3848694 3349440 3272576 3259970
## 61    3611989    2765335    3783231 3274283 3197420 3216057
### 
# 모델 신뢰구간
#confint(model1)
#confint(model2, level = 0.99)
#confint(model3, level = 0.99)
#confint(model4, level = 0.99)
#confint(model1_2, level = 0.99)
#confint(model2_2, level = 0.99)
#confint(model3_2, level = 0.99)
#confint(model4_2, level = 0.99)


###  예측기간 관측치 더하기 (검증을 위한 사후 처리...)
test_btv_pre_roll2$REQ [test_btv_pre_roll2$Date >= '2019-05-01' & test_btv_pre_roll2$Date <= '2019-05-31'] <- 
  btv_pre_roll$REQ [btv_pre_roll$Date >= '2019-05-01' & btv_pre_roll$Date <= '2019-05-31']

test_btv_pre_roll3$REQ [test_btv_pre_roll3$Date >= '2019-06-01' & test_btv_pre_roll3$Date <= '2019-06-30'] <- 
  btv_pre_roll$REQ [btv_pre_roll$Date >= '2019-06-01' & btv_pre_roll$Date <= '2019-06-30']

cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m1_fit)^2
## [1] 0.7858214
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m2_fit)^2
## [1] 0.7858214
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m3_fit)^2
## [1] 0.7855516
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m4_fit)^2
## [1] 0.7841779
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m1_fit)^2
## [1] 0.841047
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m2_fit)^2
## [1] 0.841047
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m3_fit)^2
## [1] 0.8389602
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m4_fit)^2
## [1] 0.8235133
# MAE
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m1_fit))
## [1] 153112.3
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m2_fit))
## [1] 153112.3
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m3_fit))
## [1] 154276.4
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m4_fit))
## [1] 155203.9
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m1_fit))
## [1] 137020.5
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m2_fit))
## [1] 137020.5
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m3_fit))
## [1] 138880.1
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m4_fit))
## [1] 147364
# MAPE
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m1_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.342613
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m2_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.342613
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m3_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.375261
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m4_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.402082
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m1_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.775664
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m2_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.775664
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m3_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.825968
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m4_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 4.062741