선형회귀(linear regression)를 통한 inven 예측 R code2 source = 1.Btv_Preroll_인벤예측_요약본_20190705_2-확장테스트판(중요).R
if(!require(ggplot2)){install.packages("ggplot2"); library(ggplot2)}
## Loading required package: ggplot2
if(!require(dplyr)){install.packages("dplyr"); library(dplyr)}
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
### ★★★★★★ 모든 변수명 지우기
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 588646 31.5 1212601 64.8 1212601 64.8
## Vcells 1068755 8.2 8388608 64.0 1753441 13.4
#getwd()
setwd("/home/mjs0428/inven_forcast")
btv_pre_roll <- read.csv("./Btv_Preroll-Req_Inv_Res_Imp-data_20190702.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE, fileEncoding = "euc-kr")
glimpse(btv_pre_roll)
## Observations: 548
## Variables: 7
## $ Date <chr> "2018-01-01", "2018-01-02", "2018-01-03", "2018-…
## $ VOD_시청시간_초 <chr> "8,421,536,616", "7,878,348,464", "7,498,034,394", "7…
## $ VOD_시청건수 <chr> "4,496,473", "4,658,646", "4,535,338", "4,500,479", …
## $ REQ <chr> "4,567,997", "4,630,415", "4,533,702", "4,490,70…
## $ INV <chr> "7,377,096", "7,110,607", "6,932,244", "6,839,95…
## $ Res <chr> "4,787,165", "4,391,051", "4,252,549", "4,101,29…
## $ Imp <chr> "4,498,714", "4,141,349", "4,011,362", "3,872,03…
# 숫자형 데이터 변환
btv_pre_roll$REQ <- as.numeric(gsub(",", "", btv_pre_roll$REQ))
btv_pre_roll$INV <- as.numeric(gsub(",", "", btv_pre_roll$INV))
btv_pre_roll$Res <- as.numeric(gsub(",", "", btv_pre_roll$Res))
btv_pre_roll$Imp <- as.numeric(gsub(",", "", btv_pre_roll$Imp))
btv_pre_roll$VOD_시청시간_초 <- round(as.numeric(gsub(",", "", btv_pre_roll$VOD_시청시간_초))/3600,0)
# 변수명 바꾸기
btv_pre_roll <- rename(btv_pre_roll, VOD_시청시간 = VOD_시청시간_초)
btv_pre_roll$VOD_시청건수 <- as.numeric(gsub(",", "", btv_pre_roll$VOD_시청건수))
# 날짜형 타입으로 변환
btv_pre_roll$Date <- as.Date(btv_pre_roll$Date)
### VOD 시청건수와 큰 차이 데이터 대체처리, 260만 미만 8개
btv_pre_roll$REQ_chg_flag = FALSE
arrange(btv_pre_roll %>% filter(REQ < 2650000), REQ)
## Date VOD_시청시간 VOD_시청건수 REQ INV Res Imp
## 1 2018-09-30 2135526 3896865 221664 297700 209140 554894
## 2 2018-07-02 1775243 3532602 736092 1189102 705927 709272
## 3 2018-10-01 1755646 3356570 2324932 2846820 2099119 1355917
## 4 2018-12-07 1807782 3567662 2466112 2872066 2032149 2852099
## 5 2018-02-16 1448428 2751726 2492996 4050293 3108664 2858587
## 6 2019-06-11 1328498 2600687 2577232 4380563 2897582 2809274
## 7 2018-12-06 1725200 3412828 2589182 2954937 2111337 2770486
## 8 2018-07-10 1709360 3476201 2590101 4002227 2311666 2309964
## 9 2019-05-31 1369062 2656316 2618025 4496329 2779988 2697892
## 10 2019-06-12 1340062 2657927 2628286 4491931 2957502 2867923
## 11 2019-05-30 1397194 2680261 2642520 4534099 2751991 2674374
## REQ_chg_flag
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## 11 FALSE
btv_pre_roll$REQ_chg_flag [btv_pre_roll$REQ < 2600000] <- TRUE
#btv_pre_roll$REQ [REQ < 2600000] <- round(btv_pre_roll$VOD_시청건수 [REQ < 2600000] * 0.98, 0)
btv_pre_roll$REQ [btv_pre_roll$REQ < 2600000] <- btv_pre_roll$VOD_시청건수 [btv_pre_roll$REQ < 2600000]
filter(btv_pre_roll, REQ_chg_flag == TRUE)
## Date VOD_시청시간 VOD_시청건수 REQ INV Res Imp
## 1 2018-02-16 1448428 2751726 2751726 4050293 3108664 2858587
## 2 2018-07-02 1775243 3532602 3532602 1189102 705927 709272
## 3 2018-07-10 1709360 3476201 3476201 4002227 2311666 2309964
## 4 2018-09-30 2135526 3896865 3896865 297700 209140 554894
## 5 2018-10-01 1755646 3356570 3356570 2846820 2099119 1355917
## 6 2018-12-06 1725200 3412828 3412828 2954937 2111337 2770486
## 7 2018-12-07 1807782 3567662 3567662 2872066 2032149 2852099
## 8 2019-06-11 1328498 2600687 2600687 4380563 2897582 2809274
## REQ_chg_flag
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
## 7 TRUE
## 8 TRUE
###2018-05-15 ~ 2018-06-12까지 이상 데이터 대체 처리 >> 29개
btv_pre_roll$REQ_chg_flag [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12'] <- TRUE
btv_pre_roll$REQ [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12'] <-
btv_pre_roll$VOD_시청건수 [btv_pre_roll$Date >= '2018-05-15' & btv_pre_roll$Date <= '2018-06-12']
## btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수 ## >>> VOD_시청건수를 가지고 예측 후에 비교분석하여 나중에 검토할 것...
## boxplot(btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수)
## summary(btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수)
## filter(btv_pre_roll, btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수 < 0.9 | btv_pre_roll$REQ/btv_pre_roll$VOD_시청건수 > 1.2)
ggplot(btv_pre_roll, aes(x=Date, y=REQ)) +
geom_point() + geom_smooth(method = 'lm', color = 'red', linetype =2) + # 직선 추세선 추가
geom_smooth() + # 곡선 추세선 추가
ggtitle("Raw Data - after correction")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# 요일 구하기
btv_pre_roll$weekday = factor(weekdays(btv_pre_roll$Date),
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(btv_pre_roll$weekday)
## Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## 79 79 78 78 78 78 78
# 월(month) 구하기
btv_pre_roll$month <-factor(substr(btv_pre_roll$Date,6,7),
levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(btv_pre_roll$month)
## 01 02 03 04 05 06 07 08 09 10 11 12
## 62 56 62 60 62 60 33 31 30 31 30 31
length(btv_pre_roll$month)
## [1] 548
# 특일 구하기
# 참고 페이지 : https://m.blog.naver.com/hancury/221057426711
# 사이트 : https://www.data.go.kr/
# 메뉴명 : 마이페이지 > OPEN API > 개발계정 상세보기
#인증키 "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"
if(!require(glue)){install.packages("glue"); library(glue)}
## Loading required package: glue
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
if(!require(XML)){install.packages("XML"); library(XML)}
## Loading required package: XML
if(!require(stringr)){install.packages("stringr"); library(stringr)}
## Loading required package: stringr
api.key <- "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"
url.format <-
'http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo?ServiceKey={key}&solYear={year}&solMonth={month}'
holiday.request <- function(key, year, month) glue(url.format)
df_holiday <- data.frame(dateName=NULL, Date=NULL)
for(m in 1:12){
holiday_2018 <- xmlToList(holiday.request(api.key, 2018, str_pad(m, 2, pad=0)))
holiday_2019 <- xmlToList(holiday.request(api.key, 2019, str_pad(m, 2, pad=0)))
items_2018 <- holiday_2018$body$items
items_2019 <- holiday_2019$body$items
items_test <- holiday_2018$body$items
items_test <- holiday_2019$body$items
for(item_2018 in items_2018){
if(item_2018$isHoliday == 'Y') {
#print(paste(item_2018$dateName, item_2018$locdate, sep=' : '))
df_holiday <- rbind(df_holiday,
data.frame(dateName = item_2018$dateName,
Date = (paste(substr(item_2018$locdate,1,4),
substr(item_2018$locdate,5,6),
substr(item_2018$locdate,7,8), sep = '-')),
stringsAsFactors = FALSE))
}
}
for(item_2019 in items_2019){
if(item_2019$isHoliday == 'Y') {
#print(paste(item_2019$dateName, item_2019$locdate, sep=' : '))
df_holiday <- rbind(df_holiday,
data.frame(dateName = item_2019$dateName,
Date = (paste(substr(item_2019$locdate,1,4),
substr(item_2019$locdate,5,6),
substr(item_2019$locdate,7,8), sep = '-')),
stringsAsFactors = FALSE))
}
}
}
# 날짜 데이터의 데이터 타입 변환
df_holiday$Date <- as.Date(df_holiday$Date)
### Left 조인하기 : 광고 데이터 + 특일 데이터..
btv_pre_roll <- left_join(btv_pre_roll, df_holiday)
## Joining, by = "Date"
# 특일 데이터 범주화
btv_pre_roll <- mutate(btv_pre_roll, isHolyday = as.numeric(!is.na(dateName)))
# 필요한 데이터 잘라내기
#head(btv_pre_roll); tail(btv_pre_roll)
btv_pre_roll2<- filter(btv_pre_roll, Date <= '2019-04-30')
btv_pre_roll3 <- filter(btv_pre_roll, Date <= '2019-05-31')
#tail(btv_pre_roll2); tail(btv_pre_roll3)
test_btv_pre_roll2 <- select(btv_pre_roll2, Date, REQ, weekday, month, isHolyday)
test_btv_pre_roll3 <- select(btv_pre_roll3, Date, REQ, weekday, month, isHolyday)
#tail(test_btv_pre_roll2); tail(test_btv_pre_roll3)
# 예측 Dataset 생성
# 예측할 날짜로 된 수열 생성하기
s_date2 <- as.Date("2019-05-01")
e_date2 <- as.Date("2019-05-31")
s_date3 <- as.Date("2019-06-01")
e_date3 <- as.Date("2019-06-30")
add_df2 <- data.frame(Date = seq(from = s_date2, to=e_date2, by=1))
add_df2$Date <- as.Date(add_df2$Date); add_df2$REQ <- NA; add_df2$weekday <- NA; add_df2$month <- NA; add_df2$isHolyday <- NA;
add_df3 <- data.frame(Date = seq(from = s_date3, to=e_date3, by=1))
add_df3$Date <- as.Date(add_df3$Date); add_df3$REQ <- NA; add_df3$weekday <- NA; add_df3$month <- NA; add_df3$isHolyday <- NA;
# 요일 구하기
#Sys.setlocale("LC_TIME", "English")
#as.character.Date(weekdays(as.Date(add_df2$Date)))
#as.character(weekdays(as.Date('2019-10-01')))
#localeToCharset()
#as.character(weekdays(as.Date(add_df2$Date)))
add_df2$weekday = factor(weekdays(add_df2$Date),
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(add_df2$weekday)
## Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## 4 4 5 5 5 4 4
add_df3$weekday = factor(weekdays(add_df3$Date),
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
summary(add_df3$weekday)
## Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## 4 4 4 4 4 5 5
# 월(month) 구하기
add_df2$month <-factor(substr(add_df2$Date,6,7),
levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(add_df2$month)
## 01 02 03 04 05 06 07 08 09 10 11 12
## 0 0 0 0 31 0 0 0 0 0 0 0
add_df3$month <-factor(substr(add_df3$Date,6,7),
levels = c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'))
summary(add_df3$month)
## 01 02 03 04 05 06 07 08 09 10 11 12
## 0 0 0 0 0 30 0 0 0 0 0 0
# 특일 데이터 범주화 추가
add_df2$isHolyday <- as.numeric(!is.na(left_join(add_df2, df_holiday)$dateName))
## Joining, by = "Date"
add_df3$isHolyday <- as.numeric(!is.na(left_join(add_df3, df_holiday)$dateName))
## Joining, by = "Date"
### 숙제 :: 특일 데이터가 몇개인지 세어볼 것???
# 기존 데이터와 예측치 결합
test_btv_pre_roll2 <- rbind(test_btv_pre_roll2, add_df2)
test_btv_pre_roll3 <- rbind(test_btv_pre_roll3, add_df3)
#View(filter(test_btv_pre_roll2, Date >= '2019-04-01'))
#View(filter(test_btv_pre_roll3, Date >= '2019-05-01'))
### 모델 평가하기as.numeric()
## 원래는 training dataset와 test dataset를 나눠서 테스트 해야 하지만...
## 표본이 작은 관계로.. 관측치 데이터 전체를 사용하여 평가함...
# several models
model1 = lm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
data = btv_pre_roll2)
model2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = gaussian(link = 'identity'), data = btv_pre_roll2)
model3 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = Gamma(link = 'inverse'), data = btv_pre_roll2)
model4 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = inverse.gaussian(link = '1/mu^2'), data = btv_pre_roll2)
#test_btv_pre_roll3
model1_2 = lm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
data = btv_pre_roll3)
model2_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = gaussian(link = 'identity'), data = btv_pre_roll3)
model3_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = Gamma(link = 'identity'), data = btv_pre_roll3)
model4_2 = glm(formula = REQ ~ I(as.numeric(Date)^3) + I(as.numeric(Date)^2) +
weekday:month + weekday:isHolyday + weekday:month:isHolyday,
family = inverse.gaussian(link = '1/mu^2'), data = btv_pre_roll3)
# 점/구간 추정 예측
m1_c_interval <- predict(model1, newdata=test_btv_pre_roll2, type='response', interval="confidence", level = 0.99)
## Warning in predict.lm(model1, newdata = test_btv_pre_roll2, type =
## "response", : prediction from a rank-deficient fit may be misleading
m1_p_interval <- predict(model1, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(model1, newdata = test_btv_pre_roll2, type =
## "response", : prediction from a rank-deficient fit may be misleading
m2_c_interval <- predict(model2, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m2_p_interval <- predict(model2, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_c_interval <- predict(model3, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_p_interval <- predict(model3, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_c_interval <- predict(model4, newdata=test_btv_pre_roll2, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_p_interval <- predict(model4, newdata=test_btv_pre_roll2, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
test_btv_pre_roll2$m1_fit <- m1_c_interval[,1] # fit
test_btv_pre_roll2$m1_cnf_lwr <- m1_c_interval[,2] # confience lower
test_btv_pre_roll2$m1_cnf_upr <- m1_c_interval[,3] # confience upper
test_btv_pre_roll2$m1_prd_lwr <- m1_p_interval[,2] # prediction lower
test_btv_pre_roll2$m1_prd_upr <- m1_p_interval[,3] # prediction upper
test_btv_pre_roll2$m2_fit <- m2_c_interval # fit
test_btv_pre_roll2$m3_fit <- m3_c_interval # fit
test_btv_pre_roll2$m4_fit <- m4_c_interval # fit
#test_btv_pre_roll2 %>% filter(Date >= '2019-05-01' & Date <= '2019-05-31') %>%
# summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_cnf_lwr), sum(m2_fit), sum(m2_cnf_upr), sum(m3_cnf_lwr), sum(m3_fit), sum(m3_cnf_upr), sum(m4_cnf_lwr), sum(m4_fit), sum(m4_cnf_upr))
test_btv_pre_roll2 %>% filter(Date >= '2019-05-01' & Date <= '2019-05-31') %>%
summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_fit), sum(m3_fit), sum(m4_fit))
## sum(m1_cnf_lwr) sum(m1_fit) sum(m1_cnf_upr) sum(m2_fit) sum(m3_fit)
## 1 94388014 105831672 117275329 105831672 105543903
## sum(m4_fit)
## 1 105504004
pp <- ggplot(test_btv_pre_roll2, aes(x=Date))
pp2 <- pp + geom_point(aes(y = REQ), color = "black", alpha = 0.3)
pp3 <- pp2 + geom_point(aes(y = m1_fit), color = "red", alpha = 0.3)
pp4 <- pp2 + geom_point(aes(y = m2_c_interval), color = "red", alpha = 0.3)
pp5 <- pp2 + geom_point(aes(y = m3_c_interval), color = "red", alpha = 0.3)
pp6 <- pp2 + geom_point(aes(y = m4_c_interval), color = "red", alpha = 0.3)
#test_btv_pre_roll3
# 점/구간 추정 예측
m1_c_interval2 <- predict(model1_2, newdata=test_btv_pre_roll3, type='response', interval="confidence", level = 0.99)
## Warning in predict.lm(model1_2, newdata = test_btv_pre_roll3, type =
## "response", : prediction from a rank-deficient fit may be misleading
m1_p_interval2 <- predict(model1_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(model1_2, newdata = test_btv_pre_roll3, type =
## "response", : prediction from a rank-deficient fit may be misleading
m2_c_interval2 <- predict(model2_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m2_p_interval2 <- predict(model2_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_c_interval2 <- predict(model3_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m3_p_interval2 <- predict(model3_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_c_interval2 <- predict(model4_2, newdata=test_btv_pre_roll3, type='response', interval="confidence")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
m4_p_interval2 <- predict(model4_2, newdata=test_btv_pre_roll3, type='response', interval="prediction")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type
## == : prediction from a rank-deficient fit may be misleading
test_btv_pre_roll3$m1_fit <- m1_c_interval2[,1] # fit
test_btv_pre_roll3$m1_cnf_lwr <- m1_c_interval2[,2] # confience lower
test_btv_pre_roll3$m1_cnf_upr <- m1_c_interval2[,3] # confience upper
test_btv_pre_roll3$m1_prd_lwr <- m1_p_interval2[,2] # prediction lower
test_btv_pre_roll3$m1_prd_upr <- m1_p_interval2[,3] # prediction upper
test_btv_pre_roll3$m2_fit <- m2_c_interval2 # fit
test_btv_pre_roll3$m3_fit <- m3_c_interval2 # fit
test_btv_pre_roll3$m4_fit <- m4_c_interval2 # fit
#test_btv_pre_roll3 %>% filter(Date >= '2019-06-01' & Date <= '2019-06-30') %>%
# summarise(sum(m1_cnf_lwr), sum(m1_fit), sum(m1_cnf_upr), sum(m2_cnf_lwr), sum(m2_fit), sum(m2_cnf_upr), sum(m3_cnf_lwr), sum(m3_fit), sum(m3_cnf_upr), sum(m4_cnf_lwr), sum(m4_fit), sum(m4_cnf_upr))
test_btv_pre_roll3 %>% filter(Date >= '2019-06-01' & Date <= '2019-06-30') %>%
summarise(sum(m1_prd_lwr), sum(m1_fit), sum(m1_prd_upr), sum(m2_fit), sum(m3_fit), sum(m4_fit))
## sum(m1_prd_lwr) sum(m1_fit) sum(m1_prd_upr) sum(m2_fit) sum(m3_fit)
## 1 73443787 88901594 104359400 88901594 86832119
## sum(m4_fit)
## 1 90640824
pp <- ggplot(test_btv_pre_roll3, aes(x=Date))
pp2 <- pp + geom_point(aes(y = REQ), color = "black", alpha = 0.3)
pp3 <- pp2 + geom_point(aes(y = m1_fit), color = "blue", alpha = 0.3)
pp4 <- pp2 + geom_point(aes(y = m2_c_interval2), color = "blue", alpha = 0.3)
pp5 <- pp2 + geom_point(aes(y = m3_c_interval2), color = "blue", alpha = 0.3)
pp6 <- pp2 + geom_point(aes(y = m4_c_interval2), color = "blue", alpha = 0.3)
filter(test_btv_pre_roll2, Date >= '2019-04-01')
## Date REQ weekday month isHolyday m1_fit m1_cnf_lwr
## 1 2019-04-01 3282152 Monday 04 0 3289837 3104535
## 2 2019-04-02 3184513 Tuesday 04 0 3211098 3017734
## 3 2019-04-03 3160823 Wednesday 04 0 3203076 2996555
## 4 2019-04-04 3087324 Thursday 04 0 3209590 3002911
## 5 2019-04-05 3107286 Friday 04 0 3239256 3032415
## 6 2019-04-06 3981096 Saturday 04 0 3908532 3701523
## 7 2019-04-07 3946311 Sunday 04 0 3914475 3716812
## 8 2019-04-08 3210087 Monday 04 0 3284184 3097018
## 9 2019-04-09 3292649 Tuesday 04 0 3205430 3010556
## 10 2019-04-10 3232678 Wednesday 04 0 3197391 2989044
## 11 2019-04-11 3185824 Thursday 04 0 3203888 2995332
## 12 2019-04-12 3153292 Friday 04 0 3233538 3024769
## 13 2019-04-13 3815088 Saturday 04 0 3902798 3693809
## 14 2019-04-14 4119517 Sunday 04 0 3908725 3708720
## 15 2019-04-15 3175054 Monday 04 0 3278418 3088899
## 16 2019-04-16 3068774 Tuesday 04 0 3199647 3002791
## 17 2019-04-17 3041754 Wednesday 04 0 3191592 2980966
## 18 2019-04-18 3101583 Thursday 04 0 3198073 2987184
## 19 2019-04-19 3117815 Friday 04 0 3227707 3016549
## 20 2019-04-20 3761864 Saturday 04 0 3896950 3685518
## 21 2019-04-21 3846860 Sunday 04 0 3902861 3700029
## 22 2019-04-22 3138914 Monday 04 0 3272538 3080151
## 23 2019-04-23 3122087 Tuesday 04 0 3193750 2994413
## 24 2019-04-24 3025289 Wednesday 04 0 3185679 2972299
## 25 2019-04-25 3144534 Thursday 04 0 3192144 2978442
## 26 2019-04-26 3217561 Friday 04 0 3221762 3007733
## 27 2019-04-27 3636698 Saturday 04 0 3890988 3676627
## 28 2019-04-28 3714765 Sunday 04 0 3896883 3690716
## 29 2019-04-29 3187055 Monday 04 0 3266544 3070756
## 30 2019-04-30 3024436 Tuesday 04 0 3187740 2985397
## 31 2019-05-01 NA Wednesday 05 0 3290099 2992034
## 32 2019-05-02 NA Thursday 05 0 3214975 2915944
## 33 2019-05-03 NA Friday 05 0 3150689 2825612
## 34 2019-05-04 NA Saturday 05 0 3987377 3621653
## 35 2019-05-05 NA Sunday 05 1 3497715 2783210
## 36 2019-05-06 NA Monday 05 1 3942262 3352082
## 37 2019-05-07 NA Tuesday 05 0 3395649 3068187
## 38 2019-05-08 NA Wednesday 05 0 3283958 2980684
## 39 2019-05-09 NA Thursday 05 0 3208818 2904503
## 40 2019-05-10 NA Friday 05 0 3144516 2814550
## 41 2019-05-11 NA Saturday 05 0 3981187 3610974
## 42 2019-05-12 NA Sunday 05 1 3491509 2774700
## 43 2019-05-13 NA Monday 05 0 3297765 2925762
## 44 2019-05-14 NA Tuesday 05 0 3389411 3056860
## 45 2019-05-15 NA Wednesday 05 0 3277703 2968809
## 46 2019-05-16 NA Thursday 05 0 3202547 2892538
## 47 2019-05-17 NA Friday 05 0 3138228 2802981
## 48 2019-05-18 NA Saturday 05 0 3974883 3599814
## 49 2019-05-19 NA Sunday 05 0 3742575 3405213
## 50 2019-05-20 NA Monday 05 0 3291428 2914434
## 51 2019-05-21 NA Tuesday 05 0 3383058 3045018
## 52 2019-05-22 NA Wednesday 05 0 3271334 2956404
## 53 2019-05-23 NA Thursday 05 0 3196161 2880040
## 54 2019-05-24 NA Friday 05 0 3131827 2790893
## 55 2019-05-25 NA Saturday 05 0 3968465 3588160
## 56 2019-05-26 NA Sunday 05 0 3736141 3392946
## 57 2019-05-27 NA Monday 05 0 3284977 2902611
## 58 2019-05-28 NA Tuesday 05 0 3376591 3032653
## 59 2019-05-29 NA Wednesday 05 0 3264851 2943460
## 60 2019-05-30 NA Thursday 05 0 3189662 2867004
## 61 2019-05-31 NA Friday 05 0 3125311 2778279
## m1_cnf_upr m1_prd_lwr m1_prd_upr m2_fit m3_fit m4_fit
## 1 3475138 2835668 3744006 3289837 3297556 3301685
## 2 3404463 2754995 3667202 3211098 3220428 3224994
## 3 3409597 2743658 3662494 3203076 3213765 3219095
## 4 3416268 2750130 3669049 3209590 3219968 3225213
## 5 3446097 2779755 3698757 3239256 3248511 3253367
## 6 4115540 3448987 4368076 3908532 3891653 3882855
## 7 4112138 3457310 4371640 3914475 3895171 3885206
## 8 3471350 2829575 3738794 3284184 3291730 3296027
## 9 3400303 2748956 3661903 3205430 3214852 3219702
## 10 3405738 2737497 3657284 3197391 3208194 3213814
## 11 3412444 2743940 3663836 3203888 3214357 3219883
## 12 3442308 2773534 3693542 3233538 3242782 3247879
## 13 4111786 3442736 4362859 3902798 3883406 3873502
## 14 4108729 3450973 4366477 3908725 3886882 3875804
## 15 3467937 2823246 3733589 3278418 3285788 3290262
## 16 3396503 2742683 3656611 3199647 3209166 3214309
## 17 3402217 2731100 3652084 3191592 3202513 3208433
## 18 3408962 2737512 3658634 3198073 3208636 3214453
## 19 3438864 2767074 3688339 3227707 3236940 3242287
## 20 4108382 3436245 4357655 3896950 3875005 3863996
## 21 4105693 3444391 4361330 3902861 3878438 3866249
## 22 3464924 2816673 3728403 3272538 3279733 3284391
## 23 3393088 2736166 3651335 3193750 3203372 3208816
## 24 3399060 2724456 3646902 3185679 3196724 3202952
## 25 3405846 2730835 3653453 3192144 3202806 3208923
## 26 3435791 2760365 3683158 3221762 3230988 3236594
## 27 4105350 3429503 4352474 3890988 3866452 3854339
## 28 4103050 3437556 4356209 3896883 3869843 3856544
## 29 3462332 2809844 3723243 3266544 3273566 3278415
## 30 3390083 2729396 3646084 3187740 3197469 3203225
## 31 3588164 2802544 3777654 3290099 3297857 3304662
## 32 3514005 2727079 3702871 3214975 3229092 3238443
## 33 3475766 2653277 3648102 3150689 3170566 3182036
## 34 4353101 3473941 4500812 3987377 3925994 3896607
## 35 4212219 2804214 4191216 3497715 3452166 3436462
## 36 4532441 3319864 4564659 3942262 3886326 3860226
## 37 3723111 2897335 3893963 3395649 3393262 3395940
## 38 3587233 2794553 3773363 3283958 3291446 3298398
## 39 3513132 2719041 3698595 3208818 3222926 3232529
## 40 3474481 2645250 3643782 3144516 3164603 3176408
## 41 4351399 3465899 4496475 3981187 3916828 3886253
## 42 4208318 2796638 4186380 3491509 3445055 3429332
## 43 3669767 2781734 3813795 3297765 3302745 3308953
## 44 3721962 2889156 3889665 3389411 3386351 3389015
## 45 3586597 2786276 3769131 3277703 3284923 3292032
## 46 3512555 2710714 3694379 3202547 3216653 3226519
## 47 3473476 2636937 3639520 3138228 3158537 3170686
## 48 4349952 3457573 4492193 3974883 3907512 3875757
## 49 4079938 3240466 4244685 3742575 3701131 3684844
## 50 3668422 2773312 3809544 3291428 3296080 3302427
## 51 3721098 2880686 3885430 3383058 3379324 3381984
## 52 3586265 2777701 3764967 3271334 3278291 3285567
## 53 3512282 2702090 3690233 3196161 3210275 3220413
## 54 3472761 2628328 3635326 3131827 3152369 3164874
## 55 4348770 3448955 4487975 3968465 3898049 3865123
## 56 4079336 3231757 4240525 3736141 3692615 3675675
## 57 3667344 2764595 3805360 3284977 3289305 3295802
## 58 3720529 2871915 3881267 3376591 3372182 3374848
## 59 3586242 2768822 3760880 3264851 3271550 3279004
## 60 3512319 2693159 3686165 3189662 3203792 3214213
## 61 3472342 2619415 3631207 3125311 3146100 3158972
filter(test_btv_pre_roll3, Date >= '2019-05-01')
## Date REQ weekday month isHolyday m1_fit m1_cnf_lwr
## 1 2019-05-01 3616125 Wednesday 05 0 3006488 2818096
## 2 2019-05-02 2955011 Thursday 05 0 2901835 2713319
## 3 2019-05-03 2907704 Friday 05 0 2870009 2673210
## 4 2019-05-04 3236108 Saturday 05 0 3570458 3348141
## 5 2019-05-05 3036148 Sunday 05 1 3269726 2860513
## 6 2019-05-06 3660025 Monday 05 1 3690200 3277719
## 7 2019-05-07 2868780 Tuesday 05 0 2982781 2772528
## 8 2019-05-08 2703155 Wednesday 05 0 2986413 2796487
## 9 2019-05-09 2747889 Thursday 05 0 2881690 2691607
## 10 2019-05-10 2807466 Friday 05 0 2849793 2651702
## 11 2019-05-11 3451599 Saturday 05 0 3550172 3326644
## 12 2019-05-12 3482948 Sunday 05 1 3249370 2840156
## 13 2019-05-13 2890072 Monday 05 0 2999994 2757674
## 14 2019-05-14 2775220 Tuesday 05 0 2962283 2750477
## 15 2019-05-15 2806413 Wednesday 05 0 2965844 2774077
## 16 2019-05-16 2800079 Thursday 05 0 2861050 2669092
## 17 2019-05-17 2819470 Friday 05 0 2829083 2629403
## 18 2019-05-18 3544076 Saturday 05 0 3529391 3304385
## 19 2019-05-19 3722838 Sunday 05 0 3508866 3260035
## 20 2019-05-20 2884655 Monday 05 0 2979072 2735158
## 21 2019-05-21 2742861 Tuesday 05 0 2941290 2727641
## 22 2019-05-22 2691009 Wednesday 05 0 2944781 2750852
## 23 2019-05-23 2695802 Thursday 05 0 2839916 2645759
## 24 2019-05-24 2752395 Friday 05 0 2807879 2606296
## 25 2019-05-25 3354832 Saturday 05 0 3508116 3281347
## 26 2019-05-26 3359779 Sunday 05 0 3487520 3236409
## 27 2019-05-27 3006505 Monday 05 0 2957656 2711878
## 28 2019-05-28 2680322 Tuesday 05 0 2919803 2704003
## 29 2019-05-29 2682910 Wednesday 05 0 2923222 2726796
## 30 2019-05-30 2642520 Thursday 05 0 2818287 2621594
## 31 2019-05-31 2618025 Friday 05 0 2786179 2582366
## 32 2019-06-01 NA Saturday 06 0 3441481 3146587
## 33 2019-06-02 NA Sunday 06 0 3366607 3044361
## 34 2019-06-03 NA Monday 06 0 2782393 2459545
## 35 2019-06-04 NA Tuesday 06 0 2787854 2464398
## 36 2019-06-05 NA Wednesday 06 0 2665811 2230725
## 37 2019-06-06 NA Thursday 06 1 3783893 3087435
## 38 2019-06-07 NA Friday 06 0 2779416 2481444
## 39 2019-06-08 NA Saturday 06 0 3419215 3120553
## 40 2019-06-09 NA Sunday 06 0 3344271 3018552
## 41 2019-06-10 NA Monday 06 0 2759985 2433621
## 42 2019-06-11 NA Tuesday 06 0 2765376 2438359
## 43 2019-06-12 NA Wednesday 06 0 2643261 2205437
## 44 2019-06-13 NA Thursday 06 0 2696619 2368279
## 45 2019-06-14 NA Friday 06 0 2756725 2454761
## 46 2019-06-15 NA Saturday 06 0 3396453 3093752
## 47 2019-06-16 NA Sunday 06 0 3321438 2991990
## 48 2019-06-17 NA Monday 06 0 2737082 2406943
## 49 2019-06-18 NA Tuesday 06 0 2742401 2411565
## 50 2019-06-19 NA Wednesday 06 0 2620216 2179443
## 51 2019-06-20 NA Thursday 06 0 2673502 2341252
## 52 2019-06-21 NA Friday 06 0 2733538 2427305
## 53 2019-06-22 NA Saturday 06 0 3373195 3066177
## 54 2019-06-23 NA Sunday 06 0 3298109 2964668
## 55 2019-06-24 NA Monday 06 0 2713682 2379503
## 56 2019-06-25 NA Tuesday 06 0 2718930 2384008
## 57 2019-06-26 NA Wednesday 06 0 2596674 2152735
## 58 2019-06-27 NA Thursday 06 0 2649890 2313460
## 59 2019-06-28 NA Friday 06 0 2709854 2399070
## 60 2019-06-29 NA Saturday 06 0 3349440 3037822
## 61 2019-06-30 NA Sunday 06 0 3274283 2936577
## m1_cnf_upr m1_prd_lwr m1_prd_upr m2_fit m3_fit m4_fit
## 1 3194879 2544207 3468768 3006488 2980956 3037399
## 2 3090351 2439525 3364145 2901835 2873765 2946387
## 3 3066809 2405712 3334306 2870009 2850409 2912311
## 4 3792776 3099563 4041354 3570458 3539154 3516246
## 5 3678940 2731354 3808099 3269726 3273133 3268598
## 6 4102682 3150391 4230010 3690200 3669246 3603377
## 7 3193034 2515094 3450469 2982781 2940377 3018634
## 8 3176339 2523770 3449056 2986413 2957552 3023061
## 9 3071772 2419010 3344369 2881690 2850276 2933248
## 10 3047885 2385179 3314408 2849793 2826835 2899575
## 11 3773700 3078946 4021398 3550172 3515495 3493819
## 12 3658583 2710997 3787742 3249370 3249389 3250498
## 13 3242315 2523439 3476550 2999994 2974950 3034724
## 14 3174089 2494191 3430375 2962283 2916463 3004259
## 15 3157611 2502764 3428925 2965844 2933553 3008573
## 16 3053008 2397924 3324176 2861050 2826192 2919961
## 17 3028764 2364077 3294090 2829083 2802666 2886695
## 18 3754398 3057759 4001023 3529391 3491241 3471276
## 19 3757698 3030379 3987354 3508866 3483664 3433415
## 20 3222987 2502048 3456097 2979072 2950526 3019814
## 21 3154940 2472715 3409865 2941290 2891954 2989742
## 22 3138709 2481181 3408380 2944781 2908958 2993944
## 23 3034073 2376261 3303571 2839916 2801513 2906536
## 24 3009461 2342399 3273358 2807879 2777901 2873678
## 25 3734885 3035997 3980235 3508116 3466391 3448633
## 26 3738631 3008346 3966695 3487520 3458729 3411427
## 27 3203433 2480079 3435232 2957656 2925505 3004771
## 28 3135602 2450660 3388946 2919803 2866848 2975093
## 29 3119649 2459017 3387428 2923222 2883768 2979185
## 30 3014980 2354016 3282558 2818287 2776237 2892982
## 31 2989992 2320139 3252219 2786179 2752540 2860532
## 32 3736375 2948124 3934838 3441481 3380332 3345242
## 33 3688854 2863476 3869739 3366607 3305518 3298108
## 34 3105241 2279039 3285747 2782393 2720565 2910698
## 35 3111310 2284275 3291433 2787854 2725462 2914704
## 36 3100896 2115857 3215764 2665811 2600948 2829033
## 37 4480351 3096073 4471712 3783893 3725935 3402525
## 38 3077389 2284993 3273840 2779416 2715224 2909411
## 39 3717877 2924552 3913879 3419215 3354289 3324012
## 40 3669989 2839851 3848690 3344271 3279389 3277693
## 41 3086350 2255325 3264646 2759985 2694351 2896591
## 42 3092392 2260472 3270280 2765376 2699163 2900493
## 43 3081085 2092055 3194467 2643261 2574564 2815992
## 44 3024959 2191219 3202018 2696619 2628620 2853523
## 45 3058690 2260906 3252544 2756725 2688669 2895143
## 46 3699155 2900375 3892531 3396453 3327649 3302714
## 47 3650885 2815623 3827253 3321438 3252664 3257203
## 48 3067220 2231007 3243157 2737082 2667541 2882377
## 49 3073237 2236063 3248739 2742401 2672267 2886177
## 50 3060989 2067656 3172776 2620216 2547583 2802845
## 51 3005753 2166631 3180374 2673502 2601554 2839803
## 52 3039771 2236211 3230865 2733538 2661517 2880773
## 53 3680213 2875589 3870801 3373195 3300412 3281362
## 54 3631550 2790786 3805432 3298109 3225341 3236654
## 55 3047860 2206079 3221284 2713682 2640132 2868063
## 56 3053853 2211045 3226816 2718930 2644774 2871763
## 57 3040613 2042653 3150695 2596674 2520004 2789598
## 58 2986319 2141430 3158349 2649890 2573889 2825986
## 59 3020638 2210901 3208808 2709854 2633767 2866309
## 60 3661059 2850187 3848694 3349440 3272576 3259970
## 61 3611989 2765335 3783231 3274283 3197420 3216057
###
# 모델 신뢰구간
#confint(model1)
#confint(model2, level = 0.99)
#confint(model3, level = 0.99)
#confint(model4, level = 0.99)
#confint(model1_2, level = 0.99)
#confint(model2_2, level = 0.99)
#confint(model3_2, level = 0.99)
#confint(model4_2, level = 0.99)
### 예측기간 관측치 더하기 (검증을 위한 사후 처리...)
test_btv_pre_roll2$REQ [test_btv_pre_roll2$Date >= '2019-05-01' & test_btv_pre_roll2$Date <= '2019-05-31'] <-
btv_pre_roll$REQ [btv_pre_roll$Date >= '2019-05-01' & btv_pre_roll$Date <= '2019-05-31']
test_btv_pre_roll3$REQ [test_btv_pre_roll3$Date >= '2019-06-01' & test_btv_pre_roll3$Date <= '2019-06-30'] <-
btv_pre_roll$REQ [btv_pre_roll$Date >= '2019-06-01' & btv_pre_roll$Date <= '2019-06-30']
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m1_fit)^2
## [1] 0.7858214
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m2_fit)^2
## [1] 0.7858214
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m3_fit)^2
## [1] 0.7855516
cor(test_btv_pre_roll2$REQ, test_btv_pre_roll2$m4_fit)^2
## [1] 0.7841779
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m1_fit)^2
## [1] 0.841047
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m2_fit)^2
## [1] 0.841047
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m3_fit)^2
## [1] 0.8389602
cor(test_btv_pre_roll3$REQ, test_btv_pre_roll3$m4_fit)^2
## [1] 0.8235133
# MAE
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m1_fit))
## [1] 153112.3
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m2_fit))
## [1] 153112.3
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m3_fit))
## [1] 154276.4
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m4_fit))
## [1] 155203.9
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m1_fit))
## [1] 137020.5
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m2_fit))
## [1] 137020.5
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m3_fit))
## [1] 138880.1
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m4_fit))
## [1] 147364
# MAPE
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m1_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.342613
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m2_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.342613
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m3_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.375261
mean(abs(test_btv_pre_roll2$REQ - test_btv_pre_roll2$m4_fit)/abs(test_btv_pre_roll2$REQ))*100
## [1] 4.402082
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m1_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.775664
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m2_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.775664
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m3_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 3.825968
mean(abs(test_btv_pre_roll3$REQ - test_btv_pre_roll3$m4_fit)/abs(test_btv_pre_roll3$REQ))*100
## [1] 4.062741