선형회귀(linear regression)를 통한 inven 예측 R code

if(!require(ggplot2)){install.packages("ggplot2"); library(ggplot2)}
## Loading required package: ggplot2
if(!require(dplyr)){install.packages("dplyr"); library(dplyr)}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
rm(list =  ls())
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  588166 31.5    1211095 64.7  1211095 64.7
## Vcells 1063434  8.2    8388608 64.0  1753441 13.4
getwd()
## [1] "/home/mjs0428/inven_forcast"
setwd("/home/mjs0428/inven_forcast")
btv_pre_roll <- read.csv("./Btv_Preroll-Req_Inv_Res_Imp-data_20190620.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE, fileEncoding = "euc-kr")
glimpse(btv_pre_roll)
## Observations: 536
## Variables: 7
## $ Date             <chr> "2018-01-01", "2018-01-02", "2018-01-03", "2018…
## $ VOD_시청시간.초. <chr> "8,421,536,616", "7,878,348,464", "7,498,034,394", "…
## $ VOD_시청건수     <chr> "4,496,473", "4,658,646", "4,535,338", "4,500,479",…
## $ Request          <chr> "4,567,997", "4,630,415", "4,533,702", "4,490,7…
## $ Inv.             <chr> "7,377,096", "7,110,607", "6,932,244", "6,839,9…
## $ Response         <chr> "4,787,165", "4,391,051", "4,252,549", "4,101,2…
## $ IMP              <chr> "4,498,714", "4,141,349", "4,011,362", "3,872,0…
# 숫자형 데이터 변환
btv_pre_roll$Request  = as.numeric(gsub(",", "",  btv_pre_roll$Request))
btv_pre_roll$Inv. = as.numeric(gsub(",", "",  btv_pre_roll$Inv.))
btv_pre_roll$Response = as.numeric(gsub(",", "",  btv_pre_roll$Response))
btv_pre_roll$IMP = as.numeric(gsub(",", "",  btv_pre_roll$IMP))

# 숫자형 데이터 변환
btv_pre_roll$Request  <- as.numeric(gsub(",", "",  btv_pre_roll$Request))
btv_pre_roll$Inv. <- as.numeric(gsub(",", "",  btv_pre_roll$Inv.))
btv_pre_roll$Response <- as.numeric(gsub(",", "",  btv_pre_roll$Response))
btv_pre_roll$IMP <- as.numeric(gsub(",", "",  btv_pre_roll$IMP))

# 추가된 필드 
btv_pre_roll$VOD_시청시간.초.  <- round(as.numeric(gsub(",", "", btv_pre_roll$VOD_시청시간.초.))/3600,0)
btv_pre_roll <- rename(btv_pre_roll, VOD_시청시간 = VOD_시청시간.초.)
btv_pre_roll$VOD_시청건수  <- as.numeric(gsub(",", "",  btv_pre_roll$VOD_시청건수))

str(btv_pre_roll)
## 'data.frame':    536 obs. of  7 variables:
##  $ Date        : chr  "2018-01-01" "2018-01-02" "2018-01-03" "2018-01-04" ...
##  $ VOD_시청시간: num  2339316 2188430 2082787 2083401 2071845 ...
##  $ VOD_시청건수: num  4496473 4658646 4535338 4500479 4488078 ...
##  $ Request     : num  4567997 4630415 4533702 4490705 4480584 ...
##  $ Inv.        : num  7377096 7110607 6932244 6839954 6786827 ...
##  $ Response    : num  4787165 4391051 4252549 4101297 4167156 ...
##  $ IMP         : num  4498714 4141349 4011362 3872031 3928040 ...
glimpse(btv_pre_roll)
## Observations: 536
## Variables: 7
## $ Date         <chr> "2018-01-01", "2018-01-02", "2018-01-03", "2018-01-…
## $ VOD_시청시간 <dbl> 2339316, 2188430, 2082787, 2083401, 2071845, 2217020, 2…
## $ VOD_시청건수 <dbl> 4496473, 4658646, 4535338, 4500479, 4488078, 4558005, 4…
## $ Request      <dbl> 4567997, 4630415, 4533702, 4490705, 4480584, 458342…
## $ Inv.         <dbl> 7377096, 7110607, 6932244, 6839954, 6786827, 706520…
## $ Response     <dbl> 4787165, 4391051, 4252549, 4101297, 4167156, 446999…
## $ IMP          <dbl> 4498714, 4141349, 4011362, 3872031, 3928040, 420822…
tail(btv_pre_roll)
##           Date VOD_시청시간 VOD_시청건수 Request    Inv. Response     IMP
## 531 2019-06-15      1764021      3392494 3336975 5865433  3596532 3489441
## 532 2019-06-16      1787598      3351160 3296535 5824550  3489334 3388260
## 533 2019-06-17      1519081      2849966 2819099 4847480  3020758 2930798
## 534 2019-06-18      1467465      2870217 2844401 4884775  3162402 3068361
## 535 2019-06-19      1446935      2836474 2800606 4851506  3325569 3223069
## 536 2019-06-20      1442873      2794911 2762179 4794207  3257581 3158809
ggplot(btv_pre_roll, aes(x=Date)) + ggtitle("Request(grey)/ VOD_watch_cnt(blue) / VOD_watch_time(orange)") + 
  geom_point(aes(y = Request), color = "black", alpha = 0.3) +
  geom_point(aes(y = VOD_시청건수), color = "blue", alpha = 0.3) + 
  geom_point(aes(y = VOD_시청시간), color = "orange", alpha = 0.3)

# 날짜형 타입으로 변환
btv_pre_roll$Date  <- as.Date(btv_pre_roll$Date)

# 요일  구하기
btv_pre_roll$weekday = factor(weekdays(btv_pre_roll$Date), 
                              levels=c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), 
                              order = T)
summary(btv_pre_roll$weekday)
##    Monday   Tuesday Wednesday  Thursday    Friday  Saturday    Sunday 
##        77        77        77        77        76        76        76
# 특일 구하기
# 참고 페이지 : https://m.blog.naver.com/hancury/221057426711
# 사이트 : https://www.data.go.kr/
# 메뉴명 : 마이페이지 > OPEN API > 개발계정 상세보기
#인증키 "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"
if(!require(glue)){install.packages("glue"); library(glue)}
## Loading required package: glue
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
if(!require(XML)){install.packages("XML"); library(XML)}
## Loading required package: XML
if(!require(stringr)){install.packages("stringr"); library(stringr)}
## Loading required package: stringr
api.key <- "SD5uTucKQxefBuN79R8TKiBa15fAbw5j1id%2FdBH0SsWFDK4boXqDpgMPtc8QJ8UNyaSO8HpGJf%2FAbh65oBzg7g%3D%3D"

url.format <- 
  'http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo?ServiceKey={key}&solYear={year}&solMonth={month}'

holiday.request <- function(key, year, month) glue(url.format)

df_holiday <- data.frame(dateName=NULL, Date=NULL)

for(m in 1:12){
  holiday_2018 <- xmlToList(holiday.request(api.key, 2018, str_pad(m, 2, pad=0)))
  holiday_2019 <- xmlToList(holiday.request(api.key, 2019, str_pad(m, 2, pad=0)))
  
  items_2018 <- holiday_2018$body$items
  items_2019 <- holiday_2019$body$items
  items_test <- holiday_2018$body$items
  items_test <- holiday_2019$body$items
  
  for(item_2018 in items_2018){
    if(item_2018$isHoliday == 'Y') {
      #print(paste(item_2018$dateName, item_2018$locdate, sep=' : '))
      df_holiday <- rbind(df_holiday, 
                          data.frame(dateName = item_2018$dateName, 
                                     Date = (paste(substr(item_2018$locdate,1,4),
                                                   substr(item_2018$locdate,5,6), 
                                                   substr(item_2018$locdate,7,8), sep = '-')),
                                     stringsAsFactors = FALSE))
    }
  }
  
  for(item_2019 in items_2019){
    if(item_2019$isHoliday == 'Y') {
      #print(paste(item_2019$dateName, item_2019$locdate, sep=' : '))
      df_holiday <- rbind(df_holiday, 
                          data.frame(dateName = item_2019$dateName, 
                                     Date = (paste(substr(item_2019$locdate,1,4),
                                                   substr(item_2019$locdate,5,6), 
                                                   substr(item_2019$locdate,7,8), sep = '-')),
                                     stringsAsFactors = FALSE))
    }
  }
  
}


# 날짜 데이터의 데이터 타입 변환
df_holiday$Date <- as.Date(df_holiday$Date)


### Left 조인하기 : 광고 데이터 + 특일 데이터..
btv_pre_roll_week_holy <- left_join(btv_pre_roll, df_holiday)
## Joining, by = "Date"
# 요일 데이터 범주화 
btv_pre_roll_week_holy$isMon <-as.numeric(btv_pre_roll_week_holy$weekday == 'Monday') # 요일 범주화
btv_pre_roll_week_holy$isTue <-as.numeric(btv_pre_roll_week_holy$weekday == 'Tuesday') # 요일 범주화
btv_pre_roll_week_holy$isWed <-as.numeric(btv_pre_roll_week_holy$weekday == 'Wednesday') # 요일 범주화
btv_pre_roll_week_holy$isThu <-as.numeric(btv_pre_roll_week_holy$weekday == 'Thursday') # 요일 범주화
btv_pre_roll_week_holy$isFri <-as.numeric(btv_pre_roll_week_holy$weekday == 'Friday') # 요일 범주화
btv_pre_roll_week_holy$isSat <-as.numeric(btv_pre_roll_week_holy$weekday == 'Saturday') # 요일 범주화
btv_pre_roll_week_holy$isSun <-as.numeric(btv_pre_roll_week_holy$weekday == 'Sunday') # 요일 범주화

# 특일 데이터 범주화
btv_pre_roll_week_holy <- mutate(btv_pre_roll_week_holy, isHolyday = as.numeric(!is.na(dateName)))

# 방학 데이터 범주화 (여름방학 : 7워~8월, 겨울방학 1월~2월)
btv_pre_roll_week_holy <- 
  mutate(btv_pre_roll_week_holy,
         isVacation = as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) %in% c("07", "08", "01", "02")))

# 월별 범주화
btv_pre_roll_week_holy$isJan <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '01') # 1월
btv_pre_roll_week_holy$isFeb <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '02') # 2월
btv_pre_roll_week_holy$isMar <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '03') # 3월
btv_pre_roll_week_holy$isApr <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '04') # 4월
btv_pre_roll_week_holy$isMay <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '05') # 5월
btv_pre_roll_week_holy$isJun <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '06') # 6월
btv_pre_roll_week_holy$isJul <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '07') # 7월
btv_pre_roll_week_holy$isAug <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '08') # 8월
btv_pre_roll_week_holy$isSep <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '09') # 9월
btv_pre_roll_week_holy$isOct <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '10') # 10월
btv_pre_roll_week_holy$isNov <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '11') # 11월
btv_pre_roll_week_holy$isDec <-as.numeric(substr(btv_pre_roll_week_holy$Date,6,7) == '12') # 12월

# 예측하기...
# 예측할 날짜로 된 수열 생성하기
s_date <- as.Date("2019-06-21")
e_date <- as.Date("2019-12-31")
add_df <- data.frame(Date = seq(from = s_date, to=e_date, by=1))
add_df$Date <- as.Date(add_df$Date)
add_df$VOD_시청시간 <- NA; add_df$VOD_시청건수 <-  NA;
add_df$Request <- NA; add_df$Inv. <- NA; add_df$Response <- NA; add_df$IMP <- NA;
add_df$weekday <- NA;  add_df$isMon <- NA; add_df$isTue <- NA; add_df$isWed<- NA; 
add_df$isThu <- NA; add_df$isFri <- NA; add_df$isSat <- NA; add_df$isSun <- NA;  
add_df$isHolyday <- NA; add_df$isVacation <- NA; 
add_df$isJan <- NA; add_df$isFeb <- NA; add_df$isMar <- NA; add_df$isApr <- NA;  
add_df$isMay <- NA; add_df$isJun <- NA; add_df$isJul <- NA; add_df$isAug <- NA; 
add_df$isSep <- NA; add_df$isOct <- NA; add_df$isNov <- NA; add_df$isDec <- NA; 


# 예측할 날짜에 요일데이터 및 특일데이터추가....
# 예측할 날짜에 요일 데이터 추가
add_df$weekday = weekdays(add_df$Date)
# 예측할 날짜에 요일 데이터 범주화 추가

add_df$isMon <-as.numeric(add_df$weekday == 'Monday') # 요일 범주화
add_df$isTue <-as.numeric(add_df$weekday == 'Tuesday') # 요일 범주화
add_df$isWed <-as.numeric(add_df$weekday == 'Wednesday') # 요일 범주화
add_df$isThu <-as.numeric(add_df$weekday == 'Thursday') # 요일 범주화
add_df$isFri <-as.numeric(add_df$weekday == 'Friday') # 요일 범주화
add_df$isSat <-as.numeric(add_df$weekday == 'Saturday') # 요일 범주화
add_df$isSun <-as.numeric(add_df$weekday == 'Sunday') # 요일 범주화

# 특일 데이터 추가
add_df <- left_join(add_df, df_holiday)
## Joining, by = "Date"
# 특일 데이터 범주화 추가
add_df$isHolyday <- as.numeric(!is.na(add_df$dateName))

# 방학 데이터 범주화 추가 (여름방학 : 7워~8월, 겨울방학 12월~2월)
add_df$isVacation <- as.numeric(substr(add_df$Date,6,7) %in% c("07", "08", "01", "02")) 


# 월별 범주화
add_df$isJan <-as.numeric(substr(add_df$Date,6,7) == '01') # 1월
add_df$isFeb <-as.numeric(substr(add_df$Date,6,7) == '02') # 2월
add_df$isMar <-as.numeric(substr(add_df$Date,6,7) == '03') # 3월
add_df$isApr <-as.numeric(substr(add_df$Date,6,7) == '04') # 4월
add_df$isMay <-as.numeric(substr(add_df$Date,6,7) == '05') # 5월
add_df$isJun <-as.numeric(substr(add_df$Date,6,7) == '06') # 6월
add_df$isJul <-as.numeric(substr(add_df$Date,6,7) == '07') # 7월
add_df$isAug <-as.numeric(substr(add_df$Date,6,7) == '08') # 8월
add_df$isSep <-as.numeric(substr(add_df$Date,6,7) == '09') # 9월
add_df$isOct <-as.numeric(substr(add_df$Date,6,7) == '10') # 10월
add_df$isNov <-as.numeric(substr(add_df$Date,6,7) == '11') # 11월
add_df$isDec <-as.numeric(substr(add_df$Date,6,7) == '12') # 12월


# 기존 데이터와 예측치 결합
btv_pre_roll_week_holy2 <- rbind(btv_pre_roll_week_holy, add_df)

# ★★★★★ interation model
old_btv_pre_roll <- lm(Request ~ Date + isMon + isTue + isWed + isThu + isFri + isSat + isSun + isHolyday + isVacation + 
                         isMon:Date + isTue:Date + isWed:Date + isThu:Date + isFri:Date + isSat:Date + isSun:Date + isHolyday:Date + isVacation:Date + 
                         isMon:isHolyday + isTue:isHolyday + isWed:isHolyday + isThu:isHolyday + isFri:isHolyday + isSat:isHolyday + isSun:isHolyday + isVacation:isHolyday +
                         isMon:isVacation + isTue:isVacation + isWed:isVacation + isThu:isVacation + isFri:isVacation + isSat:isVacation + isSun:isVacation, 
                       data = btv_pre_roll_week_holy)



int_btv_pre_roll <- lm(Request ~ Date + isMon + isTue + isWed + isThu + isFri + isSat + isHolyday + 
                         isJan + isFeb + isMar + isApr + isMay + isJun + isJul + isAug + isSep + 
                         isOct + isNov +  isMon:isHolyday + isTue:isHolyday + isWed:isHolyday + 
                         isThu:isHolyday + isFri:isHolyday + isSat:isHolyday + isMon:isJan + 
                         isMon:isFeb + isMon:isMar + isMon:isApr + isMon:isMay + isMon:isJun + 
                         isMon:isJul + isMon:isSep + isMon:isOct + isMon:isNov + isTue:isFeb + 
                         isTue:isMar + isTue:isApr + isTue:isMay + isTue:isJun + isTue:isJul + 
                         isTue:isSep + isTue:isOct + isTue:isNov + isWed:isFeb + isWed:isMar + 
                         isWed:isApr + isWed:isJun + isWed:isSep + isWed:isOct + isWed:isNov + 
                         isThu:isFeb + isThu:isMar + isThu:isApr + isThu:isSep + isThu:isOct + 
                         isFri:isFeb + isFri:isMar + isFri:isApr + isFri:isAug + isFri:isSep + 
                         isSat:isFeb + isSat:isSep, 
                       data = btv_pre_roll_week_holy)
  
summary(int_btv_pre_roll) ## interaction effect
## 
## Call:
## lm(formula = Request ~ Date + isMon + isTue + isWed + isThu + 
##     isFri + isSat + isHolyday + isJan + isFeb + isMar + isApr + 
##     isMay + isJun + isJul + isAug + isSep + isOct + isNov + isMon:isHolyday + 
##     isTue:isHolyday + isWed:isHolyday + isThu:isHolyday + isFri:isHolyday + 
##     isSat:isHolyday + isMon:isJan + isMon:isFeb + isMon:isMar + 
##     isMon:isApr + isMon:isMay + isMon:isJun + isMon:isJul + isMon:isSep + 
##     isMon:isOct + isMon:isNov + isTue:isFeb + isTue:isMar + isTue:isApr + 
##     isTue:isMay + isTue:isJun + isTue:isJul + isTue:isSep + isTue:isOct + 
##     isTue:isNov + isWed:isFeb + isWed:isMar + isWed:isApr + isWed:isJun + 
##     isWed:isSep + isWed:isOct + isWed:isNov + isThu:isFeb + isThu:isMar + 
##     isThu:isApr + isThu:isSep + isThu:isOct + isFri:isFeb + isFri:isMar + 
##     isFri:isApr + isFri:isAug + isFri:isSep + isSat:isFeb + isSat:isSep, 
##     data = btv_pre_roll_week_holy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2628097  -118726     -562   148608  1219153 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.499e+07  1.774e+06   8.453 3.55e-16 ***
## Date            -6.216e+02  9.912e+01  -6.271 8.11e-10 ***
## isMon           -1.261e+05  1.296e+05  -0.973 0.331078    
## isTue           -2.591e+05  9.847e+04  -2.632 0.008776 ** 
## isWed           -3.010e+05  7.772e+04  -3.872 0.000123 ***
## isThu           -3.605e+05  7.007e+04  -5.145 3.93e-07 ***
## isFri           -3.982e+05  7.092e+04  -5.615 3.36e-08 ***
## isSat            3.862e+04  6.039e+04   0.640 0.522761    
## isHolyday        7.708e+04  2.070e+05   0.372 0.709783    
## isJan            6.092e+05  8.001e+04   7.614 1.47e-13 ***
## isFeb            1.022e+05  1.418e+05   0.720 0.471674    
## isMar            1.321e+05  1.046e+05   1.262 0.207564    
## isApr            4.226e+04  1.079e+05   0.392 0.695420    
## isMay           -3.724e+05  8.299e+04  -4.487 9.09e-06 ***
## isJun           -4.103e+05  9.142e+04  -4.488 9.06e-06 ***
## isJul           -1.223e+05  9.915e+04  -1.233 0.218137    
## isAug           -9.386e+04  9.092e+04  -1.032 0.302447    
## isSep           -1.077e+06  1.725e+05  -6.243 9.57e-10 ***
## isOct            2.929e+04  1.186e+05   0.247 0.804974    
## isNov            1.681e+04  1.034e+05   0.162 0.870997    
## isMon:isHolyday  9.055e+04  2.664e+05   0.340 0.734094    
## isTue:isHolyday  2.390e+05  2.550e+05   0.937 0.349087    
## isWed:isHolyday  5.607e+05  2.584e+05   2.170 0.030504 *  
## isThu:isHolyday  1.589e+05  2.916e+05   0.545 0.586211    
## isFri:isHolyday -3.719e+05  3.263e+05  -1.140 0.254908    
## isSat:isHolyday -7.589e+05  3.211e+05  -2.363 0.018524 *  
## isMon:isJan     -1.483e+05  1.731e+05  -0.857 0.391872    
## isMon:isFeb     -4.503e+04  2.132e+05  -0.211 0.832833    
## isMon:isMar     -5.135e+05  1.896e+05  -2.709 0.006996 ** 
## isMon:isApr     -4.797e+05  1.839e+05  -2.609 0.009382 ** 
## isMon:isMay     -6.268e+04  1.835e+05  -0.342 0.732753    
## isMon:isJun     -3.149e+05  1.882e+05  -1.673 0.095057 .  
## isMon:isJul     -6.286e+05  2.076e+05  -3.028 0.002597 ** 
## isMon:isSep      2.765e+05  2.656e+05   1.041 0.298418    
## isMon:isOct     -7.044e+05  2.183e+05  -3.227 0.001337 ** 
## isMon:isNov     -3.643e+05  2.233e+05  -1.631 0.103558    
## isTue:isFeb     -2.513e+04  1.940e+05  -0.130 0.896987    
## isTue:isMar     -4.555e+05  1.698e+05  -2.683 0.007554 ** 
## isTue:isApr     -4.212e+05  1.672e+05  -2.518 0.012118 *  
## isTue:isMay     -6.350e+04  1.511e+05  -0.420 0.674495    
## isTue:isJun     -2.133e+05  1.681e+05  -1.269 0.204982    
## isTue:isJul     -2.013e+05  1.895e+05  -1.063 0.288516    
## isTue:isSep      5.299e+05  2.486e+05   2.132 0.033562 *  
## isTue:isOct     -2.205e+05  2.008e+05  -1.098 0.272819    
## isTue:isNov     -3.479e+05  2.066e+05  -1.684 0.092782 .  
## isWed:isFeb      2.199e+05  1.856e+05   1.185 0.236798    
## isWed:isMar     -3.877e+05  1.586e+05  -2.444 0.014896 *  
## isWed:isApr     -3.902e+05  1.608e+05  -2.426 0.015629 *  
## isWed:isJun     -2.136e+05  1.615e+05  -1.323 0.186432    
## isWed:isSep      5.906e+05  2.427e+05   2.433 0.015326 *  
## isWed:isOct     -2.952e+05  1.934e+05  -1.527 0.127511    
## isWed:isNov     -2.062e+05  1.974e+05  -1.045 0.296685    
## isThu:isFeb      2.439e+05  1.831e+05   1.332 0.183571    
## isThu:isMar     -2.437e+05  1.509e+05  -1.615 0.106931    
## isThu:isApr     -3.231e+05  1.573e+05  -2.055 0.040435 *  
## isThu:isSep      7.272e+05  2.381e+05   3.054 0.002388 ** 
## isThu:isOct     -2.841e+05  2.030e+05  -1.400 0.162257    
## isFri:isFeb      2.360e+05  1.850e+05   1.275 0.202887    
## isFri:isMar     -1.308e+05  1.482e+05  -0.883 0.377941    
## isFri:isApr     -2.548e+05  1.576e+05  -1.616 0.106736    
## isFri:isAug      2.557e+05  1.757e+05   1.455 0.146298    
## isFri:isSep      7.937e+05  2.384e+05   3.329 0.000941 ***
## isSat:isFeb      1.286e+05  1.805e+05   0.712 0.476591    
## isSat:isSep      9.338e+05  2.235e+05   4.177 3.51e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 336000 on 472 degrees of freedom
## Multiple R-squared:  0.6431, Adjusted R-squared:  0.5954 
## F-statistic:  13.5 on 63 and 472 DF,  p-value: < 2.2e-16
# 점 추정 예측
#predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response')
#interval이 confidence로 주어진 경우 회귀 계수에 대한 신뢰 구간을 고려하여 종속 변수의 신뢰 구간(confidence interval)을 찾는다.
#predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response', interval="confidence")
#interval이 prediction인 경우 회귀 계수의 신뢰 구간과 오차항을 고려한 종속 변수의 예측 구간(prediction interval)을 찾는다. 
#(구간 크기 : confidence < prediction)
# predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response', interval="prediction")
# 구간 추정 예측 : fit =  예측값, lwr = 예측하한값, upr = 예측 상한값
#interval="prediction" 구간 추정
pred_interval <- predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response', interval="prediction")
conf_interval <- predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response', interval="confidence")
str(pred_interval)
##  num [1:730, 1:3] 4598051 4445108 4402664 4342467 4304146 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:730] "1" "2" "3" "4" ...
##   ..$ : chr [1:3] "fit" "lwr" "upr"
str(conf_interval)
##  num [1:730, 1:3] 4598051 4445108 4402664 4342467 4304146 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:730] "1" "2" "3" "4" ...
##   ..$ : chr [1:3] "fit" "lwr" "upr"
btv_pre_roll_week_holy2$prd_fit <- pred_interval[,1] # fit
btv_pre_roll_week_holy2$prd_lwr <- pred_interval[,2] # lwr
btv_pre_roll_week_holy2$prd_upr <- pred_interval[,3] # upr
btv_pre_roll_week_holy2$cnf_fit <- conf_interval[,1] # fit
btv_pre_roll_week_holy2$cnf_lwr <- conf_interval[,2] # lwr
btv_pre_roll_week_holy2$cnf_upr <- conf_interval[,3] # upr


#btv_pre_roll_week_holy2$old_pred <- round(predict(old_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response'), 0)
#btv_pre_roll_week_holy2$int_pred <- round(predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2, type='response'), 0)
btv_pre_roll_week_holy2$old_pred <- round(predict(old_btv_pre_roll, newdata=btv_pre_roll_week_holy2), 0)
## Warning in predict.lm(old_btv_pre_roll, newdata = btv_pre_roll_week_holy2):
## prediction from a rank-deficient fit may be misleading
btv_pre_roll_week_holy2$int_pred <- round(predict(int_btv_pre_roll, newdata=btv_pre_roll_week_holy2), 0)


as.numeric(as.Date('2018-01-01'))
## [1] 17532
summary(int_btv_pre_roll)
## 
## Call:
## lm(formula = Request ~ Date + isMon + isTue + isWed + isThu + 
##     isFri + isSat + isHolyday + isJan + isFeb + isMar + isApr + 
##     isMay + isJun + isJul + isAug + isSep + isOct + isNov + isMon:isHolyday + 
##     isTue:isHolyday + isWed:isHolyday + isThu:isHolyday + isFri:isHolyday + 
##     isSat:isHolyday + isMon:isJan + isMon:isFeb + isMon:isMar + 
##     isMon:isApr + isMon:isMay + isMon:isJun + isMon:isJul + isMon:isSep + 
##     isMon:isOct + isMon:isNov + isTue:isFeb + isTue:isMar + isTue:isApr + 
##     isTue:isMay + isTue:isJun + isTue:isJul + isTue:isSep + isTue:isOct + 
##     isTue:isNov + isWed:isFeb + isWed:isMar + isWed:isApr + isWed:isJun + 
##     isWed:isSep + isWed:isOct + isWed:isNov + isThu:isFeb + isThu:isMar + 
##     isThu:isApr + isThu:isSep + isThu:isOct + isFri:isFeb + isFri:isMar + 
##     isFri:isApr + isFri:isAug + isFri:isSep + isSat:isFeb + isSat:isSep, 
##     data = btv_pre_roll_week_holy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2628097  -118726     -562   148608  1219153 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.499e+07  1.774e+06   8.453 3.55e-16 ***
## Date            -6.216e+02  9.912e+01  -6.271 8.11e-10 ***
## isMon           -1.261e+05  1.296e+05  -0.973 0.331078    
## isTue           -2.591e+05  9.847e+04  -2.632 0.008776 ** 
## isWed           -3.010e+05  7.772e+04  -3.872 0.000123 ***
## isThu           -3.605e+05  7.007e+04  -5.145 3.93e-07 ***
## isFri           -3.982e+05  7.092e+04  -5.615 3.36e-08 ***
## isSat            3.862e+04  6.039e+04   0.640 0.522761    
## isHolyday        7.708e+04  2.070e+05   0.372 0.709783    
## isJan            6.092e+05  8.001e+04   7.614 1.47e-13 ***
## isFeb            1.022e+05  1.418e+05   0.720 0.471674    
## isMar            1.321e+05  1.046e+05   1.262 0.207564    
## isApr            4.226e+04  1.079e+05   0.392 0.695420    
## isMay           -3.724e+05  8.299e+04  -4.487 9.09e-06 ***
## isJun           -4.103e+05  9.142e+04  -4.488 9.06e-06 ***
## isJul           -1.223e+05  9.915e+04  -1.233 0.218137    
## isAug           -9.386e+04  9.092e+04  -1.032 0.302447    
## isSep           -1.077e+06  1.725e+05  -6.243 9.57e-10 ***
## isOct            2.929e+04  1.186e+05   0.247 0.804974    
## isNov            1.681e+04  1.034e+05   0.162 0.870997    
## isMon:isHolyday  9.055e+04  2.664e+05   0.340 0.734094    
## isTue:isHolyday  2.390e+05  2.550e+05   0.937 0.349087    
## isWed:isHolyday  5.607e+05  2.584e+05   2.170 0.030504 *  
## isThu:isHolyday  1.589e+05  2.916e+05   0.545 0.586211    
## isFri:isHolyday -3.719e+05  3.263e+05  -1.140 0.254908    
## isSat:isHolyday -7.589e+05  3.211e+05  -2.363 0.018524 *  
## isMon:isJan     -1.483e+05  1.731e+05  -0.857 0.391872    
## isMon:isFeb     -4.503e+04  2.132e+05  -0.211 0.832833    
## isMon:isMar     -5.135e+05  1.896e+05  -2.709 0.006996 ** 
## isMon:isApr     -4.797e+05  1.839e+05  -2.609 0.009382 ** 
## isMon:isMay     -6.268e+04  1.835e+05  -0.342 0.732753    
## isMon:isJun     -3.149e+05  1.882e+05  -1.673 0.095057 .  
## isMon:isJul     -6.286e+05  2.076e+05  -3.028 0.002597 ** 
## isMon:isSep      2.765e+05  2.656e+05   1.041 0.298418    
## isMon:isOct     -7.044e+05  2.183e+05  -3.227 0.001337 ** 
## isMon:isNov     -3.643e+05  2.233e+05  -1.631 0.103558    
## isTue:isFeb     -2.513e+04  1.940e+05  -0.130 0.896987    
## isTue:isMar     -4.555e+05  1.698e+05  -2.683 0.007554 ** 
## isTue:isApr     -4.212e+05  1.672e+05  -2.518 0.012118 *  
## isTue:isMay     -6.350e+04  1.511e+05  -0.420 0.674495    
## isTue:isJun     -2.133e+05  1.681e+05  -1.269 0.204982    
## isTue:isJul     -2.013e+05  1.895e+05  -1.063 0.288516    
## isTue:isSep      5.299e+05  2.486e+05   2.132 0.033562 *  
## isTue:isOct     -2.205e+05  2.008e+05  -1.098 0.272819    
## isTue:isNov     -3.479e+05  2.066e+05  -1.684 0.092782 .  
## isWed:isFeb      2.199e+05  1.856e+05   1.185 0.236798    
## isWed:isMar     -3.877e+05  1.586e+05  -2.444 0.014896 *  
## isWed:isApr     -3.902e+05  1.608e+05  -2.426 0.015629 *  
## isWed:isJun     -2.136e+05  1.615e+05  -1.323 0.186432    
## isWed:isSep      5.906e+05  2.427e+05   2.433 0.015326 *  
## isWed:isOct     -2.952e+05  1.934e+05  -1.527 0.127511    
## isWed:isNov     -2.062e+05  1.974e+05  -1.045 0.296685    
## isThu:isFeb      2.439e+05  1.831e+05   1.332 0.183571    
## isThu:isMar     -2.437e+05  1.509e+05  -1.615 0.106931    
## isThu:isApr     -3.231e+05  1.573e+05  -2.055 0.040435 *  
## isThu:isSep      7.272e+05  2.381e+05   3.054 0.002388 ** 
## isThu:isOct     -2.841e+05  2.030e+05  -1.400 0.162257    
## isFri:isFeb      2.360e+05  1.850e+05   1.275 0.202887    
## isFri:isMar     -1.308e+05  1.482e+05  -0.883 0.377941    
## isFri:isApr     -2.548e+05  1.576e+05  -1.616 0.106736    
## isFri:isAug      2.557e+05  1.757e+05   1.455 0.146298    
## isFri:isSep      7.937e+05  2.384e+05   3.329 0.000941 ***
## isSat:isFeb      1.286e+05  1.805e+05   0.712 0.476591    
## isSat:isSep      9.338e+05  2.235e+05   4.177 3.51e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 336000 on 472 degrees of freedom
## Multiple R-squared:  0.6431, Adjusted R-squared:  0.5954 
## F-statistic:  13.5 on 63 and 472 DF,  p-value: < 2.2e-16
round(coef(int_btv_pre_roll),4) # 절편과 기울기를 보여주는 함수는 coef()이다.
##     (Intercept)            Date           isMon           isTue 
##   14993480.7174       -621.5944    -126117.7131    -259127.3872 
##           isWed           isThu           isFri           isSat 
##    -300950.0486    -360524.8677    -398224.5950      38621.8208 
##       isHolyday           isJan           isFeb           isMar 
##      77079.9411     609169.3094     102157.7658     132069.5337 
##           isApr           isMay           isJun           isJul 
##      42257.0060    -372362.9828    -410254.8696    -122265.1892 
##           isAug           isSep           isOct           isNov 
##     -93857.2221   -1076852.3234      29293.9191      16806.3765 
## isMon:isHolyday isTue:isHolyday isWed:isHolyday isThu:isHolyday 
##      90551.8062     238999.7573     560724.9489     158854.7147 
## isFri:isHolyday isSat:isHolyday     isMon:isJan     isMon:isFeb 
##    -371915.6405    -758876.0621    -148320.0972     -45027.7859 
##     isMon:isMar     isMon:isApr     isMon:isMay     isMon:isJun 
##    -513508.6919    -479653.9902     -62679.2893    -314870.4363 
##     isMon:isJul     isMon:isSep     isMon:isOct     isMon:isNov 
##    -628584.6865     276477.4784    -704411.5033    -364270.8368 
##     isTue:isFeb     isTue:isMar     isTue:isApr     isTue:isMay 
##     -25130.3863    -455512.4234    -421187.6038     -63503.2404 
##     isTue:isJun     isTue:isJul     isTue:isSep     isTue:isOct 
##    -213314.3106    -201306.4179     529883.7592    -220465.5744 
##     isTue:isNov     isWed:isFeb     isWed:isMar     isWed:isApr 
##    -347949.5683     219867.9706    -387718.6675    -390246.2463 
##     isWed:isJun     isWed:isSep     isWed:isOct     isWed:isNov 
##    -213628.8805     590634.2171    -295228.7569    -206184.3124 
##     isThu:isFeb     isThu:isMar     isThu:isApr     isThu:isSep 
##     243874.2079    -243693.9753    -323146.0828     727180.3531 
##     isThu:isOct     isFri:isFeb     isFri:isMar     isFri:isApr 
##    -284114.2459     235957.3686    -130787.6928    -254768.3861 
##     isFri:isAug     isFri:isSep     isSat:isFeb     isSat:isSep 
##     255722.8495     793687.6748     128606.4749     933844.7731
#write.csv(btv_pre_roll_week_holy2, '111.csv')

p1 <- ggplot(btv_pre_roll_week_holy, aes(x=Date, y=Request)) +
  geom_point() + geom_smooth(method = 'lm', color = 'red', linetype =2) + # 직선 추세선 추가
  geom_smooth() + # 곡선 추세선 추가
  ggtitle("Raw Data 추세선")

p2 <- ggplot(btv_pre_roll_week_holy2, aes(x=Date)) +
  geom_point(aes(y = Request), color = "grey") + 
  geom_jitter(aes(y = int_pred), color = "red", alpha = 0.5, size = 1) + 
  ggtitle("interation 예측")

if(!require(gridExtra)){install.packages("gridExtra"); library(gridExtra)} # ggplot2 그래프를 하나의 그림 안에 위치시킬 수 있게 해준다. 
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(p1, p2, nrow  =2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 194 rows containing missing values (geom_point).

grid.arrange(p1, p2, ncol  =2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 194 rows containing missing values (geom_point).

as.numeric(as.Date('2018-01-01')); as.numeric(as.Date('2018-01-02')); as.numeric(as.Date('2019-12-31'))
## [1] 17532
## [1] 17533
## [1] 18261
as.numeric(as.Date('2019-12-31'))-as.numeric(as.Date('2018-01-01'))
## [1] 729