Data (신한카드 코로나19 카드이용건수)
# 사용된 패키지
library(plyr)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(stringr)
library(data.table)
##
## 다음의 패키지를 부착합니다: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
filelist <- list.files("C:/Users/ysm64/Desktop/Coding/DB/card db", full.names = T)
db <- NULL
for (i in filelist) {
x <- read_xlsx(i, col_names = T)
db <- rbind.fill(db, x)
}
db
## # A tibble: 5,538 x 3
## 이용일자 업종대분류 `카드결제건수(천건)`
## <dbl> <chr> <dbl>
## 1 20190101 가전/가구 149
## 2 20190101 가정생활/서비스 189
## 3 20190101 교육/학원 142
## 4 20190101 미용 464
## 5 20190101 스포츠/문화/레저 1752
## 6 20190101 여행/교통 260
## 7 20190101 요식/유흥 9264
## 8 20190101 유통 11307
## 9 20190101 음/식료품 1192
## 10 20190101 의료 432
## # ... with 5,528 more rows
#카드결제건수(천건)
data1 <- db %>% data.table()
head(data1)
## 이용일자 업종대분류 카드결제건수(천건)
## 1: 20190101 가전/가구 149
## 2: 20190101 가정생활/서비스 189
## 3: 20190101 교육/학원 142
## 4: 20190101 미용 464
## 5: 20190101 스포츠/문화/레저 1752
## 6: 20190101 여행/교통 260
## 이용일자 업종대분류 카드결제건수(천건)
## 1: 20200630 유통 13551
## 2: 20200630 음/식료품 1549
## 3: 20200630 의료 2887
## 4: 20200630 자동차 418
## 5: 20200630 주유 2008
## 6: 20200630 패션/잡화 360
## Classes 'data.table' and 'data.frame': 5538 obs. of 3 variables:
## $ 이용일자 : num 20190101 20190101 20190101 20190101 20190101 ...
## $ 업종대분류 : chr "가전/가구" "가정생활/서비스" "교육/학원" "미용" ...
## $ 카드결제건수(천건): num 149 189 142 464 1752 ...
## - attr(*, ".internal.selfref")=<externalptr>
colnames(data1) <- c("Days", "Category", "Payment")
summary(data1)
## Days Category Payment
## Min. :20190101 Length:5538 Min. : 37.0
## 1st Qu.:20190417 Class :character 1st Qu.: 357.0
## Median :20190802 Mode :character Median : 587.5
## Mean :20194698 Mean : 2652.3
## 3rd Qu.:20200316 3rd Qu.: 2156.8
## Max. :20200630 Max. :16650.0
## Days Category Payment
## 1: 20190101 가전/가구 149
## 2: 20190101 가정생활/서비스 189
## 3: 20190101 교육/학원 142
## 4: 20190101 미용 464
## 5: 20190101 스포츠/문화/레저 1752
## 6: 20190101 여행/교통 260
Date
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
data1$Date <- data1$Days
data1$Date <- ymd(data1$Date)
head(data1)
## Days Category Payment Date
## 1: 20190101 가전/가구 149 2019-01-01
## 2: 20190101 가정생활/서비스 189 2019-01-01
## 3: 20190101 교육/학원 142 2019-01-01
## 4: 20190101 미용 464 2019-01-01
## 5: 20190101 스포츠/문화/레저 1752 2019-01-01
## 6: 20190101 여행/교통 260 2019-01-01
## Classes 'data.table' and 'data.frame': 5538 obs. of 4 variables:
## $ Days : num 20190101 20190101 20190101 20190101 20190101 ...
## $ Category: chr "가전/가구" "가정생활/서비스" "교육/학원" "미용" ...
## $ Payment : num 149 189 142 464 1752 ...
## $ Date : Date, format: "2019-01-01" "2019-01-01" ...
## - attr(*, ".internal.selfref")=<externalptr>
data1$Day1 <- wday(data1$Date, label = T) #요일을 텍스트로 확인하기 위해 label = T 넣음.
data1$Day2 <- wday(data1$Date, label = F) #요일을 텍스트로 확인하기 위해 label = T 넣음.
data1$Week <- week(data1$Date)
data1$Month <- month(data1$Date)
data1$Q <- semester(data1$Date) #상하반기 구분 함수
data1$Q4 <- quarter(data1$Date) #4분기 구분 함수
data1$Year <- year(data1$Date)
data1 <- data1 %>% mutate(날짜 = make_datetime(Year, Month, Day2)) #make_datetime : 년, 월, 일, 분, 초 순서임.
data1$DATE_V <- make_datetime(data1$Year, data1$Month, data1$Day2)
head(data1)
## Days Category Payment Date Day1 Day2 Week Month Q Q4 Year
## 1: 20190101 가전/가구 149 2019-01-01 화 3 1 1 1 1 2019
## 2: 20190101 가정생활/서비스 189 2019-01-01 화 3 1 1 1 1 2019
## 3: 20190101 교육/학원 142 2019-01-01 화 3 1 1 1 1 2019
## 4: 20190101 미용 464 2019-01-01 화 3 1 1 1 1 2019
## 5: 20190101 스포츠/문화/레저 1752 2019-01-01 화 3 1 1 1 1 2019
## 6: 20190101 여행/교통 260 2019-01-01 화 3 1 1 1 1 2019
## 날짜 DATE_V
## 1: 2019-01-03 2019-01-03
## 2: 2019-01-03 2019-01-03
## 3: 2019-01-03 2019-01-03
## 4: 2019-01-03 2019-01-03
## 5: 2019-01-03 2019-01-03
## 6: 2019-01-03 2019-01-03
## Classes 'data.table' and 'data.frame': 5538 obs. of 13 variables:
## $ Days : num 20190101 20190101 20190101 20190101 20190101 ...
## $ Category: chr "가전/가구" "가정생활/서비스" "교육/학원" "미용" ...
## $ Payment : num 149 189 142 464 1752 ...
## $ Date : Date, format: "2019-01-01" "2019-01-01" ...
## $ Day1 : Ord.factor w/ 7 levels "일"<"월"<"화"<..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Day2 : num 3 3 3 3 3 3 3 3 3 3 ...
## $ Week : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Q : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Q4 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Year : num 2019 2019 2019 2019 2019 ...
## $ 날짜 : POSIXct, format: "2019-01-03" "2019-01-03" ...
## $ DATE_V : POSIXct, format: "2019-01-03" "2019-01-03" ...
## - attr(*, ".internal.selfref")=<externalptr>
Plot_2
options(scipen = 100) #자연상수 e 없애는 함수.
DB <- data1 %>% data.table()
C_DB <- DB[ , sum(Payment), by = c("Year", "Month", "Category")]
colnames(C_DB) <- c("Year", "Month", "Category", "Payment")
C_DB
## Year Month Category Payment
## 1: 2019 1 가전/가구 8602
## 2: 2019 1 가정생활/서비스 15904
## 3: 2019 1 교육/학원 10798
## 4: 2019 1 미용 17262
## 5: 2019 1 스포츠/문화/레저 45637
## ---
## 191: 2020 6 음/식료품 51674
## 192: 2020 6 의료 79369
## 193: 2020 6 자동차 12344
## 194: 2020 6 주유 62544
## 195: 2020 6 패션/잡화 14747
## Classes 'data.table' and 'data.frame': 195 obs. of 4 variables:
## $ Year : num 2019 2019 2019 2019 2019 ...
## $ Month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Category: chr "가전/가구" "가정생활/서비스" "교육/학원" "미용" ...
## $ Payment : num 8602 15904 10798 17262 45637 ...
## - attr(*, ".internal.selfref")=<externalptr>
C_DB$Year <- as.character(C_DB$Year)
C_DB$Month <- as.character(C_DB$Month)
ggplot(C_DB, aes(x = Month, y = Payment, group = Year, colour = Year)) +
geom_point() +
facet_wrap(~ Category, scales="free") +
coord_cartesian(ylim = c(200, 460000)) +
labs(title = "신한카드 코로나19 카드이용건수",
x = "카드 이용 영역",
y = "카드 이용 건수(천건)")

Plot_3
options(scipen = 100)
C_DB_V <- C_DB %>% filter(Category == c("요식/유흥", "유통"))
## Warning in Category == c("요식/유흥", "유통"): 두 객체의 길이가 서로 배수관계에
## 있지 않습니다
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11291 315588 352118 322898 394153 452138
ggplot(C_DB_V, aes(x = Month, y = Payment, group = Year, colour = Year)) +
geom_point() +
facet_wrap(~ Category) +
coord_cartesian(ylim = c(10000, 460000)) +
labs(title = "신한카드 카드이용건수 TOP 2 영역",
x = "카드 이용 영역",
y = "카드 이용 건수(천건)") +
geom_text(aes(label = Payment), vjust = -0.5)
