library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Start from an empty workspace, then print the working directory that the
# relative data-file path used by this script is resolved against.
# NOTE(review): clearing the workspace is kept because it is part of the
# original script's behaviour; it removes every object in the calling
# environment, which is unfriendly when the file is source()d from a session.
rm(list = ls())
getwd()
## [1] "C:/data"
# Read the monthly subway card-usage file. read.delim() expects a
# tab-separated file with a header row; the file is decoded as EUC-KR so the
# Korean column names and values are read correctly.
subway_202210 <- read.delim(
  file = "CARD_SUBWAY_MONTH_202210.txt",
  fileEncoding = "euc-kr"
)

# Quick look at column names, types and the first few values.
glimpse(subway_202210)
## Rows: 18,785
## Columns: 6
## $ 사용일자 <int> 20221001, 20221001, 20221001, 20221001, 20221001, 2022100…
## $ 노선명 <chr> "3호선", "3호선", "3호선", "3호선", "3호선", "3호선", "3…
## $ 역명 <chr> "고속터미널", "교대(법원.검찰청)", "학여울", "대청", "일…
## $ 승차총승객수 <int> 59124, 8040, 3355, 6517, 6231, 15481, 6913, 4490, 4155, 1…
## $ 하차총승객수 <int> 62989, 4875, 3401, 5926, 6025, 15390, 6566, 4231, 3923, 1…
## $ 등록일자 <int> 20221004, 20221004, 20221004, 20221004, 20221004, 2022100…
# Base-R view of the same structure (column types and leading values).
utils::str(subway_202210)
## 'data.frame': 18785 obs. of 6 variables:
## $ 사용일자 : int 20221001 20221001 20221001 20221001 20221001 20221001 20221001 20221001 20221001 20221001 ...
## $ 노선명 : chr "3호선" "3호선" "3호선" "3호선" ...
## $ 역명 : chr "고속터미널" "교대(법원.검찰청)" "학여울" "대청" ...
## $ 승차총승객수: int 59124 8040 3355 6517 6231 15481 6913 4490 4155 10551 ...
## $ 하차총승객수: int 62989 4875 3401 5926 6025 15390 6566 4231 3923 10189 ...
## $ 등록일자 : int 20221004 20221004 20221004 20221004 20221004 20221004 20221004 20221004 20221004 20221004 ...
# Give the Korean column headers short English names (new name = old name)
# and drop the registration-date column, which the rest of the script never
# reads. all_of() makes both steps fail loudly if a column is missing.
subway_202210 <- subway_202210 %>%
  rename(all_of(c(
    date = "사용일자",
    line = "노선명",
    station = "역명",
    on_pass = "승차총승객수",
    off_pass = "하차총승객수"
  ))) %>%
  select(-all_of("등록일자"))

# Five-number summaries of the renamed columns.
summary(subway_202210)
## date line station on_pass
## Min. :20221001 Length:18785 Length:18785 Min. : 1
## 1st Qu.:20221008 Class :character Class :character 1st Qu.: 3802
## Median :20221016 Mode :character Mode :character Median : 7865
## Mean :20221016 Mean :10917
## 3rd Qu.:20221024 3rd Qu.:14432
## Max. :20221031 Max. :95408
## off_pass
## Min. : 0
## 1st Qu.: 3615
## Median : 7580
## Mean : 10875
## 3rd Qu.: 14197
## Max. :102651
# 2-1: mean boarding (on_p) and alighting (off_p) passenger counts taken over
# every row of the data set.
summarise(
  subway_202210,
  on_p = mean(on_pass),
  off_p = mean(off_pass)
)
## on_p off_p
## 1 10916.98 10875.09
# 2-2: the row (or rows, if tied) whose boarding count equals the largest
# boarding count in the data set.
filter(subway_202210, on_pass == max(on_pass))
## date line station on_pass off_pass
## 1 20221028 2호선 잠실(송파구청) 95408 95061
# 2-3: the three stations with the highest mean combined passenger count
# (boarding + alighting), averaged over that station's rows.
# NOTE(review): rows are grouped by station name only, so a station name that
# appears under more than one line is averaged across all of those rows;
# confirm this is the intended definition.
subway_202210 %>%
  group_by(station) %>%
  summarise(m = mean(on_pass + off_pass)) %>%
  arrange(desc(m)) %>%
  head(3)
## # A tibble: 3 × 2
## station m
## <chr> <dbl>
## 1 강남 139260.
## 2 구로디지털단지 104601.
## 3 삼성(무역센터) 94918.
# 2-4: among rows for line "1호선", the row (or rows, if tied) with the
# largest combined passenger count (boarding + alighting).
subway_202210 %>%
  filter(line == "1호선") %>%
  mutate(total_pass = on_pass + off_pass) %>%
  filter(total_pass == max(total_pass))
## date line station on_pass off_pass total_pass
## 1 20221028 1호선 서울역 61206 60155 121361
# 2-5: number of rows recorded for each usage date.
table(subway_202210[["date"]])
##
## 20221001 20221002 20221003 20221004 20221005 20221006 20221007 20221008
## 606 606 604 606 607 606 606 606
## 20221009 20221010 20221011 20221012 20221013 20221014 20221015 20221016
## 605 605 607 604 606 607 605 606
## 20221017 20221018 20221019 20221020 20221021 20221022 20221023 20221024
## 606 606 607 607 606 606 606 606
## 20221025 20221026 20221027 20221028 20221029 20221030 20221031
## 605 606 607 608 607 604 606
# Characters 7-8 of the yyyymmdd usage date are the day of the month.
# substr() coerces the integer date to text, so `day` is a two-character
# string at this point (e.g. "01").
subway_202210[["day"]] <- substr(subway_202210[["date"]], start = 7, stop = 8)

# Row counts per day of month.
table(subway_202210[["day"]])
##
## 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20
## 606 606 604 606 607 606 606 606 605 605 607 604 606 607 605 606 606 606 607 607
## 21 22 23 24 25 26 27 28 29 30 31
## 606 606 606 606 605 606 607 608 607 604 606
# Convert the two-character day of month extracted earlier into a number.
subway_202210$day <- as.numeric(subway_202210$day)

# Label every row "weekend" or "weekday" from the calendar date itself rather
# than from a hand-typed list of day numbers, which is only correct for one
# specific month. The yyyymmdd integer is parsed into a Date and the weekday
# is read from POSIXlt's `wday` field (0 = Sunday, ..., 6 = Saturday), which
# does not depend on the session locale the way weekdays() does.
# Rows whose date cannot be parsed fall through to "weekday", matching how
# the previous lookup treated unmatched values.
# NOTE(review): only Saturdays and Sundays count as "weekend"; public
# holidays that fall on Monday-Friday are still labelled "weekday".
subway_202210$week <- ifelse(
  as.POSIXlt(
    as.Date(as.character(subway_202210$date), format = "%Y%m%d")
  )$wday %in% c(0, 6),
  "weekend",
  "weekday"
)

# Row counts per label.
table(subway_202210$week)
##
## weekday weekend
## 12728 6057
# Strongly prefer fixed over scientific notation when printing numbers, so
# the test output below is shown in full digits.
options(scipen = 999)

# Combined passenger count (boarding + alighting) for every row.
subway_202210 <- mutate(subway_202210, total_pass = on_pass + off_pass)

# Two-sample t-test of the combined count between the weekday and weekend
# groups. t.test() defaults to var.equal = FALSE, i.e. Welch's test.
t.test(total_pass ~ week, data = subway_202210)
##
## Welch Two Sample t-test
##
## data: total_pass by week
## t = 23.347, df = 15046, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means between group weekday and group weekend is not equal to 0
## 95 percent confidence interval:
## 6546.031 7745.939
## sample estimates:
## mean in group weekday mean in group weekend
## 24096.21 16950.23