關於澳門第三次全民核酸的初步分析,核酸檢測時間為2021年10月4日9pm -7日9pm
因第二次全民核酸已得顯著改善,故第三次核酸的安排鼓勵縮短整體時間至兩日內
(第三日僅開放部分八個檢測點以延續正常檢測工作)
這裡的分析主要聚焦看前兩日內的各站點壓力表現
以每採樣站每採樣點平均半小時採樣數做主要表達
主要想了解,在整體時間縮短至兩日內的情況下,各站點整體是否負載均衡
是否有壓力過高或過低的時段可以改善
PreventCovid-image-tag
Station-image-tag
RNA010-image-tag
filelist2 <- dir('../../RNA010/20211004', full.names=TRUE)
file2 <- tail(filelist2, 1)
sheets <- c("20211004A","20211005A","20211006A","20211007A")
df <- data.frame()
for (sheetname in sheets) {
#sheetname <- "20211004A"
### Extract first row for location list
cnames <- read_excel(file2, sheet = sheetname, n_max = 0, na = "---") %>% names()
lls1 <- sub(".*?-", "",cnames[seq(6, length(cnames), 3)])
### Extract data from 2nd row
rdf1 <- read_excel(file2, sheet=sheetname, na = "---", skip = ifelse(sheetname == "20211004A", 2, 1)) # skip 2 because there exists a hidden row 1 in this spreadsheet
### Set date
rdf1$SwabDate <- as.Date(strptime(str_remove(sheetname, "A"),"%Y%m%d"))
rdf1$SwabTime <- substr(rdf1$預約時段,1,5)
### select columns and rows
sdf1 <- rdf1 %>% select(c(6:ncol(rdf1))) %>% slice(2:nrow(rdf1)) %>% select(-contains("總人次"))
### Repeat Location info for number of rows
Location <- rep(lls1, each = nrow(sdf1) * 2)
### Melt to pivot
sdf1 <- as.data.frame(sdf1)
mdf1 <- reshape::melt(sdf1, id = c("SwabDate", "SwabTime"))
### Combine Location with dataset
df1 <- cbind(Location,mdf1)
### Clean away column names with ...
df1$variable <- sub("\\....*", "", df1$variable)
df <- rbind(df,as.data.frame(df1))
}
pdf <- df %>% pivot_wider(names_from = variable, values_from = value)
pdf <- as.data.frame(pdf)
pdf$SwabCount <- rowSums(pdf[ ,c("口咽拭", "鼻咽拭")], na.rm=TRUE)
pdf$DateTimeRound <- as.POSIXlt(paste(pdf$SwabDate, pdf$SwabTime))
attr(pdf$DateTimeRound, "tzone") <- "GMT"
pdf$HourNumber <-
sapply(strsplit(pdf$SwabTime,":"),
function(x) {
x <- as.numeric(x)
x[1]+x[2]/60
}
)
pdf <- pdf %>%
group_by(Location) %>%
mutate(AvgSwabCount = mean(SwabCount)) %>%
ungroup() %>%
mutate(
DurationHour = as.numeric((DateTimeRound - ymd_hms("2021-10-04 21:00:00")),"hours"),
DurationDay = as.numeric((DateTimeRound - ymd_hms("2021-10-04 21:00:00")),"days"),
DurationDayNumber = as.integer(as.numeric((DateTimeRound - ymd_hms("2021-10-04 21:00:00")),"days") + 1)
)
str(pdf)
## tibble [4,608 x 12] (S3: tbl_df/tbl/data.frame)
## $ Location : chr [1:4608] "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" ...
## $ SwabDate : Date[1:4608], format: "2021-10-04" "2021-10-04" ...
## $ SwabTime : chr [1:4608] "21:00" "21:30" "22:00" "22:30" ...
## $ 口咽拭 : num [1:4608] 110 92 92 102 113 124 77 76 69 60 ...
## $ 鼻咽拭 : num [1:4608] 56 57 64 65 53 41 35 36 42 44 ...
## $ SwabCount : num [1:4608] 166 149 156 167 166 165 112 112 111 104 ...
## $ DateTimeRound : POSIXlt[1:4608], format: "2021-10-04 21:00:00" "2021-10-04 21:30:00" ...
## $ HourNumber : num [1:4608] 21 21.5 22 22.5 23 23.5 21 21.5 22 22.5 ...
## $ AvgSwabCount : num [1:4608] 103 103 103 103 103 ...
## $ DurationHour : num [1:4608] 0 0.5 1 1.5 2 2.5 0 0.5 1 1.5 ...
## $ DurationDay : num [1:4608] 0 0.0208 0.0417 0.0625 0.0833 ...
## $ DurationDayNumber: int [1:4608] 1 1 1 1 1 1 1 1 1 1 ...
filelist <- dir('../../aptmon-scraping/20211004', full.names=TRUE)
scrp <- data.frame()
for (file in filelist) {
filetimestr <- sub(".csv", "",sub(".*-", "", file))
filetime <- strptime(filetimestr,"%Y%m%d%H%M%S")
temp <- read.csv(file, na = "---")
temp$DateTime <- as.POSIXlt(filetime)
scrp <- rbind(scrp,as.data.frame(temp))
}
attr(scrp$DateTime, "tzone") <- "GMT"
scrp$DateTimeRound <- round(scrp$DateTime, "30mins")
scrp$WaitingQueue <- as.numeric(as.character(sub("*人", "", scrp$輪候人數)))
scrp$WaitingMinutes <- as.numeric(as.character(sub("分鐘", "",sub(".*>", "", scrp$等候時間))))
scrp$DeskCount <- rowSums(scrp[ ,c("口採樣點", "鼻採樣點")], na.rm=TRUE)
scrp$HourNumber <-
sapply(strsplit(substr(scrp$DateTimeRound,12,16),":"),
function(x) {
x <- as.numeric(x)
x[1]+x[2]/60
}
)
str(scrp)
## 'data.frame': 12113 obs. of 15 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ 序號 : chr "A01" "A02" "A03" "A04" ...
## $ 地點 : chr "街坊會聯合總會社區服務大樓街坊會聯合總會社區服務大樓口採樣點: 2鼻採樣點: 3輪候人數:175人等候時間:<U+2B24>35分鐘"| __truncated__ "慈幼中學慈幼中學口採樣點: 1鼻採樣點: 1輪候人數:228人等候時間:<U+2B24>114分鐘類別: (A) 關愛專站(無需預約)" "望廈體育中心三樓望廈體育中心三樓口採樣點: 3鼻採樣點: 2輪候人數:25人等候時間:<U+2B24>5分鐘類別: (A) 關愛專站(無需預約)" "塔石體育館B館塔石體育館B館口採樣點: 2鼻採樣點: 2輪候人數:30人等候時間:<U+2B24>8分鐘類別: (A) 關愛專站(無需預約)" ...
## $ 口採樣點 : int 2 1 3 2 2 1 3 4 2 3 ...
## $ 鼻採樣點 : int 3 1 2 2 1 1 2 2 2 2 ...
## $ 輪候人數 : chr "175人" "228人" "25人" "30人" ...
## $ 等候時間 : chr "<U+2B24>35分鐘" "<U+2B24>114分鐘" "<U+2B24>5分鐘" "<U+2B24>8分鐘" ...
## $ 類別 : chr "A" "A" "A" "A" ...
## $ Location : chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ DateTime : POSIXlt, format: "2021-10-04 21:21:48" "2021-10-04 21:21:48" ...
## $ DateTimeRound : POSIXct, format: "2021-10-04 21:30:00" "2021-10-04 21:30:00" ...
## $ WaitingQueue : num 175 228 25 30 260 38 120 160 120 130 ...
## $ WaitingMinutes: num 35 114 5 8 87 19 24 27 30 26 ...
## $ DeskCount : num 5 2 5 4 3 2 5 6 4 5 ...
## $ HourNumber : num 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 ...
station <- scrp %>% group_by(序號,Location,類別,DateTimeRound,HourNumber) %>%
summarise(
DeskCount.mean = mean(DeskCount, na.rm = TRUE),
DeskCount.median = median(DeskCount, na.rm = TRUE),
口採樣點.mean = mean(口採樣點, na.rm = TRUE),
鼻採樣點.mean = mean(鼻採樣點, na.rm = TRUE),
口採樣點.median = median(口採樣點, na.rm = TRUE),
鼻採樣點.median = median(鼻採樣點, na.rm = TRUE),
WaitingQueue.mean = mean(WaitingQueue, na.rm = TRUE),
WaitingMinutes.mean = mean(WaitingMinutes, na.rm = TRUE),
WaitingQueue.median = median(WaitingQueue, na.rm = TRUE),
WaitingMinutes.median = median(WaitingMinutes, na.rm = TRUE),
) %>%
ungroup() %>%
as.data.frame()
station <- station %>%
complete(nesting(Location,序號,類別),
DateTimeRound = seq.POSIXt(as.POSIXct("2021-10-05 06:00") + hms("08:00:00"),
as.POSIXct("2021-10-05 09:00") + hms("08:00:00"),
by="30 min")
) %>%
filter(!(Location %in% c("綜藝館(免費)", "綜藝館(自費)") &
DateTimeRound <= as.POSIXct("2021-10-05 09:00") + hms("08:00:00")
)) %>%
mutate(HourNumber = hour(DateTimeRound) + minute(DateTimeRound) / 60) %>%
arrange(Location,DateTimeRound) %>%
group_by(Location) %>%
fill(`DeskCount.mean`,`DeskCount.median`,
`口採樣點.mean`,`鼻採樣點.mean`,`口採樣點.median`,`鼻採樣點.median`,
`WaitingQueue.mean`,`WaitingMinutes.mean`,`WaitingQueue.median`,`WaitingMinutes.median`) %>%
mutate(DeskCount.ntile = ntile(DeskCount.mean, 5),
AvgDeskCount = median(DeskCount.mean)) %>%
ungroup()
LonLat <- unique(scrp[c("Location")])
LonLat$MapLoc <- ifelse(LonLat$Location == "科大體育館","Macao, 澳門科技大學室內體育館 Gymnasium",LonLat$Location)
LonLat[grep(".*工人體育場", LonLat$Location, perl=T), ]$MapLoc <- "Macao, 工人體育場館"
LonLat[grep(".*1樓", LonLat$Location, perl=T), ]$MapLoc <- "望廈體育中心 Centro Desportivo Mong-Há"
LonLat <- mutate_geocode(LonLat, MapLoc)
LonLat$area <- ifelse(LonLat$lat>=22.17,'Macao','Taipa')
str(LonLat)
## 'data.frame': 43 obs. of 5 variables:
## $ Location: chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ MapLoc : chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ lon : num 113.5 113.5 -77.5 113.5 113.6 ...
## $ lat : num 22.2 22.2 37.6 22.2 22.2 ...
## $ area : chr "Macao" "Macao" "Macao" "Macao" ...
ldf <- merge(LonLat, pdf)
mdf <- merge(station, pdf, by = c("Location","DateTimeRound","HourNumber"))
mdf <- mdf %>%
mutate(
SwabPerDesk = mdf$SwabCount / ifelse(is.na(mdf$DeskCount.mean) | mdf$DeskCount.mean == 0, 1,
mdf$DeskCount.mean),
SwabPerDesk.ntile = ntile(SwabPerDesk, 4),
MouthPerDesk = mdf$口咽拭 / ifelse(is.na(mdf$口採樣點.mean) | mdf$口採樣點.mean == 0 , 1,
mdf$口採樣點.mean),
NosePerDesk = mdf$鼻咽拭 / ifelse(is.na(mdf$鼻採樣點.mean) | mdf$鼻採樣點.mean == 0, 1,
mdf$鼻採樣點.mean)) %>%
group_by(Location) %>%
mutate(AvgSwabPerDesk = mean(SwabPerDesk)) %>%
ungroup()
lldf <- merge(LonLat, mdf)
sum(pdf$SwabCount,na.rm = TRUE)
## [1] 627865
options("scipen" = 100, "digits" = 4)
p1 <- df %>% group_by(variable) %>% summarise(value.sum = sum(value, na.rm = TRUE))
p2 <- ldf %>% group_by(area) %>% tally(SwabCount)
rng <- range(0, p1$value.sum, p2$n)
g1 <- ggplot(data=p1, aes(x = variable, y = value.sum, fill = variable)) +
geom_bar(stat="identity", alpha = .7) + coord_flip(ylim = rng) +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
guides(alpha = FALSE) + theme_minimal() +
xlab("SWab Method") + ylab("Swab Counts")
g2 <- ggplot(data=p2, aes(x = reorder(area,n), y = n, fill = area)) +
geom_bar(stat="identity", alpha = .7) + coord_flip(ylim = rng) +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
guides(alpha = FALSE) + theme_minimal() +
xlab("Macao / Cotai") + ylab("Swab Counts")
grid.arrange(g1, g2, ncol = 1, top = "Total Swabs by Method / Location Area")
tilevalue <- c(max(filter(mdf, SwabPerDesk.ntile == 1)$SwabPerDesk),
max(filter(mdf, SwabPerDesk.ntile == 2)$SwabPerDesk),
max(filter(mdf, SwabPerDesk.ntile == 3)$SwabPerDesk),
max(filter(mdf, SwabPerDesk.ntile == 4)$SwabPerDesk))
tilevalue
## [1] 26.88 33.50 42.17 122.38
fl <- as_labeller(
c(`1` = "below 27 swap/desk", `2` = "below 33 swap/desk",`3` = "below 42 swap/desk", `4` = "below 122 swap/desk"))
ggplot(mdf, aes(x = HourNumber, fill = factor(DurationDayNumber))) +
geom_histogram(binwidth = 1, alpha = .7) +
geom_hline(linetype = "dotted", yintercept = 32, color = "goldenrod") +
scale_fill_viridis_d(name = "Day", option = 'magma') +
facet_wrap(~SwabPerDesk.ntile, ncol = 2, labeller = fl) +
theme_minimal() +
xlab("24 Hours") + ylab("Counts") +
ggtitle("Swap per desk in 4-tiles, 24 hours")
p3 <- pdf %>% group_by(Location, DurationDayNumber) %>% tally(SwabCount)
ggplot(p3, aes(x = reorder(Location, n), y = n, fill = factor(DurationDayNumber))) +
geom_bar(stat = "identity", alpha = .7) + coord_flip() +
scale_fill_viridis_d(name = "Day", option = 'magma') +
guides(alpha = FALSE) + theme_minimal() +
ggtitle("Total swabs by location") + xlab("Location") + ylab("Swab Counts")
ggplot(mdf, aes(x = reorder(Location, AvgDeskCount), y = DeskCount.mean, fill = AvgDeskCount)) +
geom_boxplot(alpha = .7) +
coord_flip() +
scale_fill_viridis_c(option = 'magma', direction = -1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
ggtitle("Number of swab desk by location") + xlab("Location") + ylab("Number of Swab Desks")
prdf <- mdf %>%
group_by(Location, DeskCount.ntile) %>%
summarise(SwabCountByDesk = sum(SwabCount, na.rm = TRUE)) %>%
ungroup() %>%
group_by(Location) %>%
mutate(prop = SwabCountByDesk / sum(SwabCountByDesk)) %>%
ungroup() %>%
select(-SwabCountByDesk) %>%
pivot_wider(
id_cols = Location,
names_from = DeskCount.ntile,
values_from = prop
) %>%
mutate(Location = fct_reorder(Location, `5`)) %>%
pivot_longer(
cols = -Location,
names_to = "DeskCount.ntile",
values_to = "prop"
)
## `summarise()` has grouped output by 'Location'. You can override using the `.groups` argument.
ggplot(prdf, aes(DeskCount.ntile, Location)) +
geom_tile(aes(fill = prop), alpha = .7) +
geom_text(aes(label = scales::percent(prop, accuracy = 1)), size = 3) +
scale_fill_viridis_c(option = 'magma', direction = -1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
ggtitle("Number Of swabs in proportions by locations") + xlab("Number of Swab Desks in 5-tiles") + ylab("Location")
#unique(mdf[c("Location", "AvgSwabPerDesk")]) %>% arrange(-AvgSwabPerDesk)
pldf1 <- mdf[c("Location", "DateTimeRound", "DurationHour","口咽拭", "鼻咽拭")] %>%
pivot_longer(
cols = c("口咽拭", "鼻咽拭"),
names_to = "variable",
values_to = "Count"
)
pldf2 <- mdf[c("Location", "DateTimeRound", "DurationHour","口採樣點.mean","鼻採樣點.mean")] %>%
pivot_longer(
cols = c("口採樣點.mean","鼻採樣點.mean"),
names_to = "variable",
values_to = "Count"
)
pldf <- rbind(pldf1, pldf2) %>%
mutate(variable = case_when(variable == "口咽拭" ~ "M.Swab Sum",
variable == "口採樣點.mean" ~ "M.Swab Desk",
variable == "鼻咽拭" ~ "N.Swab Sum",
variable == "鼻採樣點.mean" ~ "N.Swab Desk",
TRUE ~ "N/A"
)) %>%
arrange(Location, DateTimeRound, DurationHour, variable) %>%
group_by(Location, variable) %>%
mutate(prop = Count / sum(Count, na.rm = TRUE)) %>%
ungroup()
s1 <- c("綜藝館(免費)","鏡平學校(中學部)","望廈體育中心1樓","工人體育場")
sh1 <- filter(pldf, Location %in% s1) %>%
ggplot(aes(DateTimeRound, variable)) +
geom_tile(aes(fill = prop), alpha = .8) +
scale_fill_viridis_c(option = 'magma', direction = -1) +
facet_wrap(~Location, ncol = 1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
xlab("") + ylab("Number proportion")
sb1 <- filter(mdf, Location %in% s1) %>%
ggplot(aes(x = DateTimeRound, y = SwabPerDesk, fill = SwabPerDesk)) +
geom_bar(stat = "identity", alpha = .8) +
geom_hline(linetype = "dotted", aes(yintercept = AvgSwabPerDesk), color = "goldenrod") +
scale_fill_viridis_c(option = 'magma', direction = -1) +
facet_wrap(~Location, ncol = 1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
xlab("") + ylab("Swabs per desk")
grid.arrange(sh1, sb1, nrow = 1, top = "Set of 4 locations from Macao area in 3 days")
s2 <- c("北安客運碼頭","澳門威尼斯人","澳門東亞運體育館A館","奧林匹克體育中心-運動場-室內體育館")
sh2 <- filter(pldf, Location %in% s2) %>%
ggplot(aes(DateTimeRound, variable)) +
geom_tile(aes(fill = prop), alpha = .8) +
scale_fill_viridis_c(option = 'magma', direction = -1) +
facet_wrap(~Location, ncol = 1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
xlab("") + ylab("Number proportion")
sb2 <- filter(mdf, Location %in% s2) %>%
ggplot(aes(x = DateTimeRound, y = SwabPerDesk, fill = SwabPerDesk)) +
geom_bar(stat = "identity", alpha = .8) +
geom_hline(linetype = "dotted", aes(yintercept = AvgSwabPerDesk), color = "goldenrod") +
scale_fill_viridis_c(option = 'magma', direction = -1) +
facet_wrap(~Location, ncol = 1) +
guides(fill = FALSE, alpha = FALSE) + theme_minimal() +
xlab("") + ylab("Swabs per desk")
grid.arrange(sh2, sb2, nrow = 1, top = "Set of 4 locations from Cotai area in 3 days")
p4 <- lldf %>% filter(SwabPerDesk.ntile == 4)
plot <- ggmap(get_map(location = "taipa, macao", zoom = 12), darken = .5,
base_layer = ggplot(data = p4, aes(x = lon, y = lat, frame = DurationHour, ids = Location))) +
geom_point(data = p4, aes(color = SwabPerDesk, size = SwabPerDesk, alpha = .5)) +
scale_size(range = c(0, 12)) +
scale_color_viridis_c(option = "magma")
ggplotly(plot)
威尼斯人採樣站點第二日實景,較第一日為空閒
Venetian-image-tag
預設約20採樣點,於第二日中午調整至僅需開放 5-6 採樣點供基本均無需等待