Find out the number of days you have spent at NCKU as a registered student or staff person.
大學部:2014/09 入學,2019/08/21 畢業離校
博士班:2019/09 註冊入學 至今
NCKU_start <- as.Date("2014/09/01")
NCKU_end <- Sys.Date()
在 NCKU 的日子,扣掉學籍切換的間隔期,總天數為:
as.numeric(difftime(NCKU_end, NCKU_start, unit="days"))-as.numeric(difftime("2019/09/01", "2019/08/21", unit="days"))
## [1] 2082
Reproduce the plot of calls for police assistances around 24 hours in New York City using the data set here.
dta2 <- read.csv("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/calls_nyc.csv", header = TRUE)
str(dta2)
## 'data.frame': 24 obs. of 2 variables:
## $ Hour : num 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 ...
## $ Calls: int 1080 910 770 780 380 390 200 300 275 395 ...
dta2$Hour <- factor(dta2$Hour, levels = dta2$Hour)
#install.packages("hrbrthemes")
library(ggplot2)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
ggplot(dta2, aes(Hour, Calls, group=1)) +
geom_bar(width=1,
stat = "identity",
fill="cyan",
col="gray",
alpha=0.2) +
geom_abline(intercept=mean(dta2$Calls),
slope=0,
col="pink") +
coord_polar(theta="x") +
theme_ipsum()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it.
假設這位朋友的生日為 1990/02/07,將會活到 100 歲,也就是 2090/02/07。
b_HCY <- as.Date("1990/02/07")
d_HCY <- as.Date("2090/02/07")
bDays <- seq(from = b_HCY, to = d_HCY, by="years")
wd <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
sum(weekdays(bDays) %in% wd)
## [1] 0
檢查 weekdays function 的結果
weekdays(bDays)
## [1] "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三" "星期五"
## [9] "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六" "星期一"
## [17] "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二" "星期四"
## [25] "星期五" "星期六" "星期日" "星期二" "星期三" "星期四" "星期五" "星期日"
## [33] "星期一" "星期二" "星期三" "星期五" "星期六" "星期日" "星期一" "星期三"
## [41] "星期四" "星期五" "星期六" "星期一" "星期二" "星期三" "星期四" "星期六"
## [49] "星期日" "星期一" "星期二" "星期四" "星期五" "星期六" "星期日" "星期二"
## [57] "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三" "星期五"
## [65] "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六" "星期一"
## [73] "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二" "星期四"
## [81] "星期五" "星期六" "星期日" "星期二" "星期三" "星期四" "星期五" "星期日"
## [89] "星期一" "星期二" "星期三" "星期五" "星期六" "星期日" "星期一" "星期三"
## [97] "星期四" "星期五" "星期六" "星期一" "星期二"
修正篩選字串為中文
wd <- c("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")
HCY_l <- lapply(wd, function(x) sum(weekdays(bDays) %in% x))
HCY_df <- data.frame(Weekdays = wd,
Number = matrix(unlist(HCY_l), nrow=length(HCY_l), byrow=T))
HCY_df
## Weekdays Number
## 1 星期一 15
## 2 星期二 14
## 3 星期三 15
## 4 星期四 14
## 5 星期五 15
## 6 星期六 14
## 7 星期日 14
p3 <- ggplot(data = HCY_df, aes(x = Weekdays, y = Number)) +
geom_bar(stat = "identity")+
xlab("星期") +
ylab("次數")
p3
Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 using the data set here.
dta4 <- read.table("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/birth_college.txt", header = TRUE)
head(dta4)
## Birth Entrance
## 1 23.0 NA
## 2 22.1 NA
## 3 20.6 NA
## 4 19.6 NA
## 5 18.0 NA
## 6 15.9 NA
dta4$year <- c(1981:2009)
head(dta4)
## Birth Entrance year
## 1 23.0 NA 1981
## 2 22.1 NA 1982
## 3 20.6 NA 1983
## 4 19.6 NA 1984
## 5 18.0 NA 1985
## 6 15.9 NA 1986
par(mar = c(5, 5, 2, 5))
plot(dta4$year, dta4$Birth, ylim = c(0, 60),
xlab = "Year", ylab = "Birth rate (0.1%)")
par(new = TRUE)
plot(dta4$year, dta4$Entrance, ylim = c(40, 100), pch = 16,
xaxt = "n", yaxt = "n", xlab = "", ylab = "") # xaxt = "n", yaxt = "n" 用來移除其預設在左側的y軸座標
axis(4, ylim = c(40,100)) # 加上一 y 軸,2 是左邊, 4 是右邊
mtext("Acceptance rate (%)", side = 4, line = 3) # line = 3 用來調整右側 y 軸名稱的位置,數字愈小愈靠近座標軸
legend("topleft", c("Birth", "College"), pch = c(1, 16)) # 圖例
grid(nx = 30, ny = 0, col = "lightgray", lty = "dotted") # 背景格線
Use the dataset containing the average number of visitors (monthly) in New Zealand by country of residence to explore the seasonal patterns between the eight countries. Is there a hemisphere effect?
dtahw1 <- read.csv("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/nz_visitors.csv", header = TRUE)
head(dtahw1)
## Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total
## 1 1998M09 17244 748 6093 979 1320 5794 973 3837 57930
## 2 1998M10 18090 941 5039 1083 2459 7876 1418 6093 68203
## 3 1998M11 16750 1054 6112 1144 5195 13362 2236 8468 84370
## 4 1998M12 25909 1270 6670 1836 5499 20238 2935 7865 113853
## 5 1999M01 27228 1375 6008 2716 6430 22557 3623 10007 122130
## 6 1999M02 19461 1660 7478 2245 7320 27477 4394 12533 124305
dtahw1 <- dtahw1 %>%
dplyr::mutate(Year = as.integer(substr(Month, 1, 4)),
Month = as.integer(substr(Month, 6, 7)),
Season = (Month - 1) %/% 3 + 1)
head(dtahw1)
## Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total Year
## 1 9 17244 748 6093 979 1320 5794 973 3837 57930 1998
## 2 10 18090 941 5039 1083 2459 7876 1418 6093 68203 1998
## 3 11 16750 1054 6112 1144 5195 13362 2236 8468 84370 1998
## 4 12 25909 1270 6670 1836 5499 20238 2935 7865 113853 1998
## 5 1 27228 1375 6008 2716 6430 22557 3623 10007 122130 1999
## 6 2 19461 1660 7478 2245 7320 27477 4394 12533 124305 1999
## Season
## 1 3
## 2 4
## 3 4
## 4 4
## 5 1
## 6 1
dtahw1 <- dtahw1 %>%
tidyr::gather(key = "Country", value = "Visitors", 2:9)
head(dtahw1)
## Month Total Year Season Country Visitors
## 1 9 57930 1998 3 Australia 17244
## 2 10 68203 1998 4 Australia 18090
## 3 11 84370 1998 4 Australia 16750
## 4 12 113853 1998 4 Australia 25909
## 5 1 122130 1999 1 Australia 27228
## 6 2 124305 1999 1 Australia 19461
dtahw1 <- dtahw1 %>%
dplyr::group_by(Country, Month) %>%
dplyr::summarize(Visitor = mean(Visitors))
head(dtahw1)
## # A tibble: 6 x 3
## # Groups: Country [1]
## Country Month Visitor
## <chr> <int> <dbl>
## 1 Australia 1 44729.
## 2 Australia 2 30123.
## 3 Australia 3 29966.
## 4 Australia 4 29149.
## 5 Australia 5 18887.
## 6 Australia 6 17624.
ggplot()+
# 春天
geom_rect(data=data.frame(xstart=2.5, xend=5.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='pink',
alpha=.2) +
# 夏天
geom_rect(data=data.frame(xstart=5.5, xend=8.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='green',
alpha=.2) +
# 秋天
geom_rect(data=data.frame(xstart=8.5, xend=11.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='orange',
alpha=.2) +
geom_line(data = dtahw1, aes(x = Month, y = Visitor, color = Country))+
scale_x_continuous(limits = c(1, 12), breaks = c(1, 4, 8, 12))+
theme_classic()
結果顯示,各國旅客至紐西蘭旅遊,都是秋冬較多,春下較少。位於北半球之國家可推測是前往紐西蘭感受相對季節,但同樣位於南半球的澳洲,可能是因為相較而言,紐西蘭的四季較澳洲更為分明。
Use the sample data set to estimate the mean life expectancy of Nobel prize winners.
dtahw2 <- read.table("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/nobel_lspan.txt", header = TRUE)
str(dtahw2)
## 'data.frame': 21 obs. of 3 variables:
## $ ID : Factor w/ 21 levels "Bernard Katz",..: 3 8 17 1 13 15 7 14 2 16 ...
## $ Born: Factor w/ 21 levels "August 8, 1902",..: 5 9 2 12 1 14 6 10 16 3 ...
## $ Died: Factor w/ 21 levels "April 16, 1972",..: 21 8 20 2 19 6 5 16 7 4 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(tidyr)
library(hrbrthemes)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ tibble 3.0.0 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## √ purrr 0.3.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lattice)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
dtahw2$born <- mdy(dtahw2$Born)
dtahw2$died <- mdy(dtahw2$Died)
dtahw2$span <- dtahw2$died %--% dtahw2$born
head(dtahw2)
## ID Born Died born died
## 1 David H. Hubel February 27, 1926 September 22, 2013 1926-02-27 2013-09-22
## 2 Herbert A. Simon June 15, 1916 February 9, 2001 1916-06-15 2001-02-09
## 3 Ronald H. Coase December 29, 1910 September 2, 2013 1910-12-29 2013-09-02
## 4 Bernard Katz March 26, 1911 April 20, 2003 1911-03-26 2003-04-20
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984 1902-08-08 1984-10-20
## 6 Richard Feynman May 11, 1918 February 15, 1988 1918-05-11 1988-02-15
## span
## 1 2013-09-22 UTC--1926-02-27 UTC
## 2 2001-02-09 UTC--1916-06-15 UTC
## 3 2013-09-02 UTC--1910-12-29 UTC
## 4 2003-04-20 UTC--1911-03-26 UTC
## 5 1984-10-20 UTC--1902-08-08 UTC
## 6 1988-02-15 UTC--1918-05-11 UTC
ggplot(dtahw2, aes(reorder(ID, died), x = born, xend = died)) +
geom_dumbbell(size=rel(1.1),
colour="gold",
colour_x="goldenrod",
colour_xend="goldenrod",
dot_guide=TRUE,
dot_guide_size=0.2) +
labs(x="Year", y=NULL) +
theme_ipsum()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
dtahw2$age <- as.numeric(difftime(dtahw2$died, dtahw2$born, unit="days")) / 365
mean(dtahw2$age)
## [1] 82.39517
列表中的諾貝爾獎得主平均壽命為 82.4 歲。
Use the following sample of records for profit made, arrival date, and departure date of group travel booked at a travel agency in Taiwan to estimate the mean profit per day of service.
dtahw3 <- read.csv("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/Visit_TW.csv", header =TRUE )
str(dtahw3)
## 'data.frame': 96 obs. of 3 variables:
## $ Expense : int 15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
## $ Arrival : Factor w/ 83 levels "2014/10/10","2014/10/13",..: 79 83 78 74 75 31 31 26 29 77 ...
## $ Depature: Factor w/ 79 levels "2014/10/1","2014/10/13",..: 73 77 73 76 75 32 69 26 28 73 ...
dtahw3$Arrival <- as.Date(dtahw3$Arrival)
dtahw3$Depature <- as.Date(dtahw3$Depature)
dtahw3$duration <- as.numeric(difftime(dtahw3$Depature, dtahw3$Arrival, unit="days"))+1
str(dtahw3)
## 'data.frame': 96 obs. of 4 variables:
## $ Expense : int 15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
## $ Arrival : Date, format: "2015-02-16" "2015-03-06" ...
## $ Depature: Date, format: "2015-02-17" "2015-03-11" ...
## $ duration: num 2 6 4 11 7 5 9 6 4 7 ...
profit <- data.frame(All_profit <- sum(dtahw3$Expense),
Total_stay_duration <- sum(dtahw3$duration),
Mean_profit <- All_profit/Total_stay_duration)
names(profit) <- c("All profit", "Total stay duration", "Mean profit")
knitr::kable(profit)
| All profit | Total stay duration | Mean profit |
|---|---|---|
| 3722133 | 674 | 5522.453 |
The following rather awful plot is shown on a web page hosted by the Taiwanese Ministry of Education
Revise it so that it is a proper time series plot. For your convenience, the data points have been extracted and saved here . What had happened in the early 1990’s and how do we know if the trend reversal is real? You may want to augment the data set with further data points from 2012 to 2018 available in the foreign students in the U.S. data file.
dtahw4 <- read.table("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/tw_to_us.txt", header =FALSE)
str(dtahw4)
## 'data.frame': 23 obs. of 1 variable:
## $ V1: int 3637 2553 4564 6780 12029 12250 17560 22590 30960 33530 ...
names(dtahw4) <- "Number"
str(dtahw4)
## 'data.frame': 23 obs. of 1 variable:
## $ Number: int 3637 2553 4564 6780 12029 12250 17560 22590 30960 33530 ...
dtahw4$year <- c(seq(1950, 1990, 5), seq(1991, 2004, 1))
str(dtahw4)
## 'data.frame': 23 obs. of 2 variables:
## $ Number: int 3637 2553 4564 6780 12029 12250 17560 22590 30960 33530 ...
## $ year : num 1950 1955 1960 1965 1970 ...
dtahw4_us <- read.table("C:/Users/TheorEco Lab/Desktop/108-2/DataManagement/0518/foreign_students_us.txt", header =TRUE)
str(dtahw4_us)
## 'data.frame': 21 obs. of 3 variables:
## $ Year : int 2012 2013 2014 2015 2016 2017 2018 2012 2013 2014 ...
## $ Country: Factor w/ 3 levels "CN","JP","TW": 3 3 3 3 3 3 3 2 2 2 ...
## $ Number : int 21867 21266 20993 21127 21516 22454 23369 19568 19334 19064 ...
dtahw4_us_tw <- subset(dtahw4_us, Country == "TW")
dtahw4_us_tw
## Year Country Number
## 1 2012 TW 21867
## 2 2013 TW 21266
## 3 2014 TW 20993
## 4 2015 TW 21127
## 5 2016 TW 21516
## 6 2017 TW 22454
## 7 2018 TW 23369
ggplot() +
geom_line(data=dtahw4, aes(year, Number)) +
geom_line(data=dtahw4_us_tw, aes(Year, Number),
linetype="dotted") +
geom_point(data=dtahw4,
aes(year, Number),
pch=16) +
geom_point(data=dtahw4_us_tw,
aes(Year, Number),
pch=1) +
geom_segment(aes(x = 2004, xend = 2012,
y = 26178, yend = 21867), col = "blue")+
annotate("text", x=1965, y=20000, label="from TW government") +
annotate("text", x=2013, y=19000, label="from US") +
scale_x_continuous(limits=c(1945, 2020),
breaks=seq(1950, 2020, 5))+
labs(x="Year",
y="Number of students in U.S.A.") +
theme_ipsum()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
由圖可以看出,前往美國求學的台灣學生人數在 1950 到 1995 年左右是持續上升,之後便逐漸減少;加上美國所公布的 2012 至 2018 臺灣留學生人數,可以看出逐漸減少的趨勢一直持續到近 2015,近年來有漸增的趨向。
How different groups spend their day is an article published in The New York Times using the data collected from The American Time Use Survey. Discuss what we need to have in order to replicate this piece of graphical journalism in Taiwan.
根據該調查結果(如下圖),若我們要在臺灣複製此調查,需要收集以下資料:
基本資料:包括(但不限)性別、工作狀態、族群身分(e.g. 原住民與否)、年齡、教育水準、養育子女人數等
生活活動:包括(但不限)學習、工作、家務活動、旅行、運動、視聽娛樂、社交活動、睡眠等