In-Class Exercise 1: ROC (Taiwan) Students in the U.S.A (1950-2004)
## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.3 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Load data file
# Read the raw counts and name the column "Values".
dta <- setNames(read.table("tw_to_us.txt"), "Values")
# Attach the observation years: every five years from 1950 to 1990,
# then every year from 1991 to 2004.
dta[["Year"]] <- c(seq(1950, 1990, by = 5), 1991:2004)
head(dta)## Values Year
## 1 3637 1950
## 2 2553 1955
## 3 4564 1960
## 4 6780 1965
## 5 12029 1970
## 6 12250 1975
Plot
# Scatter plot of the yearly values with a LOESS trend line drawn without
# its confidence band; both axes use fixed limits and tick positions.
ggplot(dta, aes(x = Year, y = Values)) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE, colour = "blue") +
  scale_x_continuous(
    limits = c(1950, 2004),
    breaks = seq(1950, 2005, by = 5)
  ) +
  scale_y_continuous(
    limits = c(0, 40000),
    breaks = seq(0, 40000, by = 5000)
  ) +
  labs(title = "ROC (Taiwan) Students in the U.S.A (1950-2004)") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
Exercise 1: Find out how often your birthday falls on each day of the week.
# Count how often a birthday falls on each day of the week over a
# 100-year span.
birthday <- as.Date("1998/1/7")
deathday <- as.Date("2098/1/7")

# Length of the span in 365-day years. `units` is spelled out in full
# instead of relying on partial argument matching (`unit`).
as.numeric(difftime(deathday, birthday, units = "days")) / 365
## [1] 100.0685

# One date per birthday between the two end points (inclusive).
t <- seq(from = birthday, to = deathday, by = "years")

# format(, "%w") returns the weekday as "0" (Sunday) through "6" (Saturday)
# in every locale, so the factor no longer depends on weekdays() producing
# Chinese day names. The fourth label is "Wed" (it was misspelled "Wen").
birthday_wday <- factor(
  format(t, "%w"),
  levels = as.character(0:6),
  labels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
)
plot(birthday_wday)
Exercise 2: Find out the number of days you have spent at NCKU as a registered student.
## Time difference of 1354 days
I have spent 1354 days at NCKU.
Exercise 3: A plot of calls for police assistance over a 24-hour period in NY.
Load data file
## Hour Calls
## 1 0.5 1080
## 2 1.5 910
## 3 2.5 770
## 4 3.5 780
## 5 4.5 380
## 6 5.5 390
Reproduce the plot
# Polar bar chart of Calls against Hour, with a pink reference line at the
# mean number of calls; the grid lines along the x (angular) axis are removed.
ggplot(dta, aes(x = Hour, y = Calls)) +
  geom_col(width = 1, fill = "cyan", colour = "gray", alpha = 0.4) +
  geom_abline(intercept = mean(dta$Calls), slope = 0, colour = "pink") +
  coord_polar(theta = "x", start = pi / 12) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
Exercise 4: The mean life expectancy of Nobel prize winners
## ID Born Died
## 1 David H. Hubel February 27, 1926 September 22, 2013
## 2 Herbert A. Simon June 15, 1916 February 9, 2001
## 3 Ronald H. Coase December 29, 1910 September 2, 2013
## 4 Bernard Katz March 26, 1911 April 20, 2003
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984
## 6 Richard Feynman May 11, 1918 February 15, 1988
# Parse the textual birth and death dates and compute each life span in years.
#
# The dates use English month names ("February 27, 1926"). The parsing
# locale is pinned to "C" so those names are recognised whatever the
# session's LC_TIME is; under a non-English locale mdy() skips the month
# name and mis-reads the remaining digits (e.g. "August 8, 1902" was parsed
# as 2002-08-19, giving a negative life span).
dta_n <- dta %>%
  mutate(
    born = mdy(Born, locale = "C"),
    died = mdy(Died, locale = "C"),
    # Interval from birth to death, then its duration expressed in years.
    span = born %--% died,
    y = as.duration(span) / dyears(1)
  )
head(dta_n)## ID Born Died born died
## 1 David H. Hubel February 27, 1926 September 22, 2013 1926-02-07 2013-02-02
## 2 Herbert A. Simon June 15, 1916 February 9, 2001 1916-01-05 2001-09-20
## 3 Ronald H. Coase December 29, 1910 September 2, 2013 1910-02-09 2013-02-20
## 4 Bernard Katz March 26, 1911 April 20, 2003 1911-02-06 2003-04-20
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984 2002-08-19 1984-10-20
## 6 Richard Feynman May 11, 1918 February 15, 1988 1918-01-01 1988-01-05
## span y
## 1 1926-02-07 UTC--2013-02-02 UTC 86.98700
## 2 1916-01-05 UTC--2001-09-20 UTC 85.70842
## 3 1910-02-09 UTC--2013-02-20 UTC 103.03080
## 4 1911-02-06 UTC--2003-04-20 UTC 92.19986
## 5 2002-08-19 UTC--1984-10-20 UTC -17.82888
## 6 1918-01-01 UTC--1988-01-05 UTC 70.00958
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Dumbbell chart with one row per ID running from `born` to `died`;
# the rows are ordered by `died`.
ggplot(dta_n, aes(x = born, xend = died, y = reorder(ID, died))) +
  geom_dumbbell(
    size = rel(1.1),
    colour = "gold",
    colour_x = "goldenrod",
    colour_xend = "goldenrod",
    dot_guide = TRUE,
    dot_guide_size = 0.2
  ) +
  labs(x = "Year", y = NULL) +
  theme_ipsum()
Exercise 5: Estimate the mean profit per day of service in Taiwan
Load data file
## Expense Arrival Depature
## 1 15393 2015/2/16 2015/2/17
## 2 27616 2015/3/6 2015/3/11
## 3 8876 2015/2/14 2015/2/17
## 4 57378 2015/1/30 2015/2/9
## 5 32613 2015/1/31 2015/2/6
## 6 46998 2014/12/27 2014/12/31
# Mean expense per day of stay: total Expense divided by total days stayed.
dta %>%
  mutate(
    Arrival = as.Date(Arrival),
    Depature = as.Date(Depature),
    # The + 1 counts both the arrival day and the departure day.
    # `units` is written out in full rather than partially matched as `unit`.
    Stay = as.numeric(difftime(Depature, Arrival, units = "days")) + 1
  ) %>%
  summarize(
    TotalProfit = sum(Expense),
    stayDays = sum(Stay),
    # Reuse the two totals above instead of summing the columns again.
    MeanProfit = TotalProfit / stayDays
  )
## TotalProfit stayDays MeanProfit
## 1 3722133 674 5522.453
The mean profit per day of service in Taiwan is 5522.453.
Exercise 6: The average monthly rainfall at Chia-Yi county from 1969 to 2010
Load data file
## Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1 1969 6.1 52.8 66.1 46.9 179.1 452.7 187.4 192.5 220.9 6.4 2 0.6
## 2 1970 36 1.4 60.6 1.0 194.0 149.4 176.7 336.1 403.6 21.4 26.1 16.7
## 3 1971 17 19.3 3.4 26.4 11.9 554.6 200.7 260.7 285.0 15.7 4.2 52.7
## 4 1972 76.8 45.6 . 153.5 358.2 713.8 502.9 609.9 22.1 1.5 8.5 62.1
## 5 1973 31.4 21.6 38.4 225.6 147.2 329.4 322.9 587.6 124.7 49.3 6.1 .
## 6 1974 6.3 72.9 14.5 146.8 128.7 680.1 227.2 162.3 201.0 66.1 1.3 9.8
## Warning in is.data.frame(data): 強制變更過程中產生了 NA
# Group the positions within each cycle of the series into blocks of three
# (0, 1, 2, ...). NOTE(review): dta_ts is built in a chunk not shown here;
# presumably it is a monthly series, which would make these quarters.
quart <- (cycle(dta_ts) - 1) %/% 3

# Draw one sub-series per block, then overlay a reference grid.
monthplot(
  dta_ts,
  phase = quart,
  ylab = "Average monthly rainfall (mm)",
  xlab = "Season"
)
grid()
Exercise 7
# Take the `cpi` object from the acs package and turn it into a data frame;
# as.data.frame() names the value column after the variable, i.e. `dta`.
dta <- acs::cpi
dta <- as.data.frame(dta)
# The row names carry the years; keep them as a (character) column.
dta[["Year"]] <- rownames(dta)
# Scaled copy of the CPI values. NOTE(review): the divisor 20 is not
# explained anywhere in the report - confirm what it is meant to represent.
dta[["Value"]] <- dta[["dta"]] / 20
head(dta)## dta Year Value
## 1913 9.9 1913 0.495
## 1914 10.0 1914 0.500
## 1915 10.1 1915 0.505
## 1916 10.9 1916 0.545
## 1917 12.8 1917 0.640
## 1918 15.1 1918 0.755
Reproduce the plot
# Line-and-point plot of Value against Year (converted from character to
# numeric), with ticks every ten years from 1915 to 2015.
ggplot(dta, aes(x = as.numeric(Year), y = Value)) +
  geom_line() +
  geom_point(shape = 16) +
  scale_x_continuous(
    limits = c(1910, 2015),
    breaks = seq(1915, 2015, by = 10)
  ) +
  labs(x = "Year", y = "Annual Change in CPI") +
  theme_minimal()
## It seems that the data set is different from the raw figure