In class Exercise

Q1

Find out the number of days you have spent at NCKU as a registered student or staff person.

Sys.Date() - as.Date('2018-09-01')
## Time difference of 630 days

Q2

Reproduce the plot of calls for police assistances around 24 hours in New York City using the data set here.

dta2 <- read.table("calls_nyc.csv", sep=",", header = T)
head(dta2)
##   Hour Calls
## 1  0.5  1080
## 2  1.5   910
## 3  2.5   770
## 4  3.5   780
## 5  4.5   380
## 6  5.5   390
library(ggplot2)
library(tidyverse)
dta2$Hour <- as.integer((dta2$Hour))
ggplot(data= dta2, aes(x= Hour, y= Calls))+
  geom_bar(stat = "identity", fill="cyan", color="gray", alpha=.2)+
  geom_abline(intercept = mean(dta2$Calls), slope=0, color="pink")+
  scale_x_time(breaks = 0:23 - .5,
               labels = c(0,  rep('', 4), 5, rep('', 4), 10, rep('', 4),
               15, rep('', 4), 20, rep('', 3)))+
  coord_polar(theta = 'x', start = -pi/24)+
  theme_bw()+
  theme(panel.grid.minor.x = element_blank(), #去除隔線
        panel.grid.major.x = element_blank())

Q3

Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it.

bb<- as.Date("1950-01-01")
db<- as.Date("2050-01-01")
ndays<-as.numeric((difftime(db, bb, unit = "days"))/365)
ndays
## [1] 100.0685
nyear<- seq(from=bb, to= db, by="years")
t <- as.data.frame(table(weekdays(nyear)))
t
##   Var1 Freq
## 1 週二   15
## 2 週六   14
## 3 週日   15
## 4 週三   14
## 5 週四   14
## 6 週五   15
## 7 週一   14
ggplot(t,aes(Var1, Freq))+
  geom_bar(stat="identity")+
  xlab("Weekdays")+ylab("Times")

Q4

Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 (dataset:birth_college.txt)

dta4 <- read.table("birth_college.txt", header = T)
dta4$Year <- 1981:2009
head(dta4)
##   Birth Entrance Year
## 1  23.0       NA 1981
## 2  22.1       NA 1982
## 3  20.6       NA 1983
## 4  19.6       NA 1984
## 5  18.0       NA 1985
## 6  15.9       NA 1986
str(dta4)
## 'data.frame':    29 obs. of  3 variables:
##  $ Birth   : num  23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
##  $ Entrance: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Year    : int  1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 ...
par(mar=c(5,4,4,4))
with(dta4, plot(x=Year, y=Birth, type = "n", yaxt = 'n', #第1個圖
     ylim=c(0, 60), ylab = 'Birth rate (0.1%)'))
axis(2, at = seq(0, 60, by = 10), labels = seq(0, 60, by = 10))
abline(v = 1981:2009, lty = 2, lwd = .5, col = 'grey90')
with(dta4, points(x = Year, y = Birth, pch = 1))
par(new=TRUE)
with(dta4, plot(x=Year, y=Entrance, type = "n", yaxt = 'n', #第二個圖
     ylim=c(40,100), xlab = '', ylab = ''))
axis(1, at = seq(1980, 2010, by = 5), labels = seq(1980, 2010, by = 5)) #bottome axis
axis(4, at = seq(40, 100, by=10), labels = seq(40, 100, by=10))# right axis
with(dta4, points(x = Year, y = Entrance, pch = 19))
mtext("Acceptance rate (%)", side=4, line = 3)
legend(1980, 102, c('Birth', 'College'),
       pch = c(1, 19), cex = 1)

Exercise

Exercise 1

Use the dataset containing the average number of visitors (monthly) in New Zealand by country of residence to explore the seasonal patterns between the eight countries. Is there a hemisphere effect?

nz_dta <- read.table("nz_visitors.csv", sep = ",", header = T)
head(nz_dta)
##     Month Australia ChinaPRof Japan Korea Germany    UK Canada   USA  Total
## 1 1998M09     17244       748  6093   979    1320  5794    973  3837  57930
## 2 1998M10     18090       941  5039  1083    2459  7876   1418  6093  68203
## 3 1998M11     16750      1054  6112  1144    5195 13362   2236  8468  84370
## 4 1998M12     25909      1270  6670  1836    5499 20238   2935  7865 113853
## 5 1999M01     27228      1375  6008  2716    6430 22557   3623 10007 122130
## 6 1999M02     19461      1660  7478  2245    7320 27477   4394 12533 124305
nz_dta<- nz_dta %>% mutate(Year= substr(nz_dta$Month, 1, 4),
                  Months = substr(nz_dta$Month, 6, 7))%>% 
                select(-Month)
head(nz_dta)
##   Australia ChinaPRof Japan Korea Germany    UK Canada   USA  Total Year Months
## 1     17244       748  6093   979    1320  5794    973  3837  57930 1998     09
## 2     18090       941  5039  1083    2459  7876   1418  6093  68203 1998     10
## 3     16750      1054  6112  1144    5195 13362   2236  8468  84370 1998     11
## 4     25909      1270  6670  1836    5499 20238   2935  7865 113853 1998     12
## 5     27228      1375  6008  2716    6430 22557   3623 10007 122130 1999     01
## 6     19461      1660  7478  2245    7320 27477   4394 12533 124305 1999     02
nz_dtal<- nz_dta %>% select(-Total) %>% 
  reshape2::melt(., id.var=c("Year", "Months"),
                 measurevar="Australia":"USA",
                 variable.name="Country",
                 value.name="Visitors")  

ggplot(nz_dtal,aes(x=Year, y=Visitors,group=Country, color= Country))+
    geom_point(size=.3)+
    geom_line()+
    facet_grid(.~Months)

南半球的國家(Australia)到紐西蘭遊玩的遊客數不論在哪個月份都逐年增加

ggplot(nz_dtal,aes(x=Year, y=Visitors,color= Country))+
    geom_point(size=.3)+
    geom_line()+
    facet_grid(.~Country)

總量來看Australia和UK到紐西蘭的遊客數最多,但UK的人數有下滑的趨勢

Exercise 2

Use the sample data set to estimate the mean life expectancy of Nobel prize winners.

library(lubridate)
dta2 <- read.table("nobel_lspan.txt", header = T) %>%
  mutate(born=mdy(Born), 
        died=mdy(Died), 
        span=died %--% born) 
head(dta2)
##                 ID              Born               Died       born       died
## 1   David H. Hubel February 27, 1926 September 22, 2013 1926-02-07 2013-02-02
## 2 Herbert A. Simon     June 15, 1916   February 9, 2001 1916-01-05 2001-09-20
## 3  Ronald H. Coase December 29, 1910  September 2, 2013 1910-02-09 2013-02-20
## 4     Bernard Katz    March 26, 1911     April 20, 2003 1911-02-06 2003-04-20
## 5     P.A.M. Dirac    August 8, 1902   October 20, 1984 2002-08-19 1984-10-20
## 6  Richard Feynman      May 11, 1918  February 15, 1988 1918-01-01 1988-01-05
##                             span
## 1 2013-02-02 UTC--1926-02-07 UTC
## 2 2001-09-20 UTC--1916-01-05 UTC
## 3 2013-02-20 UTC--1910-02-09 UTC
## 4 2003-04-20 UTC--1911-02-06 UTC
## 5 1984-10-20 UTC--2002-08-19 UTC
## 6 1988-01-05 UTC--1918-01-01 UTC
ggplot(dta2, aes(reorder(ID, died),  
                x=born, 
                xend=died)) +
 ggalt::geom_dumbbell(size=rel(1.1), 
               colour="gold", 
               colour_x="goldenrod",
               colour_xend="goldenrod", 
               dot_guide=TRUE, 
               dot_guide_size=0.2) +
 labs(x="Year", y=NULL) + 
 theme_classic()  ##得到錯誤的圖(因為出現未來的時間)

Exercise 4

The following rather awful plot is shown on a web page hosted by the Taiwanese Ministry of Education.Revise it so that it is a proper time series plot.

dta<- read.table("tw_to_us.txt", header = F)
head(dta)
##      V1
## 1  3637
## 2  2553
## 3  4564
## 4  6780
## 5 12029
## 6 12250
dta1<- read.table("foreign_students_us.txt", header = T)
head(dta1)
##   Year Country Number
## 1 2012      TW  21867
## 2 2013      TW  21266
## 3 2014      TW  20993
## 4 2015      TW  21127
## 5 2016      TW  21516
## 6 2017      TW  22454
dta$Year <- c(seq(1950,1990,5), seq(1991,2004,1))
dta$Country <-rep("TW")
colnames(dta)[1]<-"Number"
head(dta)
##   Number Year Country
## 1   3637 1950      TW
## 2   2553 1955      TW
## 3   4564 1960      TW
## 4   6780 1965      TW
## 5  12029 1970      TW
## 6  12250 1975      TW
dta<- rbind(dta, dta1)
tail(dta)
##    Number Year Country
## 39 274439 2013      CN
## 40 304040 2014      CN
## 41 328547 2015      CN
## 42 350755 2016      CN
## 43 363341 2017      CN
## 44 369548 2018      CN
library(ggplot2)
dta %>% subset(Country =="TW") %>% 
  ggplot(., aes(x= Year, y= Number))+
  geom_point()+
  scale_x_continuous(limits=c(1950, 2018),
                     breaks = seq(1950, 2020, 5))+
  geom_smooth(method = "loess", se = F, col = "gray")+
  ggtitle("Taiwanese Students in the U.S.A (1950-2018)")