Find out the number of days you have spent at NCKU as a registered student or staff person.
Sys.Date() - as.Date('2018-09-01')
## Time difference of 630 days
Reproduce the plot of calls for police assistances around 24 hours in New York City using the data set here.
dta2 <- read.table("calls_nyc.csv", sep=",", header = T)
head(dta2)
## Hour Calls
## 1 0.5 1080
## 2 1.5 910
## 3 2.5 770
## 4 3.5 780
## 5 4.5 380
## 6 5.5 390
library(ggplot2)
library(tidyverse)
dta2$Hour <- as.integer((dta2$Hour))
ggplot(data= dta2, aes(x= Hour, y= Calls))+
geom_bar(stat = "identity", fill="cyan", color="gray", alpha=.2)+
geom_abline(intercept = mean(dta2$Calls), slope=0, color="pink")+
scale_x_time(breaks = 0:23 - .5,
labels = c(0, rep('', 4), 5, rep('', 4), 10, rep('', 4),
15, rep('', 4), 20, rep('', 3)))+
coord_polar(theta = 'x', start = -pi/24)+
theme_bw()+
theme(panel.grid.minor.x = element_blank(), #去除隔線
panel.grid.major.x = element_blank())
Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it.
bb<- as.Date("1950-01-01")
db<- as.Date("2050-01-01")
ndays<-as.numeric((difftime(db, bb, unit = "days"))/365)
ndays
## [1] 100.0685
nyear<- seq(from=bb, to= db, by="years")
t <- as.data.frame(table(weekdays(nyear)))
t
## Var1 Freq
## 1 週二 15
## 2 週六 14
## 3 週日 15
## 4 週三 14
## 5 週四 14
## 6 週五 15
## 7 週一 14
ggplot(t,aes(Var1, Freq))+
geom_bar(stat="identity")+
xlab("Weekdays")+ylab("Times")
Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 (dataset:birth_college.txt)
dta4 <- read.table("birth_college.txt", header = T)
dta4$Year <- 1981:2009
head(dta4)
## Birth Entrance Year
## 1 23.0 NA 1981
## 2 22.1 NA 1982
## 3 20.6 NA 1983
## 4 19.6 NA 1984
## 5 18.0 NA 1985
## 6 15.9 NA 1986
str(dta4)
## 'data.frame': 29 obs. of 3 variables:
## $ Birth : num 23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
## $ Entrance: int NA NA NA NA NA NA NA NA NA NA ...
## $ Year : int 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 ...
par(mar=c(5,4,4,4))
with(dta4, plot(x=Year, y=Birth, type = "n", yaxt = 'n', #第1個圖
ylim=c(0, 60), ylab = 'Birth rate (0.1%)'))
axis(2, at = seq(0, 60, by = 10), labels = seq(0, 60, by = 10))
abline(v = 1981:2009, lty = 2, lwd = .5, col = 'grey90')
with(dta4, points(x = Year, y = Birth, pch = 1))
par(new=TRUE)
with(dta4, plot(x=Year, y=Entrance, type = "n", yaxt = 'n', #第二個圖
ylim=c(40,100), xlab = '', ylab = ''))
axis(1, at = seq(1980, 2010, by = 5), labels = seq(1980, 2010, by = 5)) #bottome axis
axis(4, at = seq(40, 100, by=10), labels = seq(40, 100, by=10))# right axis
with(dta4, points(x = Year, y = Entrance, pch = 19))
mtext("Acceptance rate (%)", side=4, line = 3)
legend(1980, 102, c('Birth', 'College'),
pch = c(1, 19), cex = 1)
Use the dataset containing the average number of visitors (monthly) in New Zealand by country of residence to explore the seasonal patterns between the eight countries. Is there a hemisphere effect?
nz_dta <- read.table("nz_visitors.csv", sep = ",", header = T)
head(nz_dta)
## Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total
## 1 1998M09 17244 748 6093 979 1320 5794 973 3837 57930
## 2 1998M10 18090 941 5039 1083 2459 7876 1418 6093 68203
## 3 1998M11 16750 1054 6112 1144 5195 13362 2236 8468 84370
## 4 1998M12 25909 1270 6670 1836 5499 20238 2935 7865 113853
## 5 1999M01 27228 1375 6008 2716 6430 22557 3623 10007 122130
## 6 1999M02 19461 1660 7478 2245 7320 27477 4394 12533 124305
nz_dta<- nz_dta %>% mutate(Year= substr(nz_dta$Month, 1, 4),
Months = substr(nz_dta$Month, 6, 7))%>%
select(-Month)
head(nz_dta)
## Australia ChinaPRof Japan Korea Germany UK Canada USA Total Year Months
## 1 17244 748 6093 979 1320 5794 973 3837 57930 1998 09
## 2 18090 941 5039 1083 2459 7876 1418 6093 68203 1998 10
## 3 16750 1054 6112 1144 5195 13362 2236 8468 84370 1998 11
## 4 25909 1270 6670 1836 5499 20238 2935 7865 113853 1998 12
## 5 27228 1375 6008 2716 6430 22557 3623 10007 122130 1999 01
## 6 19461 1660 7478 2245 7320 27477 4394 12533 124305 1999 02
nz_dtal<- nz_dta %>% select(-Total) %>%
reshape2::melt(., id.var=c("Year", "Months"),
measurevar="Australia":"USA",
variable.name="Country",
value.name="Visitors")
ggplot(nz_dtal,aes(x=Year, y=Visitors,group=Country, color= Country))+
geom_point(size=.3)+
geom_line()+
facet_grid(.~Months)
南半球的國家(Australia)到紐西蘭遊玩的遊客數不論在哪個月份都逐年增加
ggplot(nz_dtal,aes(x=Year, y=Visitors,color= Country))+
geom_point(size=.3)+
geom_line()+
facet_grid(.~Country)
總量來看Australia和UK到紐西蘭的遊客數最多,但UK的人數有下滑的趨勢
Use the sample data set to estimate the mean life expectancy of Nobel prize winners.
library(lubridate)
dta2 <- read.table("nobel_lspan.txt", header = T) %>%
mutate(born=mdy(Born),
died=mdy(Died),
span=died %--% born)
head(dta2)
## ID Born Died born died
## 1 David H. Hubel February 27, 1926 September 22, 2013 1926-02-07 2013-02-02
## 2 Herbert A. Simon June 15, 1916 February 9, 2001 1916-01-05 2001-09-20
## 3 Ronald H. Coase December 29, 1910 September 2, 2013 1910-02-09 2013-02-20
## 4 Bernard Katz March 26, 1911 April 20, 2003 1911-02-06 2003-04-20
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984 2002-08-19 1984-10-20
## 6 Richard Feynman May 11, 1918 February 15, 1988 1918-01-01 1988-01-05
## span
## 1 2013-02-02 UTC--1926-02-07 UTC
## 2 2001-09-20 UTC--1916-01-05 UTC
## 3 2013-02-20 UTC--1910-02-09 UTC
## 4 2003-04-20 UTC--1911-02-06 UTC
## 5 1984-10-20 UTC--2002-08-19 UTC
## 6 1988-01-05 UTC--1918-01-01 UTC
ggplot(dta2, aes(reorder(ID, died),
x=born,
xend=died)) +
ggalt::geom_dumbbell(size=rel(1.1),
colour="gold",
colour_x="goldenrod",
colour_xend="goldenrod",
dot_guide=TRUE,
dot_guide_size=0.2) +
labs(x="Year", y=NULL) +
theme_classic() ##得到錯誤的圖(因為出現未來的時間)
The following rather awful plot is shown on a web page hosted by the Taiwanese Ministry of Education.Revise it so that it is a proper time series plot.
dta<- read.table("tw_to_us.txt", header = F)
head(dta)
## V1
## 1 3637
## 2 2553
## 3 4564
## 4 6780
## 5 12029
## 6 12250
dta1<- read.table("foreign_students_us.txt", header = T)
head(dta1)
## Year Country Number
## 1 2012 TW 21867
## 2 2013 TW 21266
## 3 2014 TW 20993
## 4 2015 TW 21127
## 5 2016 TW 21516
## 6 2017 TW 22454
dta$Year <- c(seq(1950,1990,5), seq(1991,2004,1))
dta$Country <-rep("TW")
colnames(dta)[1]<-"Number"
head(dta)
## Number Year Country
## 1 3637 1950 TW
## 2 2553 1955 TW
## 3 4564 1960 TW
## 4 6780 1965 TW
## 5 12029 1970 TW
## 6 12250 1975 TW
dta<- rbind(dta, dta1)
tail(dta)
## Number Year Country
## 39 274439 2013 CN
## 40 304040 2014 CN
## 41 328547 2015 CN
## 42 350755 2016 CN
## 43 363341 2017 CN
## 44 369548 2018 CN
library(ggplot2)
dta %>% subset(Country =="TW") %>%
ggplot(., aes(x= Year, y= Number))+
geom_point()+
scale_x_continuous(limits=c(1950, 2018),
breaks = seq(1950, 2020, 5))+
geom_smooth(method = "loess", se = F, col = "gray")+
ggtitle("Taiwanese Students in the U.S.A (1950-2018)")