Dates & Times
In-class exercises:
1.Find out the number of days you have spent at NCKU as a registered student or staff person. DONE
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
stdate <- dmy("1-September-2019")
endate <- dmy("18-May-2020")
endate - stdate
#> Time difference of 260 days2.Reproduce the plot of calls for police assistances around 24 hours in New York City using the data set here. DONE
library(ggplot2)
library(tidyverse)
#> -- Attaching packages ------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
#> √ tibble 3.0.1 √ dplyr 0.8.5
#> √ tidyr 1.0.3 √ stringr 1.4.0
#> √ readr 1.3.1 √ forcats 0.5.0
#> √ purrr 0.3.4
#> -- Conflicts ---------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
#> x lubridate::as.difftime() masks base::as.difftime()
#> x lubridate::date() masks base::date()
#> x dplyr::filter() masks stats::filter()
#> x lubridate::intersect() masks base::intersect()
#> x dplyr::lag() masks stats::lag()
#> x lubridate::setdiff() masks base::setdiff()
#> x lubridate::union() masks base::union()
library(hrbrthemes)
#> NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
#> Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
#> if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
dta <- read.csv("C:/Users/boss/Desktop/data_management/calls_nyc.csv", h=T)
dta
#> Hour Calls
#> 1 0.5 1080
#> 2 1.5 910
#> 3 2.5 770
#> 4 3.5 780
#> 5 4.5 380
#> 6 5.5 390
#> 7 6.5 200
#> 8 7.5 300
#> 9 8.5 275
#> 10 9.5 395
#> 11 10.5 305
#> 12 11.5 560
#> 13 12.5 450
#> 14 13.5 600
#> 15 14.5 600
#> 16 15.5 700
#> 17 16.5 690
#> 18 17.5 680
#> 19 18.5 860
#> 20 19.5 750
#> 21 20.5 900
#> 22 21.5 1150
#> 23 22.5 1100
#> 24 23.5 830
class(dta)
#> [1] "data.frame"
str(dta)
#> 'data.frame': 24 obs. of 2 variables:
#> $ Hour : num 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 ...
#> $ Calls: int 1080 910 770 780 380 390 200 300 275 395 ...
dta1 <- dta %>% mutate(dtah = c(0:23))
rt <- data.frame(Hour = factor(dta1[, 3]),
Calls=(dta1[, 2]))
ggplot(rt, aes(Hour,
Calls,
group=1)) +
geom_bar(width=1,
stat="identity",
fill="cyan",
col="gray",
alpha=0.2) +
geom_abline(intercept=mean(dta1$Calls),
slope=0,
col="pink") +
coord_polar(theta="x",
start=-pi/12) +
theme_ipsum()
#> Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
#> found in Windows font database
#> Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
#> found in Windows font database
#> Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
#> family not found in Windows font database
#> Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
#> family not found in Windows font database
#> Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
#> family not found in Windows font database
#> Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
#> family not found in Windows font database
#> Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
#> font family not found in Windows font database
#> Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
#> family not found in Windows font database
#> Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
#> font family not found in Windows font database3.Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it. DONE
db_ov <- as.Date("1993/10/18")
dd_ov <- as.Date("2093/10/18")
bDays <- seq(from=db_ov, to=dd_ov, by="years")
sum(weekdays(bDays) %in% "星期一")
#> [1] 15
sum(weekdays(bDays) %in% "星期二")
#> [1] 14
sum(weekdays(bDays) %in% "星期三")
#> [1] 15
sum(weekdays(bDays) %in% "星期四")
#> [1] 14
sum(weekdays(bDays) %in% "星期五")
#> [1] 14
sum(weekdays(bDays) %in% "星期六")
#> [1] 15
sum(weekdays(bDays) %in% "星期日")
#> [1] 14
dta <-data.frame(table(weekdays(bDays)))
library(tidyverse)
names(dta) <- c("weekdays", "Freq")
dta
#> weekdays Freq
#> 1 星期一 15
#> 2 星期二 14
#> 3 星期三 15
#> 4 星期五 14
#> 5 星期六 15
#> 6 星期日 14
#> 7 星期四 14
ggplot(dta, aes(x=weekdays, y=Freq)) +
geom_segment( aes(x=weekdays, xend=weekdays, y=0, yend=Freq)) +
geom_point( size=5, color="gray", fill=alpha("black", 0.3), alpha=0.7, shape=21, stroke=2) 4.Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 using the data set here. Done, but not very well.
dta <- read.table("C:/Users/boss/Desktop/data_management/birth_college.txt", h=T)
str(dta)
#> 'data.frame': 29 obs. of 2 variables:
#> $ Birth : num 23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
#> $ Entrance: int NA NA NA NA NA NA NA NA NA NA ...
head(dta)
#> Birth Entrance
#> 1 23.0 NA
#> 2 22.1 NA
#> 3 20.6 NA
#> 4 19.6 NA
#> 5 18.0 NA
#> 6 15.9 NA
dta$year <- 1981:2009
dta$College <- dta$Entrance-40
tail(dta)
#> Birth Entrance year College
#> 24 9.6 87 2004 47
#> 25 9.1 89 2005 49
#> 26 9.0 91 2006 51
#> 27 8.9 96 2007 56
#> 28 8.6 97 2008 57
#> 29 8.3 97 2009 57
dta
#> Birth Entrance year College
#> 1 23.0 NA 1981 NA
#> 2 22.1 NA 1982 NA
#> 3 20.6 NA 1983 NA
#> 4 19.6 NA 1984 NA
#> 5 18.0 NA 1985 NA
#> 6 15.9 NA 1986 NA
#> 7 16.0 NA 1987 NA
#> 8 17.2 NA 1988 NA
#> 9 15.7 NA 1989 NA
#> 10 16.6 NA 1990 NA
#> 11 15.7 NA 1991 NA
#> 12 15.5 NA 1992 NA
#> 13 15.6 NA 1993 NA
#> 14 15.3 44 1994 4
#> 15 15.5 44 1995 4
#> 16 15.2 49 1996 9
#> 17 15.1 60 1997 20
#> 18 12.4 60 1998 20
#> 19 12.9 60 1999 20
#> 20 13.8 58 2000 18
#> 21 11.7 61 2001 21
#> 22 11.0 80 2002 40
#> 23 10.1 83 2003 43
#> 24 9.6 87 2004 47
#> 25 9.1 89 2005 49
#> 26 9.0 91 2006 51
#> 27 8.9 96 2007 56
#> 28 8.6 97 2008 57
#> 29 8.3 97 2009 57
library(ggplot2)
ggplot(data = dta, aes(x = year, y = Birth )) +
geom_point(data = dta, aes(year, Birth), pch = 1) +
geom_point(data = dta, aes(year, College)) +
labs(x = "Year", y = "Birth rate(0.1%)") +
theme_minimal()
#> Warning: Removed 13 rows containing missing values (geom_point).Exercises:
1.Use the dataset containing the average number of visitors (monthly) in New Zealand by country of residence to explore the seasonal patterns between the eight countries. Is there a hemisphere effect? unfinish
library("ggplot2")
library("lubridate")
dta <- read.csv("C:/Users/boss/Desktop/data_management/nz_visitors.csv", h=T)
str(dta)
#> 'data.frame': 170 obs. of 10 variables:
#> $ Month : chr "1998M09" "1998M10" "1998M11" "1998M12" ...
#> $ Australia: int 17244 18090 16750 25909 27228 19461 19200 19595 12564 10987 ...
#> $ ChinaPRof: int 748 941 1054 1270 1375 1660 1456 1488 1449 1413 ...
#> $ Japan : int 6093 5039 6112 6670 6008 7478 7341 5030 4275 3758 ...
#> $ Korea : int 979 1083 1144 1836 2716 2245 1611 1416 1192 1255 ...
#> $ Germany : int 1320 2459 5195 5499 6430 7320 6094 3157 1450 765 ...
#> $ UK : int 5794 7876 13362 20238 22557 27477 20187 13448 8587 7271 ...
#> $ Canada : int 973 1418 2236 2935 3623 4394 3573 1795 1160 903 ...
#> $ USA : int 3837 6093 8468 7865 10007 12533 10519 6737 4932 4845 ...
#> $ Total : int 57930 68203 84370 113853 122130 124305 104106 83414 59800 52426 ...
head(dta)
#> Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total
#> 1 1998M09 17244 748 6093 979 1320 5794 973 3837 57930
#> 2 1998M10 18090 941 5039 1083 2459 7876 1418 6093 68203
#> 3 1998M11 16750 1054 6112 1144 5195 13362 2236 8468 84370
#> 4 1998M12 25909 1270 6670 1836 5499 20238 2935 7865 113853
#> 5 1999M01 27228 1375 6008 2716 6430 22557 3623 10007 122130
#> 6 1999M02 19461 1660 7478 2245 7320 27477 4394 12533 1243052.Use the sample data set to estimate the mean life expectancy of Nobel prize winners. Done, but I can’t transform days to years.
dta <- read.table("C:/Users/boss/Desktop/data_management/nobel_lspan.txt", h=T)
str(dta)
#> 'data.frame': 21 obs. of 3 variables:
#> $ ID : chr "David H. Hubel" "Herbert A. Simon" "Ronald H. Coase" "Bernard Katz" ...
#> $ Born: chr "February 27, 1926" "June 15, 1916" "December 29, 1910" "March 26, 1911" ...
#> $ Died: chr "September 22, 2013" "February 9, 2001" "September 2, 2013" "April 20, 2003" ...
dta
#> ID Born Died
#> 1 David H. Hubel February 27, 1926 September 22, 2013
#> 2 Herbert A. Simon June 15, 1916 February 9, 2001
#> 3 Ronald H. Coase December 29, 1910 September 2, 2013
#> 4 Bernard Katz March 26, 1911 April 20, 2003
#> 5 P.A.M. Dirac August 8, 1902 October 20, 1984
#> 6 Richard Feynman May 11, 1918 February 15, 1988
#> 7 George J. Stigler January 17, 1911 December 1, 1991
#> 8 Pearl S. Buck June 26 1892 March 6, 1973
#> 9 Bertrand A. W. Russell May 18, 1872 February 2, 1970
#> 10 Roger Y. Tsien February 1, 1952 August 24, 2016
#> 11 William Shockley February 13, 1910 August 12, 1989
#> 12 T. S. Eliot September 26, 1888 January 4, 1965
#> 13 John C. Eccles January 27, 1903 May 2, 1997
#> 14 W. Lawrence Bragg March 31, 1890 July 1, 1971
#> 15 Francis H. C. Crick June 8, 1916 July 28, 2004
#> 16 Marie Curie November 7, 1867 July 4, 1934
#> 17 Irene Joliot-Curie September 12, 1897 March 17, 1956
#> 18 Dorothy M. C. Hodgkin May 12, 1910 July 29, 1994
#> 19 Nadine Gordimer November 20, 1923 July 13, 2014
#> 20 Doris Lessing October 22, 1919 November 17, 2013
#> 21 Yasunari Kawabata June 11, 1899 April 16, 1972
library("lubridate")
b1 <- mdy("February 27, 1926")
b2 <- mdy(" June 15, 1916")
b3 <- mdy("December 29, 1910")
b4 <- mdy("March 26, 1911")
b5 <- mdy("August 8, 1902")
b6 <- mdy("May 11, 1918")
b7 <- mdy(" January 17, 1911")
b8 <- mdy("June 26 1892")
b9 <- mdy("May 18, 1872")
b10 <- mdy("February 1, 1952")
b11 <- mdy("February 13, 1910")
b12 <- mdy("September 26, 1888")
b13 <- mdy("January 27, 1903")
b14 <- mdy("March 31, 1890")
b15 <- mdy("June 8, 1916")
b16 <- mdy("November 7, 1867")
b17 <- mdy("September 12, 1897")
b18 <- mdy("May 12, 1910")
b19 <- mdy("November 20, 1923")
b20 <- mdy("October 22, 1919")
b21 <- mdy("June 11, 1899")
d1 <- mdy("September 22, 2013")
d2 <- mdy("February 9, 2001")
d3 <- mdy("September 2, 2013")
d4 <- mdy("April 20, 2003")
d5 <- mdy("October 20, 1984")
d6 <- mdy(" February 15, 1988")
d7 <- mdy("December 1, 1991")
d8 <- mdy("March 6, 1973")
d9 <- mdy("February 2, 1970")
d10 <- mdy("August 24, 2016")
d11 <- mdy("August 12, 1989")
d12 <- mdy("January 4, 1965")
d13 <- mdy("May 2, 1997")
d14 <- mdy("July 1, 1971")
d15 <- mdy(" July 28, 2004")
d16 <- mdy("July 4, 1934")
d17 <- mdy("March 17, 1956")
d18 <- mdy(" July 29, 1994")
d19 <- mdy("July 13, 2014")
d20 <- mdy("November 17, 2013")
d21 <- mdy("April 16, 1972")
dta1 <- mean((d1-b1)+(d2-b2)+(d3-b3)+(d4-b4)+(d5-b5)+(d6-b6)+(d7-b7)+(d8-b8)+(d9-b9)+(d10-b10)+(d11-b11)+(d12-b12)+(d13-b13)+(d14-b14)+(d15-b15)+(d16-b16)+(d17-b17)+(d18-b18)+(d19-b19)+(d20-b20)+(d21-b21))/21
dta1/365.25
#> Time difference of 82.33878 days3.Use the following sample of records for profit made, arrival date, and departure date of group travel booked at a travel agency in Taiwan to estimate the mean profit per day of service. DONE~
dta <- read.csv("C:/Users/boss/Desktop/data_management/Visit_TW.csv", h=T)
dta
#> Expense Arrival Depature
#> 1 15393 2015/2/16 2015/2/17
#> 2 27616 2015/3/6 2015/3/11
#> 3 8876 2015/2/14 2015/2/17
#> 4 57378 2015/1/30 2015/2/9
#> 5 32613 2015/1/31 2015/2/6
#> 6 46998 2014/12/27 2014/12/31
#> 7 10744 2014/12/27 2015/1/4
#> 8 3269 2014/12/19 2014/12/24
#> 9 16195 2014/12/23 2014/12/26
#> 10 55842 2015/2/11 2015/2/17
#> 11 6593 2014/12/22 2014/12/29
#> 12 12616 2014/12/30 2015/1/5
#> 13 37059 2014/12/6 2014/12/13
#> 14 45619 2014/12/1 2014/12/13
#> 15 62725 2015/1/20 2015/1/23
#> 16 45262 2015/1/1 2015/1/8
#> 17 39408 2015/2/28 2015/3/13
#> 18 41179 2015/1/23 2015/1/27
#> 19 37363 2015/2/18 2015/2/25
#> 20 11969 2014/11/11 2014/11/14
#> 21 14379 2014/9/14 2014/9/17
#> 22 28579 2014/11/6 2014/11/10
#> 23 28738 2014/11/22 2014/11/25
#> 24 77510 2014/12/18 2014/12/23
#> 25 2402 2014/11/10 2014/11/14
#> 26 10614 2014/11/5 2014/11/6
#> 27 1864 2014/7/24 2014/7/28
#> 28 111940 2014/11/18 2014/11/23
#> 29 -489 2014/10/6 2014/10/13
#> 30 67857 2014/11/17 2014/11/21
#> 31 -85 2014/10/24 2014/10/28
#> 32 122418 2014/11/16 2014/11/21
#> 33 1014 2014/10/13 2014/10/20
#> 34 37406 2014/11/21 2014/11/28
#> 35 87197 2014/12/17 2014/12/25
#> 36 1525 2014/11/6 2014/11/6
#> 37 2883 2014/9/20 2014/9/20
#> 38 42105 2014/12/29 2015/1/6
#> 39 89304 2014/11/23 2014/11/30
#> 40 48294 2014/12/12 2014/12/21
#> 41 7000 2014/9/28 2014/10/3
#> 42 22743 2015/1/7 2015/1/11
#> 43 66549 2014/9/26 2014/10/30
#> 44 56523 2014/10/10 2014/10/20
#> 45 2655 2014/11/25 2014/11/25
#> 46 54985 2014/9/7 2014/9/15
#> 47 8962 2014/4/22 2014/4/22
#> 48 121132 2014/12/11 2014/12/19
#> 49 33193 2014/12/9 2014/12/14
#> 50 47590 2014/12/6 2014/12/11
#> 51 30885 2014/12/9 2014/12/22
#> 52 131482 2014/12/26 2015/1/5
#> 53 30964 2014/8/5 2014/8/9
#> 54 4202 2014/7/30 2014/8/1
#> 55 20334 2014/8/14 2014/8/20
#> 56 56328 2014/6/5 2014/6/15
#> 57 11168 2014/7/12 2014/7/13
#> 58 31266 2014/8/2 2014/8/8
#> 59 124388 2014/12/12 2014/12/20
#> 60 73627 2014/11/15 2014/11/26
#> 61 37522 2014/7/26 2014/8/1
#> 62 36011 2014/7/25 2014/7/30
#> 63 74698 2014/7/25 2014/8/3
#> 64 73825 2014/9/24 2014/10/3
#> 65 31362 2014/9/29 2014/10/6
#> 66 46062 2014/7/24 2014/7/31
#> 67 57321 2014/12/20 2014/12/30
#> 68 1979 2014/6/25 2014/6/29
#> 69 14433 2014/6/3 2014/6/5
#> 70 43137 2014/8/31 2014/9/8
#> 71 69468 2014/7/26 2014/8/3
#> 72 3843 2014/5/22 2014/5/26
#> 73 19587 2014/7/9 2014/7/13
#> 74 34523 2014/10/14 2014/10/20
#> 75 25162 2014/9/22 2014/10/1
#> 76 15961 2014/11/8 2014/11/10
#> 77 42979 2014/10/21 2014/10/29
#> 78 31589 2014/6/6 2014/6/13
#> 79 183522 2014/11/25 2014/12/3
#> 80 53969 2014/5/11 2014/5/18
#> 81 30082 2014/6/7 2014/6/11
#> 82 72609 2014/6/2 2014/6/16
#> 83 36197 2014/5/31 2014/6/7
#> 84 58113 2014/5/6 2014/5/13
#> 85 26067 2014/10/20 2014/10/20
#> 86 106673 2014/8/31 2014/9/10
#> 87 613 2015/3/4 2015/3/4
#> 88 4598 2014/10/20 2014/10/20
#> 89 5198 2014/7/12 2014/7/18
#> 90 4349 2014/9/27 2014/9/29
#> 91 -5797 2014/3/18 2014/3/23
#> 92 21777 2014/3/13 2014/3/20
#> 93 30466 2014/3/26 2014/4/5
#> 94 81221 2014/6/1 2014/6/9
#> 95 8622 2014/4/15 2014/4/18
#> 96 18244 2014/4/15 2014/4/15
library("lubridate")
A <-ymd(dta$Arrival)
D <- ymd(dta$Depature)
E <- mean(dta$Expense)
R <- mean(D-A)
E
#> [1] 38772.22
R
#> Time difference of 6.020833 days
38772.22/6.020833
#> [1] 6439.6774.The following rather awful plot is shown on a web page hosted by the Taiwanese Ministry of Education
Revise it so that it is a proper time series plot. For your convenience, the data points have been extracted and saved here .
What had happened in the early 1990’s and how do we know if the trend reversal is real?
You may want to augment the data set with further data points from 2012 to 2018 available in the foreign students in the U.S. data file.
unfinish
5.How different groups spend their day is an article published in The New York Times using the data collected from The American Time Use Survey.
Discuss what we need to have in order to replicate this piece of graphical journalism in Taiwan.
unfinish
Maps
In-class exercises:
1.Indicate countries you have visited so far on a world map in the style of the ebola outbreaks example. unfinish
2.Plot places in administrative areas of Taiwan you have visited so far. unfinish
3.Map an area of Tainan city to include three of your favorite places to eat as landmarks.
Done
library("leaflet")
m1 <- leaflet() %>%
addTiles() %>%
addMarkers(lng= 120.193703,
lat= 23.016013,
popup="燒肉神保町")
m2 <- leaflet() %>%
addTiles() %>%
addMarkers(lng= 120.229313,
lat= 23.000203,
popup="極鮮火鍋")
m3 <- leaflet() %>%
addTiles() %>%
addMarkers(lng= 120.210048,
lat= 22.995656,
popup="鬥牛士二鍋")
m1Exercises:
1.Build a thematic plot of the results of Taiwan 2020 presidential election between the DDP and the KMT. The geographical data (maps) for Taiwan can be obtained from DIVA-GIS: Geographic Information System for Biodiversity Research. unfinish
2.Traffic accidents on roads in Taiwan in 2011 is available on-line from the Department of Transportation. Plot the number of deaths per 10,000 vehicles over the administrative units. unfinish
3.Download the data for age fisrt have sex across several countries to make the following plot:
unfinish
4.Download all the files from github (click the downward triangle in the clone or download button in green) for flood in schools in Taipei to replicate the analysis with the markdown file included.
unfinish