Dates & Times

library(tidyverse)
library(lubridate)
library(ggplot2)
library(pacman)
library(dplyr)

In-class exercise

Q1.

Find out the number of days you have spent at NCKU as a registered student or staff person.

# Enrolment date and report date, both written as day/month/year.
startDate <- dmy("09/09/2016")
nowDate <- dmy("20/05/2020")
# Elapsed time between the two dates, expressed in days.
difftime(nowDate, startDate, units = "days")

## Time difference of 1349 days

Q2.

Reproduce the plot of calls for police assistance over 24 hours in New York City using the data set here.

# Police-call counts per hour; the factor keeps the hours in file order so
# that they are laid out around the dial in that order.
dta <- read.csv("calls_nyc.csv")
dta$Hour <- factor(dta$Hour, levels = dta$Hour)
# dta$group <- ifelse(dta$Calls < mean(dta$Calls), 0, 1)
ggplot(dta, aes(x = Hour, y = Calls)) +
  # width = 1 makes neighbouring bars touch; alpha sets the transparency
  geom_col(width = 1, fill = "cyan", colour = "gray", alpha = 0.4) +
  # reference ring drawn at the mean number of calls
  geom_hline(yintercept = mean(dta$Calls), colour = "pink") +
  coord_polar(theta = "x", start = pi / 12)

Q3.

Assume that a friend of yours will live to be 100 years old. Find out how often his or her birthday falls on each day of the week. Plot it.

# A friend born on 1920-05-20 who lives to be 100 years old.
birthday <- as.Date("1920/5/20")
deathday <- as.Date("2020/5/20")
# Sanity check: the life span expressed in 365-day years.
as.numeric(difftime(deathday, birthday, units = "days")) / 365

## [1] 100.0685

# One date per birthday, from the day of birth up to the 100th birthday.
birthdays <- seq(from = birthday, to = deathday, by = "years")
# "%w" gives the weekday as a number (0 = Sunday, ..., 6 = Saturday).
# Unlike the names returned by weekdays(), it does not depend on the
# session locale, so the factor below is never filled with NA.
birthday_weekday <- factor(
  format(birthdays, "%w"),
  levels = as.character(0:6),
  labels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
)
# Plotting a factor draws a bar chart of how often each level occurs.
plot(birthday_weekday)

Q4.

Reproduce the plot of fertility rate and college acceptance rate in Taiwan from 1981 to 2009 using the data set here.

# Fertility and college-acceptance rates for Taiwan, one row per year.
dta <- read.table("birth_college.txt", header = TRUE)
# Attach the calendar year to each row (1981-2009, in row order).
dta$year <- 1981:2009
head(dta)

# Widen the right margin to make room for the second y-axis and its label;
# keep the previous margins so they can be restored at the end.
old_par <- par(mar = c(4.5, 4.5, 2, 4.5))
# First layer: birth rate against the left-hand axis (default open circles).
plot(dta$year, dta$Birth, ylim = c(0, 60),
     xlab = "Year", ylab = "Birth rate(0.1%)")
# Second layer: drawn over the same figure with its own y-scale and without
# axes or labels, so only the filled points are added.
par(new = TRUE)
plot(dta$year, dta$Entrance, ylim = c(40, 100), pch = 16,
     xaxt = "n", yaxt = "n", xlab = "", ylab = "")
# Right-hand axis for the scale set by the second plot() call. axis() has
# no `ylim` argument; passing one only raises a warning.
axis(4)
mtext("Acceptance rate(%)", side = 4, line = 3)
# Vertical guide lines only: ny = NA suppresses the horizontal ones.
grid(nx = 30, ny = NA, col = "lightgray", lty = "dotted")
legend("topleft", c("Birth", "College"), pch = c(1, 16))
par(old_par)

Exercises

Q1.

Use the dataset containing the average number of visitors (monthly) in New Zealand by country of residence to explore the seasonal patterns between the eight countries. Is there a hemisphere effect?

# Monthly visitor counts by country of residence.
dta <- read.csv("nz_visitors.csv")
head(dta)

# Average number of visitors per calendar month for each of the eight
# country columns (columns 2 to 9), drawn as one line per country.
dta %>%
  mutate(
    Year = as.integer(substr(Month, 1, 4)),
    Month = as.integer(substr(Month, 6, 7))
  ) %>%
  gather(key = "Country", value = "visitors", 2:9) %>%
  group_by(Country, Month) %>%
  summarize(Visitor = mean(visitors)) %>%
  ggplot(aes(x = Month, y = Visitor, color = Country)) +
  geom_line()

參考 Jay Liao 寫法

# Total visitors per month. Each month is tagged "W" (October to March) or
# "S" (April to September) and placed on the first day of that month.
dtat <- dta %>%
  dplyr::select(Month, Total) %>%
  mutate(
    Year = as.integer(substr(Month, 1, 4)),
    Month = as.integer(substr(Month, 6, 7)),
    Season = ifelse(Month >= 10 | Month <= 3, "W", "S"),
    ym = as.Date(paste(Year, Month, "1", sep = "/"))
  )
# Time series of the total, with the points coloured by season.
ggplot(dtat, aes(x = ym, y = Total)) +
  geom_line() +
  geom_point(aes(color = Season)) +
  theme_bw() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, size = 5)
  )

# Same series as above, but kept per country: the eight country columns
# (columns 2 to 9) are stacked into Country/visitors pairs first.
dtat <- dta %>%
  gather(key = "Country", value = "visitors", 2:9) %>%
  dplyr::select(Month, Country, visitors) %>%
  mutate(
    Year = as.integer(substr(Month, 1, 4)),
    Month = as.integer(substr(Month, 6, 7)),
    Season = ifelse(Month >= 10 | Month <= 3, "W", "S"),
    ym = as.Date(paste(Year, Month, "1", sep = "/"))
  )
# One panel per country, two panels per row, points coloured by season.
ggplot(dtat, aes(x = ym, y = visitors)) +
  geom_line() +
  geom_point(aes(color = Season)) +
  facet_wrap(~ Country, ncol = 2) +
  theme_bw() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, size = 5)
  )

夏天有比較多遊客

Q2.

Use the sample data set to estimate the mean life expectancy of Nobel prize winners.

p_load(ggalt)
library(hrbrthemes)
# First attempt: parse the birth and death dates of the sampled Nobel
# laureates directly with mdy().
dta <- read.table("nobel_lspan.txt", header = TRUE)
dta <- dta %>%
  mutate(born = mdy(Born),
         died = mdy(Died),
         # an interval runs from its start to its end: birth, then death
         span = born %--% died)
# One dumbbell per laureate, from date of birth to date of death, with the
# laureates ordered by date of death on the y-axis.
ggplot(dta, aes(x = born,
                xend = died,
                y = reorder(ID, died))) +
  geom_dumbbell(size = rel(1.1),
                colour = "gold",
                colour_x = "goldenrod",
                colour_xend = "goldenrod",
                dot_guide = TRUE,
                dot_guide_size = 0.2) +
  labs(x = "Year", y = NULL) +
  theme_ipsum()

出現未來人！我發現mdy日期需要是兩碼，一碼會有錯誤，例如

# mdy() applied to dates written with a one-digit and with a two-digit day;
# the output lines are the results recorded in this session.
# NOTE(review): the weekday names used elsewhere in this file indicate that
# the session ran under a Chinese locale. The mis-parses below may come from
# English month names not being recognised in that locale rather than from
# the number of digits in the day - confirm by re-running under an English
# LC_TIME locale.
mdy("February 1, 1952")

## [1] "2052-01-19"

mdy("February 01, 1952")

## [1] "1952-02-01"

mdy("July 4th, 2000") # lubridate CHEAT SHEET example

## [1] "2000-07-04"

mdy("July 4, 2000")

## [1] "2000-04-20"

library(stringr)
# Second attempt: rebuild every date as "Month-day-year" before handing it
# to mdy(), then derive each laureate's age at death.
dta <- read.table("nobel_lspan.txt", header = TRUE) %>%
  # split "Month day, year" into its three whitespace-separated parts
  separate(Born, c("bmonth", "bday", "byear"), sep = "\\s") %>%
  separate(Died, c("dmonth", "dday", "dyear"), sep = "\\s") %>%
  # drop the comma that trails the day
  mutate(bday = gsub(",", "", bday), # parse_number()
         dday = gsub(",", "", dday)) %>%
  transmute(ID = ID,
            Born = paste(bmonth, bday, byear, sep = "-"),
            Died = paste(dmonth, dday, dyear, sep = "-")) %>%
  mutate(born = mdy(Born),
         died = mdy(Died),
         span = born %--% died,
         # age at death in calendar years; dividing a day count by 365
         # ignores leap days and overstates every age slightly
         life = time_length(span, unit = "year"))
head(dta)
# Estimated mean life expectancy of the sampled laureates, in years.
mean(dta$life)

# Dumbbell chart: one row per laureate, running from date of birth to date
# of death, with the rows ordered by date of death.
ggplot(dta, aes(x = born, xend = died, y = reorder(ID, died))) +
  geom_dumbbell(
    size = rel(1.1),
    colour = "gold",
    colour_x = "goldenrod",
    colour_xend = "goldenrod",
    dot_guide = TRUE,
    dot_guide_size = 0.2
  ) +
  labs(x = "Year", y = NULL) +
  theme_ipsum()

# Lollipop chart: each laureate's age at death as a point, joined by a
# segment to the mean age of the whole sample.
ggplot(dta, aes(x = life, y = reorder(ID, died))) +
  geom_point(colour = "gray", size = 3) +
  geom_segment(aes(xend = mean(life), yend = reorder(ID, died)),
               colour = "gray") +
  labs(x = "Age", y = "")

Q3.

Use the following sample of records for profit made, arrival date, and departure date of group travel booked at a travel agency in Taiwan to estimate the mean profit per day of service.

# Group-travel bookings with an Expense amount and arrival/departure dates.
dta <- read.csv("Visit_TW.csv")
head(dta)

# Days of service count both the arrival and the departure day (hence
# "+ 1"); the per-day figure is total Expense over total days of service.
dta %>%
  mutate(
    Arrival = as.Date(Arrival),
    Depature = as.Date(Depature),
    Stay = as.numeric(Depature - Arrival, units = "days") + 1
  ) %>%
  summarize(
    allProfit = sum(Expense),
    stayDays = sum(Stay),
    meanProfit = allProfit / stayDays
  )

Q4.

The following rather awful plot is shown on a web page hosted by the Taiwanese Ministry of Education. Revise it so that it is a proper time series plot. For your convenience, the data points have been extracted and saved here. What happened in the early 1990s, and how do we know whether the trend reversal is real? You may want to augment the data set with further data points from 2012 to 2018, available in the foreign students in the U.S. data file.

library(padr)
# Counts extracted from the original chart: one value every five years for
# 1950-1990, then one value per year for 1991-2004.
dtaT <- read.table("tw_to_us.txt")
names(dtaT) <- "Number"
dtaT$Year <- c(seq(1950, 1990, 5), seq(1991, 2004, 1))
# Later years: the rows for Taiwan from the foreign-students file.
dtaU <- read.table("foreign_students_us.txt", header = TRUE) %>%
  filter(Country == "TW") %>%
  dplyr::select(-Country)
dta <- dplyr::bind_rows(dtaT, dtaU)
# Anchor every year on 1 January. as.Date(x, format = "%Y") fills in the
# month and day of the date on which the script is run, so the dates would
# shift between runs (and become NA for non-leap years on 29 February).
dta$Year <- as.Date(paste0(dta$Year, "-01-01"))
# Insert a row for every year that has no observation.
Dta <- pad(dta, interval = "year") # %>% fill_by_value()
# Points for the observed years plus a loess trend line.
ggplot(Dta, aes(Year, Number)) +
  geom_smooth(method = "loess", se = FALSE, col = "gray") +
  geom_point(na.rm = TRUE) +
  ggtitle("ROC (Taiwan) Students in the U.S.A (1950-2018)")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 39 rows containing non-finite values (stat_smooth).

Q5.

How different groups spend their day is an article published in The New York Times using the data collected from The American Time Use Survey. Discuss what we need to have in order to replicate this piece of graphical journalism in Taiwan.

基本資料：

性別（男女）、勞動狀態（有工作、無工作、非勞工）、種族、年齡、學歷、幾個小孩

Time-use data：

時間＿在做的事

ex.

0        1        2        3        4        5          ...
sleeping sleeping sleeping sleeping sleeping sleeping   ...
work     work     work     work     eating   eating     ...
...

Maps

library(rworldmap)
library(countrycode)
library(maptools)
library(raster)
library(ggmap)
library(leaflet)
library(sf)
library(dichromat) 
library(rgdal)
library(sp)
library(choroplethr)

In-class exercise

Q1.

Indicate countries you have visited so far on a world map in the style of the ebola outbreaks example.

# Countries visited so far, converted from English names to ISO alpha-3
# codes so that they can be joined to the world map by code.
dta <- data.frame(Country = c("Taiwan", "Japan"), Visited = c(1, 1))
dta <- mutate(
  dta,
  Country = countrycode(Country,
                        origin = "country.name",
                        destination = "iso3c")
)
Map <- joinCountryData2Map(dta,
                           joinCode = "ISO3",
                           nameJoinColumn = "Country")

## 2 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 241 codes from the map weren't represented in your data

# World map with the visited countries filled in as a single category;
# countries without data are drawn in light gray and no legend is shown.
mapCountryData(
  Map,
  nameColumnToPlot = "Visited",
  catMethod = "categorical",
  mapTitle = "Countries I Have Visited So Far",
  missingCountryCol = gray(0.9),
  addLegend = FALSE
)

Q2.

Plot the administrative areas of Taiwan that you have visited so far.

# Read the level-2 administrative boundaries of Taiwan from the shapefile
# as an sf object; the lines below are the reader's console output.
tw <- sf::st_read("TWN_adm/TWN_adm2.shp")

## Reading layer `TWN_adm2' from data source `/Users/Lynn/Documents/資料管理/0519_c10/TWN_adm/TWN_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 21 features and 18 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 119.3138 ymin: 21.89653 xmax: 122.1085 ymax: 25.63431
## CRS:            4326

# Print the unit names; their order is the row order of `tw`.
tw$NAME_2

##  [1] Kaohsiung City Taipei City    Changhwa       Chiayi        
##  [5] Hsinchu        Hualien        Ilan           Kaohsiung     
##  [9] Keelung City   Miaoli         Nantou         Penghu        
## [13] Pingtung       Taichung       Taichung City  Tainan        
## [17] Tainan City    Taipei         Taitung        Taoyuan       
## [21] Yunlin        
## 21 Levels: Changhwa Chiayi Hsinchu Hualien Ilan ... Yunlin

# One fill colour per row of `tw`: light gray everywhere except row 12
# (Penghu in the NAME_2 listing), which is red.
col <- replace(rep("lightgray", 21), 12, "red")
# Draw the administrative units once (first attribute only) using the fill
# colours in `col`, then explain the two colours in a legend.
plot(tw, col = col, max.plot = 1, main = "")
legend(
  "topleft",
  legend = c("have visited", "haven't visited"),
  fill = c("lightgray", "red"),
  inset = 0.02
)

Q3.

Map an area of Tainan city to include three of your favorite places to eat as landmarks.

### 主題：炸雞

# Interactive map of fried-chicken shops in Tainan. The three vectors are
# parallel: element i of each one describes marker i.
m <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lng = c(120.195214, 120.221492, 120.197976, 120.159680,
            120.218192, 120.201998, 120.197211, 120.193881),
    lat = c(22.997932, 22.991098, 22.993621, 23.002530,
            22.995690, 22.996327, 22.991416, 22.985852),
    popup = c("金華炸雞", "鳳凰來", "炸雞洋行", "炸雞洋行",
              "雞排本色", "京都輔炸雞", "胖老爹", "昌平炸雞王")
  )
# Printing the widget renders the map.
m

Exercise

Q1.

Build a thematic plot of the results of the Taiwan 2020 presidential election between the DPP and the KMT. The geographical data (maps) for Taiwan can be obtained from DIVA-GIS: Geographic Information System for Biodiversity Research. Source: Taiwan presidential election 2020, Wikipedia.

library(classInt)
# Re-read the level-2 administrative boundaries so that `tw` starts without
# any columns added earlier.
tw <- sf::st_read("TWN_adm/TWN_adm2.shp")

## Reading layer `TWN_adm2' from data source `/Users/Lynn/Documents/資料管理/0519_c10/TWN_adm/TWN_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 21 features and 18 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 119.3138 ymin: 21.89653 xmax: 122.1085 ymax: 25.63431
## CRS:            4326

# Vote share (%) per administrative unit, assigned by position in the row
# order of tw$NAME_2. The names only document which unit each value is
# meant for and are dropped before the assignment.
# NOTE(review): presumably the DPP ("green") share - confirm with the source.
# NOTE(review): Hsinchu carries the same value as Taipei (56.52) - verify
# against the source table.
tw$gvotes <- unname(c(
  "Kaohsiung City" = 62.23, "Taipei City" = 53.65,
  "Changhwa" = 57.17, "Chiayi" = 64.22,
  "Hsinchu" = 56.52, "Hualien" = 35.91,
  "Ilan" = 63.28, "Kaohsiung" = 62.23,
  "Keelung City" = 50.82, "Miaoli" = 45.02,
  "Nantou" = 50.83, "Penghu" = 53.85,
  "Pingtung" = 62.16, "Taichung" = 56.95,
  "Taichung City" = 56.95, "Tainan" = 67.38,
  "Tainan City" = 67.38, "Taipei" = 56.52,
  "Taitung" = 38.12, "Taoyuan" = 54.78,
  "Yunlin" = 61.56
))

# Seven colours interpolated from blue to green.
pal <- colorRampPalette(c("blue", "green"))
mcol <- pal(7)

# Break points that split the vote shares into seven quantile classes.
brks <- classIntervals(tw$gvotes, n = 7, style = "quantile")$brks

# Fill each unit with the colour of the class its vote share falls into;
# all.inside = TRUE keeps the extreme values inside the first/last class.
plot(tw,
     col = mcol[findInterval(tw$gvotes, brks, all.inside = TRUE)],
     axes = FALSE, max.plot = 1, main = " ")

# Title drawn separately from the (blank) main title of the plot call.
title("Taiwan Presidential Election, 2020")

# Legend listing the rounded class boundaries next to their colours.
legend("topleft",
       legend = leglabs(round(brks)),
       fill = mcol,
       cex = 0.8, bty = "n",
       x.intersp = .6, y.intersp = .6)

Q2.

Data on traffic accidents on roads in Taiwan in 2011 are available on-line from the Department of Transportation. Plot the number of deaths per 10,000 vehicles over the administrative units.

Q3.

Download the data on age at first sexual intercourse across several countries to make the following plot:

# Split the Date column at the dash(es) followed by a whitespace character,
# keep the second part as a lower-case region name, and rename the
# measurement column to `value`.
dta <- read.csv("Age_When_You_First_Had_Sex.csv") %>%
  separate(Date, into = c("no", "region"), sep = '-+\\s') %>%
  dplyr::select(-no) %>%
  rename(value = X1.1.2005) %>%
  mutate(region = tolower(region))
head(dta)

# World choropleth of `value` by `region`, binned into nine colour classes
# of the "Blues" palette.
country_choropleth(dta, num_colors = 9) +
  scale_fill_brewer(palette = "Blues") +
  labs(title = " Age When You First Had Sex")

Q4.

Download all the files from github (click the downward triangle in the clone or download button in green) for flood in schools in Taipei to replicate the analysis with the markdown file included.

降雨頻率分析與淹水潛勢是洪災分析重要的資料來源，利用水文分析進行降雨觀測及淹水分析結果，進一步整合社會統計資料，可評估人類社會承受災害的潛在風險。
假設將24小時延時200年重現期降雨的淹水潛勢的深度達50cm以上的區域，定義為「潛在受災區」。

# Read the flood layer from its shapefile, decoding attribute text as
# Big-5; the lines below are the reader's console output.
Flood <- rgdal::readOGR("flood-in-school-master/flood50.shp", encoding="big-5")

## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/Lynn/Documents/資料管理/0519_c10/flood-in-school-master/flood50.shp", layer: "flood50"
## with 5103 features
## It has 5 fields

# Read the Taipei school layer the same way. Per the message below, the
# STUDENTS field arrives as strings rather than numbers.
Schools <- rgdal::readOGR("flood-in-school-master/tpecity_school.shp", encoding="big-5")

## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/Lynn/Documents/資料管理/0519_c10/flood-in-school-master/tpecity_school.shp", layer: "tpecity_school"
## with 198 features
## It has 3 fields
## Integer64 fields read as strings:  STUDENTS

Schools.Flood <- Schools[Flood, ] # keep only the schools that fall inside Flood

疊三個圖層

# Three stacked layers: the flood layer in cyan, every school as a gray
# dot, and the schools inside the flood layer overplotted in red.
plot(Flood, col = "cyan")
plot(Schools, pch = 20, col = "gray", add = TRUE)
plot(Schools.Flood, pch = 20, col = "red", add = TRUE)

# List every school that falls inside the flood layer.
head(Schools.Flood, n = nrow(Schools.Flood))

# Total number of students in those schools. STUDENTS is read as text, so
# it is converted through as.character() first: if the column was turned
# into a factor on import, as.integer() alone would add up the level codes
# instead of the student counts.
# NOTE(review): re-check the total recorded below after this change.
sum(as.integer(as.character(Schools.Flood$STUDENTS)))

## [1] 2645

Class9 : Dates & Times & Maps

0520

Dates & Times

In-class exercise

Q1.

Q2.

Q3.

Q4.

Exercises

Q1.

Q2.

Q3.

Q4.

Q5.

Maps

In-class exercise

Q1.

Q2.

Q3.

Exercise

Q1.

Q2.

Q3.

Q4.