Dates & Times

In-class exercises 1

  • 方法一
db_TH <- as.Date("2018/9/1")
de_TH <- as.Date("2020/5/18")
as.numeric(difftime(de_TH, db_TH, unit="days")) 
## [1] 625
  • 方法二
Sys.Date() - as.Date('2018-09-01')
## Time difference of 632 days

#參考傑恩同學

  • 方法三
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
startDate <- dmy("1-September-2018")
endDate <- dmy("18-May-2020")
endDate - startDate
## Time difference of 625 days
as.period(startDate %--% endDate)
## [1] "1y 8m 17d 0H 0M 0S"

In-class exercises 2

#資料匯入、檢視資料

dta <- read.table("/Users/User/Desktop/DM_R/hk0518/calls_nyc.csv", sep="," , header=T , stringsAsFactor=F, fill=T )
dim(dta)
## [1] 24  2
str(dta)
## 'data.frame':    24 obs. of  2 variables:
##  $ Hour : num  0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 ...
##  $ Calls: int  1080 910 770 780 380 390 200 300 275 395 ...

#查看data類型:class()、變項名稱:name()為何

class(dta)
## [1] "data.frame"
names(dta)
## [1] "Hour"  "Calls"
View(dta)

後續過程尚未完成

In-class exercises 3

db_TH <- as.Date("2000/5/24")
dd_TH <- as.Date("2100/5/24")

bDays <- seq(from=db_TH, to=dd_TH, by="years")

data <-data.frame(table(weekdays(bDays)))
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.3.0     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
  • 檢查 weekdays function 的結果
weekdays(bDays)
##   [1] "星期三" "星期四" "星期五" "星期六" "星期一" "星期二" "星期三" "星期四"
##   [9] "星期六" "星期日" "星期一" "星期二" "星期四" "星期五" "星期六" "星期日"
##  [17] "星期二" "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三"
##  [25] "星期五" "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六"
##  [33] "星期一" "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二"
##  [41] "星期四" "星期五" "星期六" "星期日" "星期二" "星期三" "星期四" "星期五"
##  [49] "星期日" "星期一" "星期二" "星期三" "星期五" "星期六" "星期日" "星期一"
##  [57] "星期三" "星期四" "星期五" "星期六" "星期一" "星期二" "星期三" "星期四"
##  [65] "星期六" "星期日" "星期一" "星期二" "星期四" "星期五" "星期六" "星期日"
##  [73] "星期二" "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三"
##  [81] "星期五" "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六"
##  [89] "星期一" "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二"
##  [97] "星期四" "星期五" "星期六" "星期日" "星期一"
names(data) <- c("weekdays", "Freq")
 
# plot
ggplot(data, aes(x=weekdays, y=Freq)) +
  geom_segment( aes(x=weekdays, xend=weekdays, y=0, yend=Freq)) +
  geom_point( size=2, color="green", fill=alpha("blue", 0.3), alpha=0.7, shape=21, stroke=4) 

In-class exercises 4

dta <- read.table("/Users/User/Desktop/DM_R/hk0518/birth_college.txt", header=T , stringsAsFactor=F, fill=T )
dim(dta)
## [1] 29  2
str(dta)
## 'data.frame':    29 obs. of  2 variables:
##  $ Birth   : num  23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
##  $ Entrance: int  NA NA NA NA NA NA NA NA NA NA ...

#查看data類型:class()、變項名稱:name()為何

class(dta)
## [1] "data.frame"
names(dta)
## [1] "Birth"    "Entrance"
head(dta)
##   Birth Entrance
## 1  23.0       NA
## 2  22.1       NA
## 3  20.6       NA
## 4  19.6       NA
## 5  18.0       NA
## 6  15.9       NA
dta$year <- c(1981:2009)
head(dta)
##   Birth Entrance year
## 1  23.0       NA 1981
## 2  22.1       NA 1982
## 3  20.6       NA 1983
## 4  19.6       NA 1984
## 5  18.0       NA 1985
## 6  15.9       NA 1986
par(mar = c(5, 5, 2, 5))
plot(dta$year, dta$Birth, ylim = c(0, 60), 
     xlab = "Year", ylab = "Birth rate (0.1%)")

par(new = TRUE)
plot(dta$year, dta$Entrance, ylim = c(40, 100), pch = 16,xaxt = "n", yaxt = "n", xlab = "", ylab = "")     
                                                        # xaxt = "n", yaxt = "n" 用來移除其預設在左側的y軸座標
axis(4, ylim = c(40,100))                               # 加上一 y 軸,2 是左邊, 4 是右邊
mtext("Acceptance rate (%)", side = 4, line = 3)        # line = 3 用來調整右側 y 軸名稱的位置,數字愈小愈靠近座標軸

legend("topleft", c("Birth", "College"), pch = c(1, 16)) # 圖例

grid(nx = 30, ny = 0, col = "lightgray", lty = "dotted") # 背景格線

  • 學習from銘姿同學

exercises 1

dta <- read.csv("/Users/User/Desktop/DM_R/hk0518/nz_visitors.csv", header = TRUE)
dim(dta)
## [1] 170  10
str(dta)
## 'data.frame':    170 obs. of  10 variables:
##  $ Month    : Factor w/ 170 levels "1998M09","1998M10",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Australia: int  17244 18090 16750 25909 27228 19461 19200 19595 12564 10987 ...
##  $ ChinaPRof: int  748 941 1054 1270 1375 1660 1456 1488 1449 1413 ...
##  $ Japan    : int  6093 5039 6112 6670 6008 7478 7341 5030 4275 3758 ...
##  $ Korea    : int  979 1083 1144 1836 2716 2245 1611 1416 1192 1255 ...
##  $ Germany  : int  1320 2459 5195 5499 6430 7320 6094 3157 1450 765 ...
##  $ UK       : int  5794 7876 13362 20238 22557 27477 20187 13448 8587 7271 ...
##  $ Canada   : int  973 1418 2236 2935 3623 4394 3573 1795 1160 903 ...
##  $ USA      : int  3837 6093 8468 7865 10007 12533 10519 6737 4932 4845 ...
##  $ Total    : int  57930 68203 84370 113853 122130 124305 104106 83414 59800 52426 ...

#查看data類型:class()、變項名稱:name()為何

class(dta)
## [1] "data.frame"
names(dta)
##  [1] "Month"     "Australia" "ChinaPRof" "Japan"     "Korea"     "Germany"  
##  [7] "UK"        "Canada"    "USA"       "Total"
head(dta)
##     Month Australia ChinaPRof Japan Korea Germany    UK Canada   USA  Total
## 1 1998M09     17244       748  6093   979    1320  5794    973  3837  57930
## 2 1998M10     18090       941  5039  1083    2459  7876   1418  6093  68203
## 3 1998M11     16750      1054  6112  1144    5195 13362   2236  8468  84370
## 4 1998M12     25909      1270  6670  1836    5499 20238   2935  7865 113853
## 5 1999M01     27228      1375  6008  2716    6430 22557   3623 10007 122130
## 6 1999M02     19461      1660  7478  2245    7320 27477   4394 12533 124305
dta <- dta %>%
  dplyr::mutate(Year = as.integer(substr(Month, 1, 4)), 
         Month = as.integer(substr(Month, 6, 7)), 
         Season = (Month - 1) %/% 3 + 1) 
head(dta)
##   Month Australia ChinaPRof Japan Korea Germany    UK Canada   USA  Total Year
## 1     9     17244       748  6093   979    1320  5794    973  3837  57930 1998
## 2    10     18090       941  5039  1083    2459  7876   1418  6093  68203 1998
## 3    11     16750      1054  6112  1144    5195 13362   2236  8468  84370 1998
## 4    12     25909      1270  6670  1836    5499 20238   2935  7865 113853 1998
## 5     1     27228      1375  6008  2716    6430 22557   3623 10007 122130 1999
## 6     2     19461      1660  7478  2245    7320 27477   4394 12533 124305 1999
##   Season
## 1      3
## 2      4
## 3      4
## 4      4
## 5      1
## 6      1
dta <- dta %>%
  tidyr::gather(key = "Country", value = "Visitors", 2:9)   #gather有待明白~~~!!?

head(dta)
##   Month  Total Year Season   Country Visitors
## 1     9  57930 1998      3 Australia    17244
## 2    10  68203 1998      4 Australia    18090
## 3    11  84370 1998      4 Australia    16750
## 4    12 113853 1998      4 Australia    25909
## 5     1 122130 1999      1 Australia    27228
## 6     2 124305 1999      1 Australia    19461
dta <- dta %>% 
  dplyr::group_by(Country, Month) %>% 
  dplyr::summarize(Visitor = mean(Visitors))

head(dta)
## # A tibble: 6 x 3
## # Groups:   Country [1]
##   Country   Month Visitor
##   <chr>     <int>   <dbl>
## 1 Australia     1  44729.
## 2 Australia     2  30123.
## 3 Australia     3  29966.
## 4 Australia     4  29149.
## 5 Australia     5  18887.
## 6 Australia     6  17624.
ggplot()+
  # 春天
  geom_rect(data=data.frame(xstart=2.5, xend=5.5),
           aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf), 
           fill='pink',
           alpha=.2) +
  # 夏天
  geom_rect(data=data.frame(xstart=5.5, xend=8.5),
           aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf), 
           fill='green',
           alpha=.2) + 
  # 秋天
  geom_rect(data=data.frame(xstart=8.5, xend=11.5),
           aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf), 
           fill='orange',
           alpha=.2) +
  geom_line(data = dta, aes(x = Month, y = Visitor, color = Country))+
  scale_x_continuous(limits = c(1, 12), breaks = c(1, 4, 8, 12))+
  theme_classic()

  • 學習from銘姿同學

exercises 2

dta <- read.table("/Users/User/Desktop/DM_R/hk0518/nobel_lspan.txt", header = TRUE)
str(dta)
## 'data.frame':    21 obs. of  3 variables:
##  $ ID  : Factor w/ 21 levels "Bernard Katz",..: 3 8 17 1 13 15 7 14 2 16 ...
##  $ Born: Factor w/ 21 levels "August 8, 1902",..: 5 9 2 12 1 14 6 10 16 3 ...
##  $ Died: Factor w/ 21 levels "April 16, 1972",..: 21 8 20 2 19 6 5 16 7 4 ...

#查看data類型:class()、變項名稱:name()為何

class(dta)
## [1] "data.frame"
names(dta)
## [1] "ID"   "Born" "Died"
head(dta)
##                 ID              Born               Died
## 1   David H. Hubel February 27, 1926 September 22, 2013
## 2 Herbert A. Simon     June 15, 1916   February 9, 2001
## 3  Ronald H. Coase December 29, 1910  September 2, 2013
## 4     Bernard Katz    March 26, 1911     April 20, 2003
## 5     P.A.M. Dirac    August 8, 1902   October 20, 1984
## 6  Richard Feynman      May 11, 1918  February 15, 1988
library(dplyr)
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(tidyr)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(tidyverse)
library(lattice)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(lubridate)
  • 參考凱揚同學

    語法關聯magrittr→mutate,lubridate→mdy,ggalt、ggplot2→geom_dumbbell

    dta <- read.table(“nobel_lspan.txt”, h=T) %>%

    mutate(born=mdy(Born),died=mdy(Died),span=died %–% born)

dta$born <- mdy(dta$Born)
dta$died <- mdy(dta$Died)
dta$span <- dta$died %--% dta$born 
head(dta)
##                 ID              Born               Died       born       died
## 1   David H. Hubel February 27, 1926 September 22, 2013 1926-02-27 2013-09-22
## 2 Herbert A. Simon     June 15, 1916   February 9, 2001 1916-06-15 2001-02-09
## 3  Ronald H. Coase December 29, 1910  September 2, 2013 1910-12-29 2013-09-02
## 4     Bernard Katz    March 26, 1911     April 20, 2003 1911-03-26 2003-04-20
## 5     P.A.M. Dirac    August 8, 1902   October 20, 1984 1902-08-08 1984-10-20
## 6  Richard Feynman      May 11, 1918  February 15, 1988 1918-05-11 1988-02-15
##                             span
## 1 2013-09-22 UTC--1926-02-27 UTC
## 2 2001-02-09 UTC--1916-06-15 UTC
## 3 2013-09-02 UTC--1910-12-29 UTC
## 4 2003-04-20 UTC--1911-03-26 UTC
## 5 1984-10-20 UTC--1902-08-08 UTC
## 6 1988-02-15 UTC--1918-05-11 UTC
ggplot(dta, aes(reorder(ID, died),  x = born, xend = died)) +
 geom_dumbbell(size=rel(1.1), 
               colour="gold", 
               colour_x="goldenrod",
               colour_xend="goldenrod", 
               dot_guide=TRUE, 
               dot_guide_size=0.2) +
 labs(x="Year", y=NULL) + 
 theme_ipsum()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

dta$age <- as.numeric(difftime(dta$died, dta$born, unit="days")) / 365    #difftime函式——計算時差   #as.numeric() 轉換成數值
mean(dta$age)
## [1] 82.39517
  • 學習from銘姿同學
#平均壽命
dta%>% summarise(meanlife=mean((died - born)/365))
##        meanlife
## 1 82.39517 days
  • 學習from唐榮同學

exercises 3

  • Use the following sample of records for profit made, arrival date, and departure date of group travel booked at a travel agency in Taiwan to estimate the mean profit per day of service.
dta <- read.csv("/Users/User/Desktop/DM_R/hk0518/Visit_TW.csv", header =TRUE )
str(dta)
## 'data.frame':    96 obs. of  3 variables:
##  $ Expense : int  15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
##  $ Arrival : Factor w/ 83 levels "2014/10/10","2014/10/13",..: 79 83 78 74 75 31 31 26 29 77 ...
##  $ Depature: Factor w/ 79 levels "2014/10/1","2014/10/13",..: 73 77 73 76 75 32 69 26 28 73 ...

#查看data類型:class()、變項名稱:name()、前六項資料:head()為何

class(dta)
## [1] "data.frame"
names(dta)
## [1] "Expense"  "Arrival"  "Depature"
head(dta)
##   Expense    Arrival   Depature
## 1   15393  2015/2/16  2015/2/17
## 2   27616   2015/3/6  2015/3/11
## 3    8876  2015/2/14  2015/2/17
## 4   57378  2015/1/30   2015/2/9
## 5   32613  2015/1/31   2015/2/6
## 6   46998 2014/12/27 2014/12/31
dta$Arrival <- as.Date(dta$Arrival)
dta$Depature <- as.Date(dta$Depature)
dta$duration <- as.numeric(difftime(dta$Depature, dta$Arrival, unit="days"))+1
str(dta)
## 'data.frame':    96 obs. of  4 variables:
##  $ Expense : int  15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
##  $ Arrival : Date, format: "2015-02-16" "2015-03-06" ...
##  $ Depature: Date, format: "2015-02-17" "2015-03-11" ...
##  $ duration: num  2 6 4 11 7 5 9 6 4 7 ...
profit <- data.frame(All_profit <- sum(dta$Expense), 
                     Total_stay_duration <- sum(dta$duration), 
                     Mean_profit <- All_profit/Total_stay_duration)
names(profit) <- c("All profit", "Total stay duration", "Mean profit")

knitr::kable(profit)
All profit Total stay duration Mean profit
3722133 674 5522.453
  • 學習from銘姿同學

Maps

library(leaflet)
m <- leaflet() %>%
 addTiles() %>%  
 addMarkers(lng=120.219722,lat=22.996667,popup="NCKU") %>%  
 addMarkers(lng=120.223021,lat=23.005941,popup="廣越美食")%>% 
 addMarkers(lng=120.220822,lat=22.992697,popup="肉肉控")%>% 
 addMarkers(lng=120.221600,lat=22.993272,popup="布萊恩紅茶")
m

#使用function重複執行,標示出三家店的所在位置

m2 <- function(x,y,z){
  w <- addMarkers(addTiles(leaflet()),lng=x,lat=y,popup=z)
  return(w)
}
m2 (c(120.217964,120.217990,120.217995),c(22.994571,22.994828,22.995790),c("Subway","Louisa Coffee","cama cafe"))
  • 學習from主揚同學