db_TH <- as.Date("2018/9/1")
de_TH <- as.Date("2020/5/18")
as.numeric(difftime(de_TH, db_TH, unit="days"))
## [1] 625
Sys.Date() - as.Date('2018-09-01')
## Time difference of 632 days
#參考傑恩同學
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
startDate <- dmy("1-September-2018")
endDate <- dmy("18-May-2020")
endDate - startDate
## Time difference of 625 days
as.period(startDate %--% endDate)
## [1] "1y 8m 17d 0H 0M 0S"
#資料匯入、檢視資料
dta <- read.table("/Users/User/Desktop/DM_R/hk0518/calls_nyc.csv", sep="," , header=T , stringsAsFactor=F, fill=T )
dim(dta)
## [1] 24 2
str(dta)
## 'data.frame': 24 obs. of 2 variables:
## $ Hour : num 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 ...
## $ Calls: int 1080 910 770 780 380 390 200 300 275 395 ...
#查看data類型:class()、變項名稱:name()為何
class(dta)
## [1] "data.frame"
names(dta)
## [1] "Hour" "Calls"
View(dta)
後續過程尚未完成
db_TH <- as.Date("2000/5/24")
dd_TH <- as.Date("2100/5/24")
bDays <- seq(from=db_TH, to=dd_TH, by="years")
data <-data.frame(table(weekdays(bDays)))
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.3.0 √ purrr 0.3.3
## √ tibble 2.1.3 √ dplyr 0.8.5
## √ tidyr 1.0.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
weekdays(bDays)
## [1] "星期三" "星期四" "星期五" "星期六" "星期一" "星期二" "星期三" "星期四"
## [9] "星期六" "星期日" "星期一" "星期二" "星期四" "星期五" "星期六" "星期日"
## [17] "星期二" "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三"
## [25] "星期五" "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六"
## [33] "星期一" "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二"
## [41] "星期四" "星期五" "星期六" "星期日" "星期二" "星期三" "星期四" "星期五"
## [49] "星期日" "星期一" "星期二" "星期三" "星期五" "星期六" "星期日" "星期一"
## [57] "星期三" "星期四" "星期五" "星期六" "星期一" "星期二" "星期三" "星期四"
## [65] "星期六" "星期日" "星期一" "星期二" "星期四" "星期五" "星期六" "星期日"
## [73] "星期二" "星期三" "星期四" "星期五" "星期日" "星期一" "星期二" "星期三"
## [81] "星期五" "星期六" "星期日" "星期一" "星期三" "星期四" "星期五" "星期六"
## [89] "星期一" "星期二" "星期三" "星期四" "星期六" "星期日" "星期一" "星期二"
## [97] "星期四" "星期五" "星期六" "星期日" "星期一"
names(data) <- c("weekdays", "Freq")
# plot
ggplot(data, aes(x=weekdays, y=Freq)) +
geom_segment( aes(x=weekdays, xend=weekdays, y=0, yend=Freq)) +
geom_point( size=2, color="green", fill=alpha("blue", 0.3), alpha=0.7, shape=21, stroke=4)
dta <- read.table("/Users/User/Desktop/DM_R/hk0518/birth_college.txt", header=T , stringsAsFactor=F, fill=T )
dim(dta)
## [1] 29 2
str(dta)
## 'data.frame': 29 obs. of 2 variables:
## $ Birth : num 23 22.1 20.6 19.6 18 15.9 16 17.2 15.7 16.6 ...
## $ Entrance: int NA NA NA NA NA NA NA NA NA NA ...
#查看data類型:class()、變項名稱:name()為何
class(dta)
## [1] "data.frame"
names(dta)
## [1] "Birth" "Entrance"
head(dta)
## Birth Entrance
## 1 23.0 NA
## 2 22.1 NA
## 3 20.6 NA
## 4 19.6 NA
## 5 18.0 NA
## 6 15.9 NA
dta$year <- c(1981:2009)
head(dta)
## Birth Entrance year
## 1 23.0 NA 1981
## 2 22.1 NA 1982
## 3 20.6 NA 1983
## 4 19.6 NA 1984
## 5 18.0 NA 1985
## 6 15.9 NA 1986
par(mar = c(5, 5, 2, 5))
plot(dta$year, dta$Birth, ylim = c(0, 60),
xlab = "Year", ylab = "Birth rate (0.1%)")
par(new = TRUE)
plot(dta$year, dta$Entrance, ylim = c(40, 100), pch = 16,xaxt = "n", yaxt = "n", xlab = "", ylab = "")
# xaxt = "n", yaxt = "n" 用來移除其預設在左側的y軸座標
axis(4, ylim = c(40,100)) # 加上一 y 軸,2 是左邊, 4 是右邊
mtext("Acceptance rate (%)", side = 4, line = 3) # line = 3 用來調整右側 y 軸名稱的位置,數字愈小愈靠近座標軸
legend("topleft", c("Birth", "College"), pch = c(1, 16)) # 圖例
grid(nx = 30, ny = 0, col = "lightgray", lty = "dotted") # 背景格線
dta <- read.csv("/Users/User/Desktop/DM_R/hk0518/nz_visitors.csv", header = TRUE)
dim(dta)
## [1] 170 10
str(dta)
## 'data.frame': 170 obs. of 10 variables:
## $ Month : Factor w/ 170 levels "1998M09","1998M10",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Australia: int 17244 18090 16750 25909 27228 19461 19200 19595 12564 10987 ...
## $ ChinaPRof: int 748 941 1054 1270 1375 1660 1456 1488 1449 1413 ...
## $ Japan : int 6093 5039 6112 6670 6008 7478 7341 5030 4275 3758 ...
## $ Korea : int 979 1083 1144 1836 2716 2245 1611 1416 1192 1255 ...
## $ Germany : int 1320 2459 5195 5499 6430 7320 6094 3157 1450 765 ...
## $ UK : int 5794 7876 13362 20238 22557 27477 20187 13448 8587 7271 ...
## $ Canada : int 973 1418 2236 2935 3623 4394 3573 1795 1160 903 ...
## $ USA : int 3837 6093 8468 7865 10007 12533 10519 6737 4932 4845 ...
## $ Total : int 57930 68203 84370 113853 122130 124305 104106 83414 59800 52426 ...
#查看data類型:class()、變項名稱:name()為何
class(dta)
## [1] "data.frame"
names(dta)
## [1] "Month" "Australia" "ChinaPRof" "Japan" "Korea" "Germany"
## [7] "UK" "Canada" "USA" "Total"
head(dta)
## Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total
## 1 1998M09 17244 748 6093 979 1320 5794 973 3837 57930
## 2 1998M10 18090 941 5039 1083 2459 7876 1418 6093 68203
## 3 1998M11 16750 1054 6112 1144 5195 13362 2236 8468 84370
## 4 1998M12 25909 1270 6670 1836 5499 20238 2935 7865 113853
## 5 1999M01 27228 1375 6008 2716 6430 22557 3623 10007 122130
## 6 1999M02 19461 1660 7478 2245 7320 27477 4394 12533 124305
dta <- dta %>%
dplyr::mutate(Year = as.integer(substr(Month, 1, 4)),
Month = as.integer(substr(Month, 6, 7)),
Season = (Month - 1) %/% 3 + 1)
head(dta)
## Month Australia ChinaPRof Japan Korea Germany UK Canada USA Total Year
## 1 9 17244 748 6093 979 1320 5794 973 3837 57930 1998
## 2 10 18090 941 5039 1083 2459 7876 1418 6093 68203 1998
## 3 11 16750 1054 6112 1144 5195 13362 2236 8468 84370 1998
## 4 12 25909 1270 6670 1836 5499 20238 2935 7865 113853 1998
## 5 1 27228 1375 6008 2716 6430 22557 3623 10007 122130 1999
## 6 2 19461 1660 7478 2245 7320 27477 4394 12533 124305 1999
## Season
## 1 3
## 2 4
## 3 4
## 4 4
## 5 1
## 6 1
dta <- dta %>%
tidyr::gather(key = "Country", value = "Visitors", 2:9) #gather有待明白~~~!!?
head(dta)
## Month Total Year Season Country Visitors
## 1 9 57930 1998 3 Australia 17244
## 2 10 68203 1998 4 Australia 18090
## 3 11 84370 1998 4 Australia 16750
## 4 12 113853 1998 4 Australia 25909
## 5 1 122130 1999 1 Australia 27228
## 6 2 124305 1999 1 Australia 19461
dta <- dta %>%
dplyr::group_by(Country, Month) %>%
dplyr::summarize(Visitor = mean(Visitors))
head(dta)
## # A tibble: 6 x 3
## # Groups: Country [1]
## Country Month Visitor
## <chr> <int> <dbl>
## 1 Australia 1 44729.
## 2 Australia 2 30123.
## 3 Australia 3 29966.
## 4 Australia 4 29149.
## 5 Australia 5 18887.
## 6 Australia 6 17624.
ggplot()+
# 春天
geom_rect(data=data.frame(xstart=2.5, xend=5.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='pink',
alpha=.2) +
# 夏天
geom_rect(data=data.frame(xstart=5.5, xend=8.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='green',
alpha=.2) +
# 秋天
geom_rect(data=data.frame(xstart=8.5, xend=11.5),
aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf),
fill='orange',
alpha=.2) +
geom_line(data = dta, aes(x = Month, y = Visitor, color = Country))+
scale_x_continuous(limits = c(1, 12), breaks = c(1, 4, 8, 12))+
theme_classic()
dta <- read.table("/Users/User/Desktop/DM_R/hk0518/nobel_lspan.txt", header = TRUE)
str(dta)
## 'data.frame': 21 obs. of 3 variables:
## $ ID : Factor w/ 21 levels "Bernard Katz",..: 3 8 17 1 13 15 7 14 2 16 ...
## $ Born: Factor w/ 21 levels "August 8, 1902",..: 5 9 2 12 1 14 6 10 16 3 ...
## $ Died: Factor w/ 21 levels "April 16, 1972",..: 21 8 20 2 19 6 5 16 7 4 ...
#查看data類型:class()、變項名稱:name()為何
class(dta)
## [1] "data.frame"
names(dta)
## [1] "ID" "Born" "Died"
head(dta)
## ID Born Died
## 1 David H. Hubel February 27, 1926 September 22, 2013
## 2 Herbert A. Simon June 15, 1916 February 9, 2001
## 3 Ronald H. Coase December 29, 1910 September 2, 2013
## 4 Bernard Katz March 26, 1911 April 20, 2003
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984
## 6 Richard Feynman May 11, 1918 February 15, 1988
library(dplyr)
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(tidyr)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(tidyverse)
library(lattice)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(lubridate)
參考凱揚同學
語法關聯magrittr→mutate,lubridate→mdy,ggalt、ggplot2→geom_dumbbell
dta <- read.table(“nobel_lspan.txt”, h=T) %>%
mutate(born=mdy(Born),died=mdy(Died),span=died %–% born)
dta$born <- mdy(dta$Born)
dta$died <- mdy(dta$Died)
dta$span <- dta$died %--% dta$born
head(dta)
## ID Born Died born died
## 1 David H. Hubel February 27, 1926 September 22, 2013 1926-02-27 2013-09-22
## 2 Herbert A. Simon June 15, 1916 February 9, 2001 1916-06-15 2001-02-09
## 3 Ronald H. Coase December 29, 1910 September 2, 2013 1910-12-29 2013-09-02
## 4 Bernard Katz March 26, 1911 April 20, 2003 1911-03-26 2003-04-20
## 5 P.A.M. Dirac August 8, 1902 October 20, 1984 1902-08-08 1984-10-20
## 6 Richard Feynman May 11, 1918 February 15, 1988 1918-05-11 1988-02-15
## span
## 1 2013-09-22 UTC--1926-02-27 UTC
## 2 2001-02-09 UTC--1916-06-15 UTC
## 3 2013-09-02 UTC--1910-12-29 UTC
## 4 2003-04-20 UTC--1911-03-26 UTC
## 5 1984-10-20 UTC--1902-08-08 UTC
## 6 1988-02-15 UTC--1918-05-11 UTC
ggplot(dta, aes(reorder(ID, died), x = born, xend = died)) +
geom_dumbbell(size=rel(1.1),
colour="gold",
colour_x="goldenrod",
colour_xend="goldenrod",
dot_guide=TRUE,
dot_guide_size=0.2) +
labs(x="Year", y=NULL) +
theme_ipsum()
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
dta$age <- as.numeric(difftime(dta$died, dta$born, unit="days")) / 365 #difftime函式——計算時差 #as.numeric() 轉換成數值
mean(dta$age)
## [1] 82.39517
#平均壽命
dta%>% summarise(meanlife=mean((died - born)/365))
## meanlife
## 1 82.39517 days
dta <- read.csv("/Users/User/Desktop/DM_R/hk0518/Visit_TW.csv", header =TRUE )
str(dta)
## 'data.frame': 96 obs. of 3 variables:
## $ Expense : int 15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
## $ Arrival : Factor w/ 83 levels "2014/10/10","2014/10/13",..: 79 83 78 74 75 31 31 26 29 77 ...
## $ Depature: Factor w/ 79 levels "2014/10/1","2014/10/13",..: 73 77 73 76 75 32 69 26 28 73 ...
#查看data類型:class()、變項名稱:name()、前六項資料:head()為何
class(dta)
## [1] "data.frame"
names(dta)
## [1] "Expense" "Arrival" "Depature"
head(dta)
## Expense Arrival Depature
## 1 15393 2015/2/16 2015/2/17
## 2 27616 2015/3/6 2015/3/11
## 3 8876 2015/2/14 2015/2/17
## 4 57378 2015/1/30 2015/2/9
## 5 32613 2015/1/31 2015/2/6
## 6 46998 2014/12/27 2014/12/31
dta$Arrival <- as.Date(dta$Arrival)
dta$Depature <- as.Date(dta$Depature)
dta$duration <- as.numeric(difftime(dta$Depature, dta$Arrival, unit="days"))+1
str(dta)
## 'data.frame': 96 obs. of 4 variables:
## $ Expense : int 15393 27616 8876 57378 32613 46998 10744 3269 16195 55842 ...
## $ Arrival : Date, format: "2015-02-16" "2015-03-06" ...
## $ Depature: Date, format: "2015-02-17" "2015-03-11" ...
## $ duration: num 2 6 4 11 7 5 9 6 4 7 ...
profit <- data.frame(All_profit <- sum(dta$Expense),
Total_stay_duration <- sum(dta$duration),
Mean_profit <- All_profit/Total_stay_duration)
names(profit) <- c("All profit", "Total stay duration", "Mean profit")
knitr::kable(profit)
| All profit | Total stay duration | Mean profit |
|---|---|---|
| 3722133 | 674 | 5522.453 |
library(leaflet)
m <- leaflet() %>%
addTiles() %>%
addMarkers(lng=120.219722,lat=22.996667,popup="NCKU") %>%
addMarkers(lng=120.223021,lat=23.005941,popup="廣越美食")%>%
addMarkers(lng=120.220822,lat=22.992697,popup="肉肉控")%>%
addMarkers(lng=120.221600,lat=22.993272,popup="布萊恩紅茶")
m
#使用function重複執行,標示出三家店的所在位置
m2 <- function(x,y,z){
w <- addMarkers(addTiles(leaflet()),lng=x,lat=y,popup=z)
return(w)
}
m2 (c(120.217964,120.217990,120.217995),c(22.994571,22.994828,22.995790),c("Subway","Louisa Coffee","cama cafe"))