Demo20180321

計算新聞中各個詞出現的次數

f <- file('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/disease.txt')
article <- readLines(f)

## Warning in readLines(f): 於 'https://raw.githubusercontent.com/ywchiu/
## cdc_course/master/data/disease.txt' 找到不完整的最後一列

close(f)

class(article)

## [1] "character"

str(article)

##  chr [1:33] "Scientists put on alert for deadly new pathogen - 'Disease X'" ...

?strsplit

## starting httpd help server ... done

article.split <- strsplit(tolower(article), " |,|'|\\.")
class(article.split)

## [1] "list"

?unlist
article.vec <- unlist(article.split)

?table
tb <- table(article.vec)


head(sort(tb, decreasing = TRUE))

## article.vec
##     the and   a  of  to 
##  33  33  24  21  20  15

# English stop words (plus the bare "-" token) that are excluded from the word
# counts. Compared with the list this was copied from, the misspelled entries
# "fify" and "thickv" are corrected to "fifty" and "thick", and the duplicated
# "above" / "the" entries are listed only once.
stopwords <- c(
  "a", "about", "above", "across", "after", "afterwards", "again", "against",
  "all", "almost", "alone", "along", "already", "also", "although", "always",
  "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
  "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
  "around", "as", "at", "back", "be", "became", "because", "become",
  "becomes", "becoming", "been", "before", "beforehand", "behind", "being",
  "below", "beside", "besides", "between", "beyond", "bill", "both",
  "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
  "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
  "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else",
  "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone",
  "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill",
  "find", "fire", "first", "five", "for", "former", "formerly", "forty",
  "found", "four", "from", "front", "full", "further", "get", "give", "go",
  "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter",
  "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his",
  "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest",
  "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly",
  "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might",
  "mill", "mine", "more", "moreover", "most", "mostly", "move", "much",
  "must", "my", "myself", "name", "namely", "neither", "never",
  "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor",
  "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once",
  "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours",
  "ourselves", "out", "over", "own", "part", "per", "perhaps", "please",
  "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems",
  "serious", "several", "she", "should", "show", "side", "since", "sincere",
  "six", "sixty", "so", "some", "somehow", "someone", "something",
  "sometime", "sometimes", "somewhere", "still", "such", "system", "take",
  "ten", "than", "that", "the", "their", "them", "themselves", "then",
  "thence", "there", "thereafter", "thereby", "therefore", "therein",
  "thereupon", "these", "they", "thick", "thin", "third", "this", "those",
  "though", "three", "through", "throughout", "thru", "thus", "to",
  "together", "too", "top", "toward", "towards", "twelve", "twenty", "two",
  "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
  "well", "were", "what", "whatever", "when", "whence", "whenever", "where",
  "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
  "whether", "which", "while", "whither", "who", "whoever", "whole", "whom",
  "whose", "why", "will", "with", "within", "without", "would", "yet", "you",
  "your", "yours", "yourself", "yourselves", "-"
)


tb2 <- tb[! names(tb) %in% stopwords]
sort(tb2, decreasing = TRUE)

## article.vec
##                         disease               x             new 
##              33              10               8               6 
##            said          health          likely          deadly 
##               6               5               5               4 
##        diseases        pathogen       rottingen        creation 
##               4               4               4               3 
##           ebola          emerge        epidemic           human 
##               3               3               3               3 
##            list              mr            risk      scientists 
##               3               3               3               3 
##           virus           world            year        zoonotic 
##               3               3               3               3 
##               ”         adviser           alert         animals 
##               2               2               2               2 
##       currently         experts          humans   international 
##               2               2               2               2 
##          killed           major            make    organisation 
##               2               2               2               2 
##          people            pose         prepare          public 
##               2               2               2               2 
##        research      scientific          spread            time 
##               2               2               2               2 
##             use         viruses               "           [who] 
##               2               2               1               1 
##           ‘plug             ‘x’             “as        “disease 
##               1               1               1               1 
##        “history             “it           “it’s      “synthetic 
##               1               1               1               1 
##            “the          “these             000        100-year 
##               1               1               1               1 
##              11           1980s            2009            2013 
##               1               1               1               1 
##            2016              35        accident             act 
##               1               1               1               1 
##           added          added:          adding        advances 
##               1               1               1               1 
##          africa           allow          allows          animal 
##               1               1               1               1 
##       appearing           aware         before”        believed 
##               1               1               1               1 
##             big      biological         biology          brazen 
##               1               1               1               1 
##          broken        bushmeat          canada            case 
##               1               1               1               1 
##           cause          caused          center          change 
##               1               1               1               1 
##         charged        chemical           chief     chimpanzees 
##               1               1               1               1 
##           close       colorized            come       committee 
##               1               1               1               1 
##        confined         contact        convenes         council 
##               1               1               1               1 
## countermeasures          create      department       depiction 
##               1               1               1               1 
##       developed     development        develops      diagnostic 
##               1               1               1               1 
##    diagnostics”             did       digitally        disease” 
##               1               1               1               1 
##       diseases;           early          eating       ecosystem 
##               1               1               1               1 
##         editing        electron       emergency        entirely 
##               1               1               1               1 
##         erasmus       executive             far           fast” 
##               1               1               1               1 
##           fears           fever     filamentous        flexibly 
##               1               1               1               1 
##             flu           fmake       frequency             gas 
##               1               1               1               1 
##            gene    geneva-based           globe         greater 
##               1               1               1               1 
##        greatest            h1n1        habitats           havoc 
##               1               1               1               1 
##            head      heightened      high-level             hiv 
##               1               1               1               1 
##        horsepox         humans”         include       including 
##               1               1               1               1 
##      infections       intensity       john-arne          jumped 
##               1               1               1               1 
##         jumping           jumps            just         killers 
##               1               1               1               1 
##       knowledge           known        koopmans      laboratory 
##               1               1               1               1 
##           lassa           makes        man-made    manipulation 
##               1               1               1               1 
##          marion            mean           means         medical 
##               1               1               1               1 
##         meeting      micrograph         million          modern 
##               1               1               1               1 
##      monitoring         mystery         natural       naturally 
##               1               1               1               1 
##          needed           nerve         nigeria           ninth 
##               1               1               1               1 
##          norway          number        occuring        outbreak 
##               1               1               1               1 
##        pandemic           panel            past            plan 
##               1               1               1               1 
##       platforms           play’           point      population 
##               1               1               1               1 
##        possible     potentially        previous        probably 
##               1               1               1               1 
##         process       professor           rapid        relative 
##               1               1               1               1 
##        renegade      represents      resistance           risks 
##               1               1               1               1 
##       rotterdam    safeguarding       salisbury             say 
##               1               1               1               1 
##        scanning            seen          senior           shows 
##               1               1               1               1 
##        smallpox         sources         sparked        sparking 
##               1               1               1               1 
##           speed         spreads       statement         strange 
##               1               1               1               1 
##          strike            sure    surveillance        sweeping 
##               1               1               1               1 
##           swine         systems           taboo      technology 
##               1               1               1               1 
##           tells           terms          terror           tests 
##               1               1               1               1 
##           trade          travel      underlines         unknown 
##               1               1               1               1 
##        vaccines         variety     viroscience           vital 
##               1               1               1               1 
##            want         warned:           watch             way 
##               1               1               1               1 
##         weapons          week’s            west            wide 
##               1               1               1               1 
##            work         workers         wreaked           years 
##               1               1               1               1 
##        zoonosis 
##               1

a <- 'HAHAHA'
tolower(a)

## [1] "hahaha"

library(wordcloud2)

## Warning: package 'wordcloud2' was built under R version 3.4.4

?wordcloud2


wordcloud2(tb2, shape = 'star')

# Count how often each word occurs in a character vector of text.
#
# Args:
#   article:    character vector of text (e.g. the lines of a news article).
#   stop_words: character vector of tokens to leave out of the result.
#               Defaults to the script-level `stopwords` vector.
#
# Returns:
#   A one-dimensional table of counts named by lower-cased token, with stop
#   words and empty tokens removed.
wordcount <- function(article, stop_words = stopwords) {
  # Lower-case first so "The" and "the" count as the same word, then split on
  # space, comma, apostrophe or period.
  article.split <- strsplit(tolower(article), " |,|'|\\.")
  article.vec <- unlist(article.split)
  # Adjacent delimiters (such as ", ") make strsplit() emit "" tokens; drop
  # them so the empty string is not reported as the most frequent "word".
  article.vec <- article.vec[nzchar(article.vec)]
  tb <- table(article.vec)
  tb[!names(tb) %in% stop_words]
}

#wordcount
#wordcount(article)

wordcloud2(wordcount(article), shape = 'star')

# Read a text document and count how often each word occurs in it.
#
# Args:
#   url:        description passed to file(): a URL or a local file path.
#   stop_words: character vector of tokens to leave out of the result.
#               Defaults to the script-level `stopwords` vector.
#
# Returns:
#   A one-dimensional table of counts named by lower-cased token, with stop
#   words and empty tokens removed.
wordcount2 <- function(url, stop_words = stopwords) {
  f <- file(url)
  # Release the connection even when readLines() fails part-way through.
  on.exit(close(f), add = TRUE)
  article <- readLines(f)

  # Lower-case first so "The" and "the" count as the same word, then split on
  # space, comma, apostrophe or period.
  article.split <- strsplit(tolower(article), " |,|'|\\.")
  article.vec <- unlist(article.split)
  # Adjacent delimiters (such as ", ") make strsplit() emit "" tokens; drop
  # them so the empty string is not reported as the most frequent "word".
  article.vec <- article.vec[nzchar(article.vec)]
  tb <- table(article.vec)
  tb[!names(tb) %in% stop_words]
}

res <- wordcount2('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/cnn.txt')

## Warning in readLines(f): 於 'https://raw.githubusercontent.com/ywchiu/
## cdc_course/master/data/cnn.txt' 找到不完整的最後一列

#sort(res, decreasing = TRUE)
wordcloud2(res, shape='star')

#source('add.R')

Apply Function

x <- list(c(1,2,3,4), c(5,6,7))
x

## [[1]]
## [1] 1 2 3 4
## 
## [[2]]
## [1] 5 6 7

sum(x[[1]])

## [1] 10

sum(x[[2]])

## [1] 18

# method 1: loop over the elements of x and append each sum to a growing vector
ary <- c()
for(ele in x){
  #print(ele)
  #print(sum(ele))
  # c() builds a new, one-element-longer vector on every pass
  ary <- c(ary, sum(ele))
}
ary

## [1] 10 18

# method 2: same growing-vector approach as method 1, but looping over the
# positions of x instead of over its elements
ary2 <- c()
for( i in seq_along(x)){
  # x[[i]] extracts the i-th numeric vector from the list
  ary2 <- c(ary2 , sum(x[[i]]))
}
ary2

## [1] 10 18

# method 3: allocate the result vector once, then fill it in by position.
# The length is taken from x rather than hard-coded, so the result has exactly
# one slot per list element however many elements x holds.
ary3 <- numeric(length(x))
for (i in seq_along(x)) {
  ary3[i] <- sum(x[[i]])
}
ary3

## [1] 10 18

# method 4
lapply(x, sum)

## [[1]]
## [1] 10
## 
## [[2]]
## [1] 18

# method comparison (most preferred first)
# method 4 (lapply) > method 3 (preallocated loop) > method 2 = method 1 (vector grown with c())


m1 <- matrix(1:4, byrow=TRUE, nrow=2)
m2 <- matrix(5:8, byrow=TRUE, nrow=2)
#m2
li <- list(m1, m2)
lapply(li, sum)

## [[1]]
## [1] 10
## 
## [[2]]
## [1] 26

lapply(li, mean)

## [[1]]
## [1] 2.5
## 
## [[2]]
## [1] 6.5

m1[1,]

## [1] 1 2

# Extract the first row of a matrix.
#
# Args:
#   m: a matrix.
#
# Returns:
#   The first row of `m`, as returned by `m[1, ]`.
getFirstRow <- function(m) {
  m[1, ]
}


lapply(li, getFirstRow)

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6

lapply(li, function(m) m[1,])

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6

x <- list(c(1,2,3,4),c(5,6,7,8))

lapply(x, sum)

## [[1]]
## [1] 10
## 
## [[2]]
## [1] 26

unlist(lapply(x, sum))

## [1] 10 26

# sapply : s => Simplified
sapply(x, sum)

## [1] 10 26

m1 <- matrix(1:4, byrow=TRUE, nrow=2)
m2 <- matrix(5:8, byrow=TRUE, nrow=2)

li <- list(m1,m2)
lapply(li, function(m) m[1,])

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6

sapply(li, function(m) m[1,])

##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6

sapply(li, mean)

## [1] 2.5 6.5

m <- matrix(1:4, byrow=TRUE, nrow=2)
rowSums(m)

## [1] 3 7

colSums(m)

## [1] 4 6

apply(m, 1, sum)

## [1] 3 7

apply(m, 2, sum)

## [1] 4 6

apply(m, 1, mean)

## [1] 1.5 3.5

apply(m, 2, mean)

## [1] 2 3

# tapply: apply a function to the values of x within each group
# x holds the values; t holds the group label for the value at the same position
x <- c(80,70,59,88,72,57)
# NOTE(review): this variable shares its name with base R's t() (transpose)
t <- c(1,1,2,1,1,2)
# mean of x computed separately for each distinct label in t
tapply(x, t, mean)

##    1    2 
## 77.5 58.0

data(iris)
tapply(iris$Sepal.Length, iris$Species, mean)

##     setosa versicolor  virginica 
##      5.006      5.936      6.588

## read dengue dataset

library(readr)

## Warning: package 'readr' was built under R version 3.4.4

Dengue <- read_csv("https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/Dengue.csv")

## `curl` package not installed, falling back to using `url()`

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   發病年 = col_integer(),
##   居住縣市 = col_character(),
##   性別 = col_character(),
##   是否境外移入 = col_character(),
##   感染國家 = col_character(),
##   病例數 = col_integer()
## )

View(Dengue)

stat <- tapply(Dengue$病例數 , Dengue$感染國家, sum)

barplot(sort(stat, decreasing = TRUE))

sort(stat, decreasing = TRUE)

##       中華民國           None           印尼           越南         菲律賓 
##          72521           2823            796            649            515 
##           泰國       馬來西亞           緬甸         柬埔寨         新加坡 
##            406            335            156            143             98 
##           印度       中國大陸         孟加拉       馬爾地夫       斯里蘭卡 
##             75             43             25             19              9 
##           寮國           未知           巴西 巴布亞紐幾內亞           東加 
##              9              7              5              4              4 
##     索羅門群島       澳大利亞         厄瓜多       巴基斯坦       帛琉群島 
##              3              3              2              2              2 
## 法屬玻里尼西亞     美屬薩摩亞         墨西哥           諾魯           日本 
##              2              2              2              2              1 
##         尼泊爾         吐瓦魯       多明尼加       宏都拉斯   沙烏地阿拉伯 
##              1              1              1              1              1 
##         貝里斯           肯亞           南非     哥斯大黎加           秘魯 
##              1              1              1              1              1 
##     馬紹爾群島     馬達加斯加       斐濟群島         聖文森       聖露西亞 
##              1              1              1              1              1 
##       薩爾瓦多 
##              1

stat2 <- tapply(Dengue$病例數 , Dengue$發病年, sum)
sort(stat2, decreasing = TRUE)

##  2015  2014  2002  2007  2010  2011  2012  2006  2009  2013  2016  2008 
## 43784 15732  5388  2179  1896  1702  1478  1074  1052   860   744   714 
##  2004  1998  2017  2005  2001  2003  2000  1999  2018 
##   427   344   343   306   281   145   139    68    23

plot(x = names(stat2 ), y = stat2, type= 'o')

stat3 <- tapply(Dengue$病例數 , Dengue$性別, sum)
pie(stat3)

DPLYR

#install.packages('dplyr')
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

help(package = 'dplyr')


library(readr)
Dengue <- read_csv("https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/Dengue.csv")

## `curl` package not installed, falling back to using `url()`

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   發病年 = col_integer(),
##   居住縣市 = col_character(),
##   性別 = col_character(),
##   是否境外移入 = col_character(),
##   感染國家 = col_character(),
##   病例數 = col_integer()
## )

# R Method
Dengue[Dengue$感染國家 == '中華民國'    ,  ]

## # A tibble: 256 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1     1   2001 台中市   女    否           中華民國      1
##  2     2   2002 台中市   女    否           中華民國      3
##  3     3   2004 台中市   女    否           中華民國      1
##  4     4   2011 台中市   女    否           中華民國      3
##  5     5   2013 台中市   女    否           中華民國      1
##  6     6   2014 台中市   女    否           中華民國      8
##  7     7   2015 台中市   女    否           中華民國     35
##  8     8   2016 台中市   女    否           中華民國      1
##  9    78   2001 台中市   男    否           中華民國      1
## 10    79   2002 台中市   男    否           中華民國      3
## # ... with 246 more rows

# DplyR Method
filter(Dengue, 感染國家 == '中華民國')

## Warning: package 'bindrcpp' was built under R version 3.4.4

## # A tibble: 256 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1     1   2001 台中市   女    否           中華民國      1
##  2     2   2002 台中市   女    否           中華民國      3
##  3     3   2004 台中市   女    否           中華民國      1
##  4     4   2011 台中市   女    否           中華民國      3
##  5     5   2013 台中市   女    否           中華民國      1
##  6     6   2014 台中市   女    否           中華民國      8
##  7     7   2015 台中市   女    否           中華民國     35
##  8     8   2016 台中市   女    否           中華民國      1
##  9    78   2001 台中市   男    否           中華民國      1
## 10    79   2002 台中市   男    否           中華民國      3
## # ... with 246 more rows

# R Method
Dengue[Dengue$性別 == '男' & Dengue$病例數 >= 100    ,  ]

## # A tibble: 21 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1   562   2007 台南市   男    否           中華民國    933
##  2   565   2010 台南市   男    否           中華民國    227
##  3   567   2012 台南市   男    否           中華民國    382
##  4   570   2015 台南市   男    否           中華民國  11299
##  5   829   2002 屏東縣   男    否           None        172
##  6   833   2004 屏東縣   男    否           中華民國    135
##  7   841   2013 屏東縣   男    否           中華民國    229
##  8   842   2014 屏東縣   男    否           中華民國    124
##  9   843   2015 屏東縣   男    否           中華民國    210
## 10  1248   2002 高雄市   男    否           None        841
## # ... with 11 more rows

# DPLYR
filter(Dengue, 性別=='男' & 病例數 >= 100)

## # A tibble: 21 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1   562   2007 台南市   男    否           中華民國    933
##  2   565   2010 台南市   男    否           中華民國    227
##  3   567   2012 台南市   男    否           中華民國    382
##  4   570   2015 台南市   男    否           中華民國  11299
##  5   829   2002 屏東縣   男    否           None        172
##  6   833   2004 屏東縣   男    否           中華民國    135
##  7   841   2013 屏東縣   男    否           中華民國    229
##  8   842   2014 屏東縣   男    否           中華民國    124
##  9   843   2015 屏東縣   男    否           中華民國    210
## 10  1248   2002 高雄市   男    否           None        841
## # ... with 11 more rows

# R Method
Dengue[Dengue$性別 == '男' | Dengue$病例數 >= 100    ,  ]

## # A tibble: 1,153 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1    77   1998 台中市   男    否           None          4
##  2    78   2001 台中市   男    否           中華民國      1
##  3    79   2002 台中市   男    否           中華民國      3
##  4    80   2004 台中市   男    否           中華民國      1
##  5    81   2006 台中市   男    否           中華民國      1
##  6    82   2007 台中市   男    否           中華民國      2
##  7    83   2011 台中市   男    否           中華民國      1
##  8    84   2014 台中市   男    否           中華民國      7
##  9    85   2015 台中市   男    否           中華民國     42
## 10    86   1998 台中市   男    是           None          1
## # ... with 1,143 more rows

# DPLYR
filter(Dengue, 性別=='男' | 病例數 >= 100)

## # A tibble: 1,153 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1    77   1998 台中市   男    否           None          4
##  2    78   2001 台中市   男    否           中華民國      1
##  3    79   2002 台中市   男    否           中華民國      3
##  4    80   2004 台中市   男    否           中華民國      1
##  5    81   2006 台中市   男    否           中華民國      1
##  6    82   2007 台中市   男    否           中華民國      2
##  7    83   2011 台中市   男    否           中華民國      1
##  8    84   2014 台中市   男    否           中華民國      7
##  9    85   2015 台中市   男    否           中華民國     42
## 10    86   1998 台中市   男    是           None          1
## # ... with 1,143 more rows

# R Method
Dengue[Dengue$居住縣市 %in% c('台北市', '台南市'),  ]

## # A tibble: 431 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1   190   1998 台北市   女    否           None          6
##  2   191   1999 台北市   女    否           None          1
##  3   192   2002 台北市   女    否           None          3
##  4   193   1998 台北市   女    否           中華民國      1
##  5   194   2001 台北市   女    否           中華民國      3
##  6   195   2008 台北市   女    否           中華民國      8
##  7   196   2010 台北市   女    否           中華民國      1
##  8   197   2011 台北市   女    否           中華民國     10
##  9   198   2013 台北市   女    否           中華民國      3
## 10   199   2014 台北市   女    否           中華民國      5
## # ... with 421 more rows

# DPLYR Method
filter(Dengue, 居住縣市 %in% c('台北市', '台南市'))

## # A tibble: 431 x 7
##       X1 發病年 居住縣市 性別  是否境外移入 感染國家 病例數
##    <int>  <int> <chr>    <chr> <chr>        <chr>     <int>
##  1   190   1998 台北市   女    否           None          6
##  2   191   1999 台北市   女    否           None          1
##  3   192   2002 台北市   女    否           None          3
##  4   193   1998 台北市   女    否           中華民國      1
##  5   194   2001 台北市   女    否           中華民國      3
##  6   195   2008 台北市   女    否           中華民國      8
##  7   196   2010 台北市   女    否           中華民國      1
##  8   197   2011 台北市   女    否           中華民國     10
##  9   198   2013 台北市   女    否           中華民國      3
## 10   199   2014 台北市   女    否           中華民國      5
## # ... with 421 more rows

# R Method
Dengue[ , c('居住縣市', '病例數')]

## # A tibble: 1,987 x 2
##    居住縣市 病例數
##    <chr>     <int>
##  1 台中市        1
##  2 台中市        3
##  3 台中市        1
##  4 台中市        3
##  5 台中市        1
##  6 台中市        8
##  7 台中市       35
##  8 台中市        1
##  9 台中市        1
## 10 台中市        1
## # ... with 1,977 more rows

# DPLYR Method
select(Dengue, '居住縣市', '病例數')

## # A tibble: 1,987 x 2
##    居住縣市 病例數
##    <chr>     <int>
##  1 台中市        1
##  2 台中市        3
##  3 台中市        1
##  4 台中市        3
##  5 台中市        1
##  6 台中市        8
##  7 台中市       35
##  8 台中市        1
##  9 台中市        1
## 10 台中市        1
## # ... with 1,977 more rows

# R Method
a <- Dengue[ Dengue$性別 == '男' & Dengue$病例數 >= 100, c('居住縣市', '病例數')]
tapply(a$病例數, a$居住縣市, sum)

## 台南市 屏東縣 高雄市 
##  12841    870  22056

# DPLYR Method
Dengue %>%
  filter(性別 == '男' & 病例數 >= 100) %>%
  select('居住縣市', '病例數')

## # A tibble: 21 x 2
##    居住縣市 病例數
##    <chr>     <int>
##  1 台南市      933
##  2 台南市      227
##  3 台南市      382
##  4 台南市    11299
##  5 屏東縣      172
##  6 屏東縣      135
##  7 屏東縣      229
##  8 屏東縣      124
##  9 屏東縣      210
## 10 高雄市      841
## # ... with 11 more rows

Dengue %>%
  filter(性別 == '男' & 病例數 >= 100) %>%
  select('居住縣市', '病例數') %>%
  group_by(居住縣市) %>%
  summarise(s = sum(病例數))

## # A tibble: 3 x 2
##   居住縣市     s
##   <chr>    <int>
## 1 台南市   12841
## 2 屏東縣     870
## 3 高雄市   22056

# SELECT 居住縣市, sum(病例數) FROM Dengue WHERE 性別 = '男' AND 病例數 >= 100 GROUP BY 居住縣市


Dengue %>%
  filter(病例數 >= 100) %>%
  select('居住縣市', '性別','病例數') %>%
  group_by(居住縣市, 性別) %>%
  summarise(s = sum(病例數))

## # A tibble: 6 x 3
## # Groups:   居住縣市 [?]
##   居住縣市 性別      s
##   <chr>    <chr> <int>
## 1 台南市   女    12954
## 2 台南市   男    12841
## 3 屏東縣   女      777
## 4 屏東縣   男      870
## 5 高雄市   女    22899
## 6 高雄市   男    22056

# SELECT 居住縣市, 性別, sum(病例數) FROM Dengue WHERE 病例數 >= 100 GROUP BY 居住縣市,性別

# R Method
sum(tail(head(iris), 3)$Sepal.Length)

## [1] 15

# Magrittr Method
iris %>% 
  head() %>%
  tail(3) %>%
  .$Sepal.Length %>%
  sum()

## [1] 15

Demo20180321

David Chiu

2018年3月21日

計算新聞中各個詞出現的次數

Apply Function

DPLYR