Demo20180314

Matrix

kevin <- c(80, 75)
toby  <- c(70, 65)
ian   <- c(90, 85)

grades <- c(kevin, toby, ian)

mat <- matrix(grades, nrow = 3, byrow = TRUE, dimnames = list(c('kevin', 'toby', 'ian'), c('first', 'second')))

mat[,'first'] * 0.4 + mat[,2] *0.6

## kevin  toby   ian 
##    77    67    87

Factor

names_vec <- c('toby', 'ian', 'brian', 'marry')

country_vec <- c('Taiwan', 'China', 'Japan', 'Korea', 'Taiwan', 'Taiwan', 'Japan')

country_factor <- factor(country_vec)
country_factor

## [1] Taiwan China  Japan  Korea  Taiwan Taiwan Japan 
## Levels: China Japan Korea Taiwan

Data Frame

names_vec   <-  c('toby', 'ian', 'brian', 'marry')
age_vec     <- c(35, 45, 27, 18)
country_vec <- factor(c('Taiwan', 'Taiwan', 'Japan', 'China'))

country_vec

## [1] Taiwan Taiwan Japan  China 
## Levels: China Japan Taiwan

mat <- matrix(c(names_vec, age_vec, country_vec), nrow = 4)
mat

##      [,1]    [,2] [,3]
## [1,] "toby"  "35" "3" 
## [2,] "ian"   "45" "3" 
## [3,] "brian" "27" "2" 
## [4,] "marry" "18" "1"

summary(mat)

##      V1     V2    V3   
##  brian:1   18:1   1:1  
##  ian  :1   27:1   2:1  
##  marry:1   35:1   3:2  
##  toby :1   45:1

df <- data.frame(names_vec, age_vec, country_vec)
df

##   names_vec age_vec country_vec
## 1      toby      35      Taiwan
## 2       ian      45      Taiwan
## 3     brian      27       Japan
## 4     marry      18       China

summary(df)

##  names_vec    age_vec      country_vec
##  brian:1   Min.   :18.00   China :1   
##  ian  :1   1st Qu.:24.75   Japan :1   
##  marry:1   Median :31.00   Taiwan:2   
##  toby :1   Mean   :31.25              
##            3rd Qu.:37.50              
##            Max.   :45.00

df2 <- data.frame(name = names_vec, age = age_vec, country = country_vec)
df2

##    name age country
## 1  toby  35  Taiwan
## 2   ian  45  Taiwan
## 3 brian  27   Japan
## 4 marry  18   China

str(df2)

## 'data.frame':    4 obs. of  3 variables:
##  $ name   : Factor w/ 4 levels "brian","ian",..: 4 2 1 3
##  $ age    : num  35 45 27 18
##  $ country: Factor w/ 3 levels "China","Japan",..: 3 3 2 1

df3 <- data.frame(name = names_vec, age = age_vec, country = country_vec, stringsAsFactors = FALSE)
df3

##    name age country
## 1  toby  35  Taiwan
## 2   ian  45  Taiwan
## 3 brian  27   Japan
## 4 marry  18   China

df3$name

## [1] "toby"  "ian"   "brian" "marry"

class(df3$name)

## [1] "character"

df3$name <- as.factor(df3$name)
df3

##    name age country
## 1  toby  35  Taiwan
## 2   ian  45  Taiwan
## 3 brian  27   Japan
## 4 marry  18   China

library(readr)
NHI_EnteroviralInfection <- read_csv("C:/Users/nc20/Downloads/NHI_EnteroviralInfection.csv")

## Parsed with column specification:
## cols(
##   年 = col_integer(),
##   週 = col_integer(),
##   就診類別 = col_character(),
##   年齡別 = col_character(),
##   縣市 = col_character(),
##   腸病毒健保就診人次 = col_integer(),
##   健保就診總人次 = col_integer()
## )

#View(NHI_EnteroviralInfection)

head(NHI_EnteroviralInfection)

## # A tibble: 6 x 7
##      年    週 就診類別 年齡別 縣市   腸病毒健保就診人次 健保就診總人次
##   <int> <int> <chr>    <chr>  <chr>               <int>          <int>
## 1  2008    14 住院     0-2    台中市                  0            105
## 2  2008    14 住院     0-2    台北市                  2            151
## 3  2008    14 住院     0-2    台東縣                  0             14
## 4  2008    14 住院     0-2    台南市                  0             20
## 5  2008    14 住院     0-2    宜蘭縣                  0             44
## 6  2008    14 住院     0-2    花蓮縣                  0             17

tail(NHI_EnteroviralInfection)

## # A tibble: 6 x 7
##      年    週 就診類別 年齡別 縣市   腸病毒健保就診人次 健保就診總人次
##   <int> <int> <chr>    <chr>  <chr>               <int>          <int>
## 1  2018    10 門診     5-9    新竹市                 33           7809
## 2  2018    10 門診     5-9    新竹縣                 14           6900
## 3  2018    10 門診     5-9    嘉義市                 37           5180
## 4  2018    10 門診     5-9    嘉義縣                 22           3131
## 5  2018    10 門診     5-9    彰化縣                 66          15423
## 6  2018    10 門診     5-9    澎湖縣                  1           1075

class(NHI_EnteroviralInfection)

## [1] "tbl_df"     "tbl"        "data.frame"

summary(NHI_EnteroviralInfection)

##        年             週          就診類別            年齡別         
##  Min.   :2008   Min.   : 1.00   Length:110049      Length:110049     
##  1st Qu.:2010   1st Qu.:14.00   Class :character   Class :character  
##  Median :2013   Median :27.00   Mode  :character   Mode  :character  
##  Mean   :2013   Mean   :26.74                                        
##  3rd Qu.:2015   3rd Qu.:40.00                                        
##  Max.   :2018   Max.   :53.00                                        
##      縣市           腸病毒健保就診人次 健保就診總人次  
##  Length:110049      Min.   :   0.0     Min.   :     0  
##  Class :character   1st Qu.:   0.0     1st Qu.:    28  
##  Mode  :character   Median :   3.0     Median :   997  
##                     Mean   :  46.2     Mean   : 22672  
##                     3rd Qu.:  30.0     3rd Qu.:  7372  
##                     Max.   :2925.0     Max.   :870061

str(NHI_EnteroviralInfection)

## Classes 'tbl_df', 'tbl' and 'data.frame':    110049 obs. of  7 variables:
##  $ 年                : int  2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 ...
##  $ 週                : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ 就診類別          : chr  "住院" "住院" "住院" "住院" ...
##  $ 年齡別            : chr  "0-2" "0-2" "0-2" "0-2" ...
##  $ 縣市              : chr  "台中市" "台北市" "台東縣" "台南市" ...
##  $ 腸病毒健保就診人次: int  0 2 0 0 0 0 0 0 0 0 ...
##  $ 健保就診總人次    : int  105 151 14 20 44 17 1 19 1 141 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 7
##   .. ..$ 年                : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 週                : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 就診類別          : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 年齡別            : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 縣市              : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 腸病毒健保就診人次: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 健保就診總人次    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

a <- '123'
as.integer(a)

## [1] 123

as.numeric(a)

## [1] 123

NHI_EnteroviralInfection$就診類別 <- as.factor(NHI_EnteroviralInfection$就診類別)
levels(NHI_EnteroviralInfection$就診類別)

## [1] "住院" "門診"

NHI_EnteroviralInfection$年齡別 <- as.factor(NHI_EnteroviralInfection$年齡別)
levels(NHI_EnteroviralInfection$年齡別)

## [1] "0-2"   "10-14" "15+"   "3-4"   "5-9"

NHI_EnteroviralInfection$縣市 <- as.factor(NHI_EnteroviralInfection$縣市)
levels(NHI_EnteroviralInfection$縣市)

##  [1] "台中市" "台北市" "台東縣" "台南市" "宜蘭縣" "花蓮縣" "金門縣"
##  [8] "南投縣" "屏東縣" "苗栗縣" "桃園市" "高雄市" "基隆市" "連江縣"
## [15] "雲林縣" "新北市" "新竹市" "新竹縣" "嘉義市" "嘉義縣" "彰化縣"
## [22] "澎湖縣"

str(NHI_EnteroviralInfection)

## Classes 'tbl_df', 'tbl' and 'data.frame':    110049 obs. of  7 variables:
##  $ 年                : int  2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 ...
##  $ 週                : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ 就診類別          : Factor w/ 2 levels "住院","門診": 1 1 1 1 1 1 1 1 1 1 ...
##  $ 年齡別            : Factor w/ 5 levels "0-2","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ 縣市              : Factor w/ 22 levels "台中市","台北市",..: 1 2 3 4 5 6 7 9 10 11 ...
##  $ 腸病毒健保就診人次: int  0 2 0 0 0 0 0 0 0 0 ...
##  $ 健保就診總人次    : int  105 151 14 20 44 17 1 19 1 141 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 7
##   .. ..$ 年                : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 週                : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 就診類別          : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 年齡別            : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 縣市              : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ 腸病毒健保就診人次: list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ 健保就診總人次    : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

summary(NHI_EnteroviralInfection)

##        年             週        就診類別       年齡別           縣市      
##  Min.   :2008   Min.   : 1.00   住院:52849   0-2  :21887   台中市 : 5200  
##  1st Qu.:2010   1st Qu.:14.00   門診:57200   10-14:21700   台北市 : 5200  
##  Median :2013   Median :27.00                15+  :22865   台南市 : 5200  
##  Mean   :2013   Mean   :26.74                3-4  :21720   桃園市 : 5200  
##  3rd Qu.:2015   3rd Qu.:40.00                5-9  :21877   高雄市 : 5200  
##  Max.   :2018   Max.   :53.00                              新北市 : 5200  
##                                                            (Other):78849  
##  腸病毒健保就診人次 健保就診總人次  
##  Min.   :   0.0     Min.   :     0  
##  1st Qu.:   0.0     1st Qu.:    28  
##  Median :   3.0     Median :   997  
##  Mean   :  46.2     Mean   : 22672  
##  3rd Qu.:  30.0     3rd Qu.:  7372  
##  Max.   :2925.0     Max.   :870061  
##

head(sort(NHI_EnteroviralInfection$健保就診總人次, decreasing=TRUE))

## [1] 870061 810839 809395 772582 763579 756824

NHI_EnteroviralInfection[order(NHI_EnteroviralInfection$健保就診總人次, decreasing=TRUE), ]

## # A tibble: 110,049 x 7
##       年    週 就診類別 年齡別 縣市   腸病毒健保就診人次 健保就診總人次
##    <int> <int> <fct>    <fct>  <fct>               <int>          <int>
##  1  2016     5 門診     15+    新北市                 94         870061
##  2  2016     7 門診     15+    新北市                 90         810839
##  3  2012     3 門診     15+    新北市                138         809395
##  4  2016     9 門診     15+    新北市                 54         772582
##  5  2014     4 門診     15+    新北市                 63         763579
##  6  2011     4 門診     15+    新北市                 42         756824
##  7  2016     5 門診     15+    台北市                 72         738735
##  8  2016     8 門診     15+    新北市                 56         737420
##  9  2015     6 門診     15+    新北市                 63         729586
## 10  2016     5 門診     15+    高雄市                 47         729150
## # ... with 110,039 more rows

NHI_EnteroviralInfection[order(NHI_EnteroviralInfection$腸病毒健保就診人次, decreasing=TRUE), ]

## # A tibble: 110,049 x 7
##       年    週 就診類別 年齡別 縣市   腸病毒健保就診人次 健保就診總人次
##    <int> <int> <fct>    <fct>  <fct>               <int>          <int>
##  1  2010    27 門診     5-9    新北市               2925          50125
##  2  2014    23 門診     5-9    新北市               2869          49034
##  3  2014    24 門診     5-9    新北市               2785          50469
##  4  2014    22 門診     5-9    新北市               2711          52373
##  5  2014    25 門診     5-9    新北市               2494          47971
##  6  2010    25 門診     5-9    新北市               2422          47021
##  7  2010    26 門診     5-9    新北市               2330          41716
##  8  2010    28 門診     5-9    新北市               2153          45395
##  9  2014    24 門診     5-9    台中市               2099          36455
## 10  2014    22 門診     5-9    台中市               2092          39640
## # ... with 110,039 more rows

NHI_EnteroviralInfection$腸病毒健保就診ratio <- NHI_EnteroviralInfection$腸病毒健保就診人次 / NHI_EnteroviralInfection$健保就診總人次

new_taipei <- NHI_EnteroviralInfection[ NHI_EnteroviralInfection$縣市 == '新北市', ]
new_taipei[order(new_taipei$腸病毒健保就診ratio, decreasing=TRUE), ]

## # A tibble: 5,200 x 8
##       年    週 就診類別 年齡別 縣市   腸病毒健保就診人次 健保就診總人次
##    <int> <int> <fct>    <fct>  <fct>               <int>          <int>
##  1  2010    26 住院     3-4    新北市                 18             46
##  2  2010    28 住院     3-4    新北市                 19             50
##  3  2010    29 住院     0-2    新北市                 31             91
##  4  2010    24 住院     3-4    新北市                 17             56
##  5  2010    27 住院     0-2    新北市                 35            122
##  6  2010    25 住院     3-4    新北市                 19             67
##  7  2013    25 住院     3-4    新北市                 13             47
##  8  2010    33 住院     0-2    新北市                 29            106
##  9  2010    28 住院     0-2    新北市                 29            109
## 10  2011    47 住院     3-4    新北市                 10             38
## # ... with 5,190 more rows, and 1 more variable: 腸病毒健保就診ratio <dbl>

dataset <- new_taipei[new_taipei$年齡別 == '3-4' & new_taipei$就診類別 == '住院', c('年','週', '腸病毒健保就診人次') ]

hist(dataset$腸病毒健保就診人次)

boxplot(dataset$腸病毒健保就診人次)

# Line chart of the selected counts in row order.
# plot() expects a one-character type code; "l" draws lines. Spelling it
# 'line' only worked because R truncated it to the first character with a
# warning.
plot(dataset$腸病毒健保就診人次, type = 'l', main='腸病毒分析結果', col="red")

stat <- tapply(NHI_EnteroviralInfection$腸病毒健保就診人次,NHI_EnteroviralInfection$縣市, sum)

stat2 <- sort(stat, decreasing = TRUE)
barplot(stat2)

pie(stat2, init.angle = 90, clockwise = TRUE)

List

phone <-list(thing="iphoneX" , height=5.65, width=2.79 )
phone

## $thing
## [1] "iphoneX"
## 
## $height
## [1] 5.65
## 
## $width
## [1] 2.79

phone$thing

## [1] "iphoneX"

phone$height

## [1] 5.65

student <- list(name = 'Toby',score =c(87,57,72))
student$score

## [1] 87 57 72

student$score[1]

## [1] 87

min(student$score)

## [1] 57

student[[1]]

## [1] "Toby"

li <- list(c(70,65,90), c(56,75,66,63))

li[[1]]

## [1] 70 65 90

li[[2]]

## [1] 56 75 66 63

mean(li[[1]])

## [1] 75

mean(li[[2]])

## [1] 65

?lapply

## starting httpd help server ... done

lapply(li, mean)

## [[1]]
## [1] 75
## 
## [[2]]
## [1] 65

sum(li[[1]])

## [1] 225

sum(li[[2]])

## [1] 260

lapply(li, sum)

## [[1]]
## [1] 225
## 
## [[2]]
## [1] 260

a <- 180
b <- 250
a

## [1] 180

b

## [1] 250

print(a)

## [1] 180

print(b)

## [1] 250

x <- 5

if(x > 3){
  print("x > 3")
}else{
  print("x<=3")
}

## [1] "x > 3"

x <- 2
if(x > 3){
  print("x > 3")
} else if( x==3 ){
  print("x == 3")
} else{
  print("x<=3")
}

## [1] "x<=3"

1:10

##  [1]  1  2  3  4  5  6  7  8  9 10

for (qoo in 1:10){
  print(qoo)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

# using for loop: not recommended
s <- 0
for (qoo in 1:100){
  s <- s + qoo
}
s

## [1] 5050

# using built-in function: recommended method
sum(1:100)

## [1] 5050

# using for loop: not recommended
for (qoo in 1:10){
  print(qoo ^ (1/2))
}

## [1] 1
## [1] 1.414214
## [1] 1.732051
## [1] 2
## [1] 2.236068
## [1] 2.44949
## [1] 2.645751
## [1] 2.828427
## [1] 3
## [1] 3.162278

# using vectorized programming: recommended method
(1:10) ^ (1/2)

##  [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
##  [8] 2.828427 3.000000 3.162278

x <-c("sunny","rainy", "cloudy", "rainy", "cloudy")

length(x)

## [1] 5

x[1]

## [1] "sunny"

for( i in 1:length(x)){
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

seq_along(x)

## [1] 1 2 3 4 5

for(i in seq_along(x)){
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

for(letter in x){
  print(letter)
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

s   <- 0 
cnt <- 0

while (cnt <= 100){
  s   <- s + cnt 
  cnt <- cnt + 1
}
s

## [1] 5050

cnt

## [1] 101

sum(1:100)

## [1] 5050

## while loop: be careful to avoid an infinite loop

#cnt <- 0
#while (cnt <= 100){
#  s   <- s + cnt 
#  #cnt <- cnt + 1
#}


url<-'https://tw.appledaily.com/new/realtime/'
?paste
paste(url, 1)

## [1] "https://tw.appledaily.com/new/realtime/ 1"

paste(url, 1, sep = '' )

## [1] "https://tw.appledaily.com/new/realtime/1"

paste0(url, 1)

## [1] "https://tw.appledaily.com/new/realtime/1"

for (i in 1:10){
  print(paste0(url,i))
}

## [1] "https://tw.appledaily.com/new/realtime/1"
## [1] "https://tw.appledaily.com/new/realtime/2"
## [1] "https://tw.appledaily.com/new/realtime/3"
## [1] "https://tw.appledaily.com/new/realtime/4"
## [1] "https://tw.appledaily.com/new/realtime/5"
## [1] "https://tw.appledaily.com/new/realtime/6"
## [1] "https://tw.appledaily.com/new/realtime/7"
## [1] "https://tw.appledaily.com/new/realtime/8"
## [1] "https://tw.appledaily.com/new/realtime/9"
## [1] "https://tw.appledaily.com/new/realtime/10"

lapply(1:10, function(i) paste0(url, i))

## [[1]]
## [1] "https://tw.appledaily.com/new/realtime/1"
## 
## [[2]]
## [1] "https://tw.appledaily.com/new/realtime/2"
## 
## [[3]]
## [1] "https://tw.appledaily.com/new/realtime/3"
## 
## [[4]]
## [1] "https://tw.appledaily.com/new/realtime/4"
## 
## [[5]]
## [1] "https://tw.appledaily.com/new/realtime/5"
## 
## [[6]]
## [1] "https://tw.appledaily.com/new/realtime/6"
## 
## [[7]]
## [1] "https://tw.appledaily.com/new/realtime/7"
## 
## [[8]]
## [1] "https://tw.appledaily.com/new/realtime/8"
## 
## [[9]]
## [1] "https://tw.appledaily.com/new/realtime/9"
## 
## [[10]]
## [1] "https://tw.appledaily.com/new/realtime/10"

# Minimal function skeleton: no parameters and an empty body.
f <- function(){
}

# Return the sum of a and b.
addNum <- function(a, b) {
  total <- a + b
  total
}

addNum(3,5)

## [1] 8

# Multiply a by b.
# The value of `a + b` is computed but discarded: the function returns the
# value of its last evaluated expression, here `a * b`.
multiplyNum <- function(a,b){
  a + b
  a * b
}

multiplyNum(3,5)

## [1] 15

# Multiply a by b, handing the product back with an explicit return().
# The preceding `a + b` is still evaluated and its value discarded.
multiplyNum2 <- function(a,b){
  a + b
  return(a * b)
}

multiplyNum2(3,5)

## [1] 15

# Return a + b; b falls back to 2 when the caller does not supply it.
addNum <- function(a, b = 2) {
  total <- a + b
  total
}

addNum(3)

## [1] 5

addNum(5)

## [1] 7

# Weighted sum: a counts eight times, b counts twice.
# Both arguments are optional (a defaults to 3, b defaults to 2).
addNum <- function(a = 3, b = 2) {
  weighted_a <- a * 8
  weighted_b <- b * 2
  weighted_a + weighted_b
}

addNum()

## [1] 28

addNum(2,3)

## [1] 22

addNum(b=2,a=3)

## [1] 28

# Weighted sum of a (x8) and b (x2) plus the offset pineapplepen.
# All three arguments are optional (defaults 3, 2 and 7).
addNum <- function(a = 3, b = 2, pineapplepen = 7) {
  weighted_a <- a * 8
  weighted_b <- b * 2
  weighted_a + weighted_b + pineapplepen
}

addNum(3,5,2)

## [1] 36

addNum(a = 3,b = 5,pin = 2)

## [1] 36

# Return a + b; a defaults to 2 and b defaults to 3.
addNum <- function(a = 2, b = 3) {
  a + b
}

addNum(3,5)

## [1] 8

addNum()

## [1] 5

# Return a doubled.
# b is declared but never referenced in the body, so the call f(3) that
# follows succeeds even though b is not supplied.
f <- function(a,b){
  a * 2
}

f(3)

## [1] 6

# Return a + b.
# b has no default and is used in the body; presumably that is why the
# call f(3) right after this definition is commented out.
f <- function(a,b){
  return(a + b)
}
#f(3)

# Return a + b; b defaults to 3, so a single-argument call works.
f <- function(a, b = 3) {
  a + b
}
f(3)

## [1] 6

?file
f <- file('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/disease.txt')
article <- readLines(f)

## Warning in readLines(f): 於 'https://raw.githubusercontent.com/ywchiu/
## cdc_course/master/data/disease.txt' 找到不完整的最後一列

#article
close(f)

# split a string by given delimiter
?strsplit
a <- 'this is a book'
strsplit(a, ' ')

## [[1]]
## [1] "this" "is"   "a"    "book"

# use unlist() to convert a list to a vector
?unlist
b <- list(c(1,2), c(2,3))
b

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 2 3

unlist(b)

## [1] 1 2 2 3

# count occurrences of each element of a vector
?table
a <- c(1,2,1,2,3,3,2)
table(a)

## a
## 1 2 3 
## 2 3 2

# Count how often each space-separated token occurs in `article`.
#
# article: character vector; every element is split on a single space.
# Returns a table of token counts sorted from most to least frequent.
wordcount <- function(article) {
  tokens <- unlist(strsplit(article, ' '))
  # dnn keeps the dimension label the table is printed with.
  counts <- table(tokens, dnn = "article.vec")
  sort(counts, decreasing = TRUE)
}

stopwords <- c("a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount",  "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as",  "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own","part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", "some", 
"somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the","-")

# Count how often each space-separated token occurs in `article`, ignoring
# stop words.
#
# article:    character vector; every element is split on a single space.
# stop_words: character vector of tokens to leave out of the count. Defaults
#             to the `stopwords` vector defined in this script, so existing
#             calls of the form wordcount(article) behave as before.
# Returns a table of token counts sorted from most to least frequent.
wordcount <- function(article, stop_words = stopwords){
  tokens <- unlist(strsplit(article, ' '))
  # Leading or repeated spaces make strsplit() emit "" tokens; without the
  # nzchar() filter they would be tallied as if they were a word.
  keep <- nzchar(tokens) & !(tokens %in% stop_words)
  sort(table(tokens[keep]), decreasing = TRUE)
}

wordcount(article)

## 
##         Disease             new            said               X 
##               6               6               6               6 
##          deadly        diseases          likely        pathogen 
##               4               4               4               4 
##                        creation         disease          emerge 
##               3               3               3               3 
##        epidemic          health           human            list 
##               3               3               3               3 
##              Mr           virus             WHO            year 
##               3               3               3               3 
##         adviser           alert         animals       currently 
##               2               2               2               2 
##           Ebola          humans   international              It 
##               2               2               2               2 
##          killed           major            make          people 
##               2               2               2               2 
##            pose          public            risk      Rottingen, 
##               2               2               2               2 
##      scientific      scientists            This           time. 
##               2               2               2               2 
##             use           world        zoonotic        'Disease 
##               2               2               2               1 
##           [WHO]           ‘plug             ‘X’             “As 
##               1               1               1               1 
##        “Disease        “History             “It           “It’s 
##               1               1               1               1 
##      “Synthetic            “The          “These        100-year 
##               1               1               1               1 
##          11,000          1980s.           2009.            2013 
##               1               1               1               1 
##           2016.              35               A        accident 
##               1               1               1               1 
##             act           added          added:          adding 
##               1               1               1               1 
##        Advances          Africa           allow          allows 
##               1               1               1               1 
##             And          animal            any,       appearing 
##               1               1               1               1 
##           aware        before”,        believed             big 
##               1               1               1               1 
##      biological         biology          brazen          broken 
##               1               1               1               1 
##        bushmeat          Canada            case           cause 
##               1               1               1               1 
##          caused          Center          change         charged 
##               1               1               1               1 
##        chemical           chief     chimpanzees           close 
##               1               1               1               1 
##       colorized            come      committee.        confined 
##               1               1               1               1 
##         contact        convenes         Council countermeasures 
##               1               1               1               1 
##          create      department       depiction       developed 
##               1               1               1               1 
##     development       develops.      diagnostic   diagnostics”, 
##               1               1               1               1 
##             did       digitally       disease”.       diseases; 
##               1               1               1               1 
##           down.            Each           early          eating 
##               1               1               1               1 
##          Ebola,       ecosystem         editing        electron 
##               1               1               1               1 
##      emergency.        entirely         Erasmus       executive 
##               1               1               1               1 
##         experts         Experts             far          fast”. 
##               1               1               1               1 
##           fears          fever,     filamentous        flexibly 
##               1               1               1               1 
##             flu           Fmake       frequency             gas 
##               1               1               1               1 
##            gene    Geneva-based           globe         greater 
##               1               1               1               1 
##        greatest            H1N1        habitats           havoc 
##               1               1               1               1 
##            head          Health         health,      heightened 
##               1               1               1               1 
##      high-level            HIV,        horsepox        However, 
##               1               1               1               1 
##        humans”,              In         include       including 
##               1               1               1               1 
##      infections       intensity       John-Arne          jumped 
##               1               1               1               1 
##         jumping           jumps            just         killers 
##               1               1               1               1 
##       knowledge           known       Koopmans,     laboratory. 
##               1               1               1               1 
##           Lassa         likely,           makes        man-made 
##               1               1               1               1 
##    manipulation          Marion            mean           means 
##               1               1               1               1 
##         Medical         meeting      micrograph         million 
##               1               1               1               1 
##          modern      monitoring            More         mystery 
##               1               1               1               1 
##         natural       naturally         needed,           nerve 
##               1               1               1               1 
##        Nigeria,           ninth          Norway          number 
##               1               1               1               1 
##        occuring    Organisation   organisation,        outbreak 
##               1               1               1               1 
##        pandemic           panel            past            plan 
##               1               1               1               1 
##       platforms           play’           point      population 
##               1               1               1               1 
##       possible,     potentially         prepare        prepare. 
##               1               1               1               1 
##        previous        probably         process       Professor 
##               1               1               1               1 
##          rapid.        relative        renegade      represents 
##               1               1               1               1 
##        research        Research      resistance          risk.” 
##               1               1               1               1 
##          risks,       Rotterdam       Rottingen      Rottingen. 
##               1               1               1               1 
##    safeguarding       Salisbury             say        scanning 
##               1               1               1               1 
##      Scientists            seen          senior             She 
##               1               1               1               1 
##           shows        Smallpox         sources         sparked 
##               1               1               1               1 
##        sparking         speed.”          spread        spread." 
##               1               1               1               1 
##         spreads      statement.         strange          strike 
##               1               1               1               1 
##            sure    surveillance        sweeping           Swine 
##               1               1               1               1 
##         systems           taboo     technology,           tells 
##               1               1               1               1 
##           terms         terror.          tests.             The 
##               1               1               1               1 
##           trade          travel      underlines         unknown 
##               1               1               1               1 
##        vaccines         variety     Viroscience         viruses 
##               1               1               1               1 
##        viruses.           vital            want         warned: 
##               1               1               1               1 
##           watch             way              We         weapons 
##               1               1               1               1 
##          week’s            West            WHO,            wide 
##               1               1               1               1 
##            work         workers           World         wreaked 
##               1               1               1               1 
##              X'              X.           years       zoonosis. 
##               1               1               1               1 
##        Zoonotic 
##               1

# install.packages('wordcloud2')
library(wordcloud2)
wordcloud2(wordcount(article))

Demo20180314

David Chiu

2018年3月14日

Matrix

Factor

Data Frame

List