# Data Frame ----
# Three parallel vectors describing four people. `country_vec` is a factor, so
# printing it shows the values followed by the sorted level labels.
names_vec <- c('toby', 'ian', 'brian', 'marry')
age_vec <- c(35, 45, 27, 18)
country_vec <- factor(c('Taiwan', 'Taiwan', 'Japan', 'China'))
country_vec
## [1] Taiwan Taiwan Japan China
## Levels: China Japan Taiwan

# A matrix holds a single type: c() coerces every value to character, and the
# factor contributes its integer codes ("3", "3", "2", "1") instead of labels.
mat <- matrix(c(names_vec, age_vec, country_vec), nrow = 4)
mat
## [,1] [,2] [,3]
## [1,] "toby" "35" "3"
## [2,] "ian" "45" "3"
## [3,] "brian" "27" "2"
## [4,] "marry" "18" "1"
summary(mat)
## V1 V2 V3
## brian:1 18:1 1:1
## ian :1 27:1 2:1
## marry:1 35:1 3:2
## toby :1 45:1

# A data frame keeps one type per column, so age stays numeric.
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0.0; the factor-style summary below depends on the conversion.
df <- data.frame(names_vec, age_vec, country_vec, stringsAsFactors = TRUE)
df
## names_vec age_vec country_vec
## 1 toby 35 Taiwan
## 2 ian 45 Taiwan
## 3 brian 27 Japan
## 4 marry 18 China
summary(df)
## names_vec age_vec country_vec
## brian:1 Min. :18.00 China :1
## ian :1 1st Qu.:24.75 Japan :1
## marry:1 Median :31.00 Taiwan:2
## toby :1 Mean :31.25
## 3rd Qu.:37.50
## Max. :45.00

# Same data with explicit column names. Character input is converted to a
# factor here (again requested explicitly so the result is version-independent).
df2 <- data.frame(
  name = names_vec,
  age = age_vec,
  country = country_vec,
  stringsAsFactors = TRUE
)
df2
## name age country
## 1 toby 35 Taiwan
## 2 ian 45 Taiwan
## 3 brian 27 Japan
## 4 marry 18 China
str(df2)
## 'data.frame': 4 obs. of 3 variables:
## $ name : Factor w/ 4 levels "brian","ian",..: 4 2 1 3
## $ age : num 35 45 27 18
## $ country: Factor w/ 3 levels "China","Japan",..: 3 3 2 1

# stringsAsFactors = FALSE keeps `name` as plain character; `country` is still
# a factor because it was one before it entered the data frame.
df3 <- data.frame(name = names_vec, age = age_vec, country = country_vec, stringsAsFactors = FALSE)
df3
## name age country
## 1 toby 35 Taiwan
## 2 ian 45 Taiwan
## 3 brian 27 Japan
## 4 marry 18 China
df3$name
## [1] "toby" "ian" "brian" "marry"
class(df3$name)
## [1] "character"

# A single column can be converted to a factor afterwards; the printed data
# frame looks the same.
df3$name <- as.factor(df3$name)
df3
## name age country
## 1 toby 35 Taiwan
## 2 ian 45 Taiwan
## 3 brian 27 Japan
## 4 marry 18 China
library(readr)
# Import the NHI enterovirus visit counts with readr; the guessed column
# specification is printed below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
NHI_EnteroviralInfection <- read_csv(
  file = "C:/Users/nc20/Downloads/NHI_EnteroviralInfection.csv"
)
## Parsed with column specification:
## cols(
## 年 = col_integer(),
## 週 = col_integer(),
## 就診類別 = col_character(),
## 年齡別 = col_character(),
## 縣市 = col_character(),
## 腸病毒健保就診人次 = col_integer(),
## 健保就診總人次 = col_integer()
## )
# View(NHI_EnteroviralInfection)  # interactive viewer, left disabled
# Show the first six rows.
head(NHI_EnteroviralInfection, n = 6L)
## # A tibble: 6 x 7
## 年 週 就診類別 年齡別 縣市 腸病毒健保就診人次 健保就診總人次
## <int> <int> <chr> <chr> <chr> <int> <int>
## 1 2008 14 住院 0-2 台中市 0 105
## 2 2008 14 住院 0-2 台北市 2 151
## 3 2008 14 住院 0-2 台東縣 0 14
## 4 2008 14 住院 0-2 台南市 0 20
## 5 2008 14 住院 0-2 宜蘭縣 0 44
## 6 2008 14 住院 0-2 花蓮縣 0 17
# Show the last six rows.
tail(NHI_EnteroviralInfection, n = 6L)
## # A tibble: 6 x 7
## 年 週 就診類別 年齡別 縣市 腸病毒健保就診人次 健保就診總人次
## <int> <int> <chr> <chr> <chr> <int> <int>
## 1 2018 10 門診 5-9 新竹市 33 7809
## 2 2018 10 門診 5-9 新竹縣 14 6900
## 3 2018 10 門診 5-9 嘉義市 37 5180
## 4 2018 10 門診 5-9 嘉義縣 22 3131
## 5 2018 10 門診 5-9 彰化縣 66 15423
## 6 2018 10 門診 5-9 澎湖縣 1 1075
# The imported object is a tibble, which is also a data.frame.
class(x = NHI_EnteroviralInfection)
## [1] "tbl_df" "tbl" "data.frame"
# Per-column summary: quantiles for integer columns, length/class for text.
summary(object = NHI_EnteroviralInfection)
## 年 週 就診類別 年齡別
## Min. :2008 Min. : 1.00 Length:110049 Length:110049
## 1st Qu.:2010 1st Qu.:14.00 Class :character Class :character
## Median :2013 Median :27.00 Mode :character Mode :character
## Mean :2013 Mean :26.74
## 3rd Qu.:2015 3rd Qu.:40.00
## Max. :2018 Max. :53.00
## 縣市 腸病毒健保就診人次 健保就診總人次
## Length:110049 Min. : 0.0 Min. : 0
## Class :character 1st Qu.: 0.0 1st Qu.: 28
## Mode :character Median : 3.0 Median : 997
## Mean : 46.2 Mean : 22672
## 3rd Qu.: 30.0 3rd Qu.: 7372
## Max. :2925.0 Max. :870061
# Compact structure listing: one line per column with its type and first values.
str(object = NHI_EnteroviralInfection)
## Classes 'tbl_df', 'tbl' and 'data.frame': 110049 obs. of 7 variables:
## $ 年 : int 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 ...
## $ 週 : int 14 14 14 14 14 14 14 14 14 14 ...
## $ 就診類別 : chr "住院" "住院" "住院" "住院" ...
## $ 年齡別 : chr "0-2" "0-2" "0-2" "0-2" ...
## $ 縣市 : chr "台中市" "台北市" "台東縣" "台南市" ...
## $ 腸病毒健保就診人次: int 0 2 0 0 0 0 0 0 0 0 ...
## $ 健保就診總人次 : int 105 151 14 20 44 17 1 19 1 141 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 7
## .. ..$ 年 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 週 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 就診類別 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 年齡別 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 縣市 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 腸病毒健保就診人次: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 健保就診總人次 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# A numeric string can be converted to an integer or to a double.
a <- "123"
as.integer(x = a)
## [1] 123
as.numeric(x = a)
## [1] 123
# Convert the three categorical text columns to factors and list the levels
# of each one.
NHI_EnteroviralInfection[["就診類別"]] <- as.factor(NHI_EnteroviralInfection[["就診類別"]])
levels(NHI_EnteroviralInfection[["就診類別"]])
## [1] "住院" "門診"
NHI_EnteroviralInfection[["年齡別"]] <- as.factor(NHI_EnteroviralInfection[["年齡別"]])
levels(NHI_EnteroviralInfection[["年齡別"]])
## [1] "0-2" "10-14" "15+" "3-4" "5-9"
NHI_EnteroviralInfection[["縣市"]] <- as.factor(NHI_EnteroviralInfection[["縣市"]])
levels(NHI_EnteroviralInfection[["縣市"]])
## [1] "台中市" "台北市" "台東縣" "台南市" "宜蘭縣" "花蓮縣" "金門縣"
## [8] "南投縣" "屏東縣" "苗栗縣" "桃園市" "高雄市" "基隆市" "連江縣"
## [15] "雲林縣" "新北市" "新竹市" "新竹縣" "嘉義市" "嘉義縣" "彰化縣"
## [22] "澎湖縣"
# The structure listing now reports the three columns as factors.
str(object = NHI_EnteroviralInfection)
## Classes 'tbl_df', 'tbl' and 'data.frame': 110049 obs. of 7 variables:
## $ 年 : int 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 ...
## $ 週 : int 14 14 14 14 14 14 14 14 14 14 ...
## $ 就診類別 : Factor w/ 2 levels "住院","門診": 1 1 1 1 1 1 1 1 1 1 ...
## $ 年齡別 : Factor w/ 5 levels "0-2","10-14",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ 縣市 : Factor w/ 22 levels "台中市","台北市",..: 1 2 3 4 5 6 7 9 10 11 ...
## $ 腸病毒健保就診人次: int 0 2 0 0 0 0 0 0 0 0 ...
## $ 健保就診總人次 : int 105 151 14 20 44 17 1 19 1 141 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 7
## .. ..$ 年 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 週 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 就診類別 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 年齡別 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 縣市 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ 腸病毒健保就診人次: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ 健保就診總人次 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# With factor columns, summary() reports a count per level instead of
# length/class.
summary(object = NHI_EnteroviralInfection)
## 年 週 就診類別 年齡別 縣市
## Min. :2008 Min. : 1.00 住院:52849 0-2 :21887 台中市 : 5200
## 1st Qu.:2010 1st Qu.:14.00 門診:57200 10-14:21700 台北市 : 5200
## Median :2013 Median :27.00 15+ :22865 台南市 : 5200
## Mean :2013 Mean :26.74 3-4 :21720 桃園市 : 5200
## 3rd Qu.:2015 3rd Qu.:40.00 5-9 :21877 高雄市 : 5200
## Max. :2018 Max. :53.00 新北市 : 5200
## (Other):78849
## 腸病毒健保就診人次 健保就診總人次
## Min. : 0.0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 28
## Median : 3.0 Median : 997
## Mean : 46.2 Mean : 22672
## 3rd Qu.: 30.0 3rd Qu.: 7372
## Max. :2925.0 Max. :870061
##
# sort() returns the values themselves: the six largest total visit counts.
head(sort(NHI_EnteroviralInfection[["健保就診總人次"]], decreasing = TRUE), n = 6L)
## [1] 870061 810839 809395 772582 763579 756824
# order() returns row positions, so it can reorder the whole table by total
# visits, largest first.
NHI_EnteroviralInfection[
  order(NHI_EnteroviralInfection[["健保就診總人次"]], decreasing = TRUE),
]
## # A tibble: 110,049 x 7
## 年 週 就診類別 年齡別 縣市 腸病毒健保就診人次 健保就診總人次
## <int> <int> <fct> <fct> <fct> <int> <int>
## 1 2016 5 門診 15+ 新北市 94 870061
## 2 2016 7 門診 15+ 新北市 90 810839
## 3 2012 3 門診 15+ 新北市 138 809395
## 4 2016 9 門診 15+ 新北市 54 772582
## 5 2014 4 門診 15+ 新北市 63 763579
## 6 2011 4 門診 15+ 新北市 42 756824
## 7 2016 5 門診 15+ 台北市 72 738735
## 8 2016 8 門診 15+ 新北市 56 737420
## 9 2015 6 門診 15+ 新北市 63 729586
## 10 2016 5 門診 15+ 高雄市 47 729150
## # ... with 110,039 more rows
# Reorder the table by enterovirus visit count, largest first.
NHI_EnteroviralInfection[
  order(NHI_EnteroviralInfection[["腸病毒健保就診人次"]], decreasing = TRUE),
]
## # A tibble: 110,049 x 7
## 年 週 就診類別 年齡別 縣市 腸病毒健保就診人次 健保就診總人次
## <int> <int> <fct> <fct> <fct> <int> <int>
## 1 2010 27 門診 5-9 新北市 2925 50125
## 2 2014 23 門診 5-9 新北市 2869 49034
## 3 2014 24 門診 5-9 新北市 2785 50469
## 4 2014 22 門診 5-9 新北市 2711 52373
## 5 2014 25 門診 5-9 新北市 2494 47971
## 6 2010 25 門診 5-9 新北市 2422 47021
## 7 2010 26 門診 5-9 新北市 2330 41716
## 8 2010 28 門診 5-9 新北市 2153 45395
## 9 2014 24 門診 5-9 台中市 2099 36455
## 10 2014 22 門診 5-9 台中市 2092 39640
## # ... with 110,039 more rows
# Share of all insured visits that were enterovirus visits, per row.
# NOTE: rows whose total visit count is 0 (the summary shows a minimum of 0)
# produce NaN or Inf here.
NHI_EnteroviralInfection[["腸病毒健保就診ratio"]] <-
  NHI_EnteroviralInfection[["腸病毒健保就診人次"]] /
  NHI_EnteroviralInfection[["健保就診總人次"]]
# Keep only the New Taipei City rows, then list them by ratio, largest first.
new_taipei <- NHI_EnteroviralInfection[
  NHI_EnteroviralInfection[["縣市"]] == '新北市',
]
new_taipei[order(new_taipei[["腸病毒健保就診ratio"]], decreasing = TRUE), ]
## # A tibble: 5,200 x 8
## 年 週 就診類別 年齡別 縣市 腸病毒健保就診人次 健保就診總人次
## <int> <int> <fct> <fct> <fct> <int> <int>
## 1 2010 26 住院 3-4 新北市 18 46
## 2 2010 28 住院 3-4 新北市 19 50
## 3 2010 29 住院 0-2 新北市 31 91
## 4 2010 24 住院 3-4 新北市 17 56
## 5 2010 27 住院 0-2 新北市 35 122
## 6 2010 25 住院 3-4 新北市 19 67
## 7 2013 25 住院 3-4 新北市 13 47
## 8 2010 33 住院 0-2 新北市 29 106
## 9 2010 28 住院 0-2 新北市 29 109
## 10 2011 47 住院 3-4 新北市 10 38
## # ... with 5,190 more rows, and 1 more variable: 腸病毒健保就診ratio <dbl>
# New Taipei rows for age group 3-4 and visit type '住院'; keep only the year,
# week and enterovirus visit count columns.
dataset <- new_taipei[new_taipei$年齡別 == '3-4' & new_taipei$就診類別 == '住院', c('年','週', '腸病毒健保就診人次') ]
# Distribution of the visit counts.
hist(dataset$腸病毒健保就診人次)

boxplot(dataset$腸病毒健保就診人次)

# Counts in row order, drawn as a line. The plot type must be the single
# character 'l'; the longer spelling 'line' is only accepted by truncation and
# raises a warning.
plot(dataset$腸病毒健保就診人次, type = 'l', main='腸病毒分析結果', col="red")

# Total enterovirus visits per county/city, then sorted from largest to
# smallest so the charts are ordered.
stat <- tapply(
  X = NHI_EnteroviralInfection$腸病毒健保就診人次,
  INDEX = NHI_EnteroviralInfection$縣市,
  FUN = sum
)
stat2 <- sort(stat, decreasing = TRUE)
barplot(height = stat2)

# Pie chart starting at 12 o'clock and running clockwise.
pie(x = stat2, init.angle = 90, clockwise = TRUE)

# List ----
# A list can mix types: here one string and two numbers, each with a name.
phone <- list(
  thing = "iphoneX",
  height = 5.65,
  width = 2.79
)
phone
## $thing
## [1] "iphoneX"
##
## $height
## [1] 5.65
##
## $width
## [1] 2.79
# `$name` extracts a single element.
phone$thing
## [1] "iphoneX"
phone$height
## [1] 5.65
# A list element may itself be a vector.
student <- list(
  name = "Toby",
  score = c(87, 57, 72)
)
student$score
## [1] 87 57 72
student$score[1]
## [1] 87
min(student$score)
## [1] 57
# `[[i]]` extracts an element by position.
student[[1]]
## [1] "Toby"
# Two score vectors of different lengths stored in one unnamed list.
li <- list(
  c(70, 65, 90),
  c(56, 75, 66, 63)
)
li[[1]]
## [1] 70 65 90
li[[2]]
## [1] 56 75 66 63
# Element by element ...
mean(li[[1]])
## [1] 75
mean(li[[2]])
## [1] 65
?lapply
## starting httpd help server ... done
# ... or once for every element: lapply() applies the function to each list
# element and returns a list of results.
lapply(X = li, FUN = mean)
## [[1]]
## [1] 75
##
## [[2]]
## [1] 65
sum(li[[1]])
## [1] 225
sum(li[[2]])
## [1] 260
lapply(X = li, FUN = sum)
## [[1]]
## [1] 225
##
## [[2]]
## [1] 260
# Typing a variable name at top level prints it, exactly like print().
a <- 180
b <- 250

# Implicit printing.
a
## [1] 180
b
## [1] 250

# Explicit printing.
print(a)
## [1] 180
print(b)
## [1] 250
# if / else: exactly one of the two branches runs.
x <- 5
if (x > 3) {
  print("x > 3")
} else {
  print("x<=3")
}
## [1] "x > 3"

# if / else if / else: conditions are tested top to bottom and the first one
# that is TRUE wins; the final else catches everything remaining.
x <- 2
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  print("x<=3")
}
## [1] "x<=3"
# `from:to` builds an integer sequence.
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
# A for loop visits each element in turn; `qoo` takes the values 1 to 10.
for (qoo in seq_len(10)) {
  print(qoo)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Summing with a for loop: not recommended.
s <- 0
for (qoo in seq_len(100)) {
  s <- s + qoo
}
s
## [1] 5050
# Summing with the built-in function: the recommended method.
sum(1:100)
## [1] 5050
# Square roots with a for loop: not recommended.
for (qoo in seq_len(10)) {
  print(qoo ^ (1 / 2))
}
## [1] 1
## [1] 1.414214
## [1] 1.732051
## [1] 2
## [1] 2.236068
## [1] 2.44949
## [1] 2.645751
## [1] 2.828427
## [1] 3
## [1] 3.162278
# Square roots with a vectorised expression: the recommended method.
(1:10) ^ (1 / 2)
## [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
## [8] 2.828427 3.000000 3.162278
# Three ways to walk over a character vector.
x <- c("sunny", "rainy", "cloudy", "rainy", "cloudy")
length(x)
## [1] 5
x[1]
## [1] "sunny"

# 1) Index loop built with 1:length(x). Shown for comparison only: for an
#    empty vector 1:0 counts down (1, 0) instead of producing no iterations.
for (i in 1:length(x)) {
  print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

# 2) Index loop built with seq_along(), which is empty for an empty vector.
seq_along(x)
## [1] 1 2 3 4 5
for (i in seq_along(x)) {
  print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

# 3) Loop over the elements directly when the index is not needed.
for (letter in x) {
  print(letter)
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
# Sum 0..100 with a while loop: the body repeats as long as the condition
# holds, so the counter must be advanced inside the body.
s <- 0
cnt <- 0
while (cnt <= 100) {
  s <- s + cnt
  cnt <- cnt + 1
}
s
## [1] 5050
# The loop stops at the first value that fails the condition.
cnt
## [1] 101
sum(1:100)
## [1] 5050
# while loop: be careful of infinite loops. With the counter update commented
# out, as below, the condition never becomes FALSE.
# cnt <- 0
# while (cnt <= 100) {
#   s <- s + cnt
#   # cnt <- cnt + 1
# }
# Base address to which a page number is appended.
url <- "https://tw.appledaily.com/new/realtime/"
?paste
# paste() joins its arguments with a space by default ...
paste(url, 1)
## [1] "https://tw.appledaily.com/new/realtime/ 1"
# ... unless the separator is set to the empty string ...
paste(url, 1, sep = "")
## [1] "https://tw.appledaily.com/new/realtime/1"
# ... which is what paste0() does.
paste0(url, 1)
## [1] "https://tw.appledaily.com/new/realtime/1"
# Print the address of pages 1 to 10.
for (i in seq_len(10)) {
  print(paste0(url, i))
}
## [1] "https://tw.appledaily.com/new/realtime/1"
## [1] "https://tw.appledaily.com/new/realtime/2"
## [1] "https://tw.appledaily.com/new/realtime/3"
## [1] "https://tw.appledaily.com/new/realtime/4"
## [1] "https://tw.appledaily.com/new/realtime/5"
## [1] "https://tw.appledaily.com/new/realtime/6"
## [1] "https://tw.appledaily.com/new/realtime/7"
## [1] "https://tw.appledaily.com/new/realtime/8"
## [1] "https://tw.appledaily.com/new/realtime/9"
## [1] "https://tw.appledaily.com/new/realtime/10"
# The same addresses collected into a list instead of printed one by one.
lapply(seq_len(10), function(page) paste0(url, page))
## [[1]]
## [1] "https://tw.appledaily.com/new/realtime/1"
##
## [[2]]
## [1] "https://tw.appledaily.com/new/realtime/2"
##
## [[3]]
## [1] "https://tw.appledaily.com/new/realtime/3"
##
## [[4]]
## [1] "https://tw.appledaily.com/new/realtime/4"
##
## [[5]]
## [1] "https://tw.appledaily.com/new/realtime/5"
##
## [[6]]
## [1] "https://tw.appledaily.com/new/realtime/6"
##
## [[7]]
## [1] "https://tw.appledaily.com/new/realtime/7"
##
## [[8]]
## [1] "https://tw.appledaily.com/new/realtime/8"
##
## [[9]]
## [1] "https://tw.appledaily.com/new/realtime/9"
##
## [[10]]
## [1] "https://tw.appledaily.com/new/realtime/10"
# Smallest possible function: no parameters and an empty body, so calling it
# returns NULL.
f <- function() {
}
# Add two values. The value of the last evaluated expression is what the
# function returns.
#   a, b: values to add
addNum <- function(a, b) {
  a + b
}
addNum(3, 5)
## [1] 8
# Multiply two values. Only the last expression is returned: the sum on the
# first line is evaluated and then discarded (kept to demonstrate that).
#   a, b: values to multiply
multiplyNum <- function(a, b) {
  a + b
  a * b
}
multiplyNum(3, 5)
## [1] 15
# Same as multiplyNum, but the result is handed back with an explicit
# return(). The preceding sum is still evaluated and discarded.
#   a, b: values to multiply
multiplyNum2 <- function(a, b) {
  a + b
  product <- a * b
  return(product)
}
multiplyNum2(3, 5)
## [1] 15
# Add two values; `b` has a default, so it may be left out of the call.
#   a: first value (required)
#   b: second value, 2 when omitted
addNum <- function(a, b = 2) {
  a + b
}
addNum(3)
## [1] 5
addNum(5)
## [1] 7
# Weighted sum 8 * a + 2 * b with defaults for both parameters. Because the
# weights differ, the calls below show how arguments are matched.
#   a: value weighted by 8, 3 when omitted
#   b: value weighted by 2, 2 when omitted
addNum <- function(a = 3, b = 2) {
  a * 8 + b * 2
}
# No arguments: both defaults are used.
addNum()
## [1] 28
# Unnamed arguments are matched by position.
addNum(2, 3)
## [1] 22
# Named arguments may come in any order.
addNum(b = 2, a = 3)
## [1] 28
# Weighted sum 8 * a + 2 * b + pineapplepen, with a default for every
# parameter.
#   a: value weighted by 8, 3 when omitted
#   b: value weighted by 2, 2 when omitted
#   pineapplepen: value added as is, 7 when omitted
addNum <- function(a = 3, b = 2, pineapplepen = 7) {
  a * 8 + b * 2 + pineapplepen
}
addNum(3, 5, 2)
## [1] 36
# Partial matching: `pin` is an unambiguous prefix of `pineapplepen`, so R
# accepts it. Shown as a demonstration; spell argument names out in real code.
addNum(a = 3, b = 5, pin = 2)
## [1] 36
# Add two values via a local variable. `s` exists only inside the function.
#   a: first value, 2 when omitted
#   b: second value, 3 when omitted
addNum <- function(a = 2, b = 3) {
  s <- a + b
  s
}
addNum(3, 5)
## [1] 8
addNum()
## [1] 5
# Double `a`. `b` is declared but never used, and arguments are evaluated
# lazily, so the call works even though `b` is not supplied.
#   a: value to double
#   b: unused
f <- function(a, b) {
  a * 2
}
f(3)
## [1] 6
# Add `a` and `b`. Here `b` is used and has no default, so leaving it out
# fails when the sum is evaluated.
#   a, b: values to add
f <- function(a, b) {
  total <- a + b
  return(total)
}
# f(3)  # error: argument "b" is missing, with no default
# Add `a` and `b`; giving `b` a default makes the one-argument call valid.
#   a: first value (required)
#   b: second value, 3 when omitted
f <- function(a, b = 3) {
  total <- a + b
  return(total)
}
f(3)
## [1] 6
?file
# Read the article into a character vector, one element per line.
f <- file('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/disease.txt')
# warn = FALSE: the text has no trailing newline, which readLines() would
# otherwise report as an incomplete final line (the line is still read).
# finally = close(f): release the connection even when the download fails.
article <- tryCatch(
  readLines(f, warn = FALSE),
  finally = close(f)
)
#article
# Split a string at a given delimiter. The result is a list holding one
# character vector per input string.
?strsplit
a <- "this is a book"
strsplit(x = a, split = " ")
## [[1]]
## [1] "this" "is" "a" "book"
# Use unlist() to flatten a list into a single vector.
?unlist
b <- list(
  c(1, 2),
  c(2, 3)
)
b
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 2 3
unlist(x = b)
## [1] 1 2 2 3
# Count the occurrences of each distinct element of a vector.
?table
a <- c(1, 2, 1, 2, 3, 3, 2)
# First output row: the distinct values; second row: how often each occurs.
table(a)
## a
## 1 2 3
## 2 3 2
# Count how often each word occurs in a text.
#   article: character vector of text lines
# Returns a one-dimensional table of counts, most frequent word first.
wordcount <- function(article){
  # strsplit() yields one token vector per line; flatten them into one vector.
  article.vec <- unlist(strsplit(article, ' '))
  # Leading or repeated spaces produce empty tokens, which are not words.
  article.vec <- article.vec[nzchar(article.vec)]
  sort(table(article.vec), decreasing = TRUE)
}
# Lower-case English stopwords to leave out of word counts, plus a bare "-".
# Two misspelled entries of the original list are corrected ("fify" -> "fifty",
# "thickv" -> "thick") and repeated entries are listed once.
stopwords <- c(
  "a", "about", "above", "across", "after", "afterwards", "again", "against",
  "all", "almost", "alone", "along", "already", "also", "although", "always",
  "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
  "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
  "around", "as", "at", "back", "be", "became", "because", "become",
  "becomes", "becoming", "been", "before", "beforehand", "behind", "being",
  "below", "beside", "besides", "between", "beyond", "bill", "both",
  "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
  "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
  "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else",
  "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone",
  "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill",
  "find", "fire", "first", "five", "for", "former", "formerly", "forty",
  "found", "four", "from", "front", "full", "further", "get", "give", "go",
  "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter",
  "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his",
  "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest",
  "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly",
  "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might",
  "mill", "mine", "more", "moreover", "most", "mostly", "move", "much",
  "must", "my", "myself", "name", "namely", "neither", "never",
  "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor",
  "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once",
  "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours",
  "ourselves", "out", "over", "own", "part", "per", "perhaps", "please",
  "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems",
  "serious", "several", "she", "should", "show", "side", "since", "sincere",
  "six", "sixty", "so", "some", "somehow", "someone", "something",
  "sometime", "sometimes", "somewhere", "still", "such", "system", "take",
  "ten", "than", "that", "the", "their", "them", "themselves", "then",
  "thence", "there", "thereafter", "thereby", "therefore", "therein",
  "thereupon", "these", "they", "thick", "thin", "third", "this", "those",
  "though", "three", "through", "throughout", "thru", "thus", "to",
  "together", "too", "top", "toward", "towards", "twelve", "twenty", "two",
  "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
  "well", "were", "what", "whatever", "when", "whence", "whenever", "where",
  "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
  "whether", "which", "while", "whither", "who", "whoever", "whole", "whom",
  "whose", "why", "will", "with", "within", "without", "would", "yet", "you",
  "your", "yours", "yourself", "yourselves", "-"
)
# Count how often each word occurs in a text, leaving out stopwords.
#   article:    character vector of text lines
#   stop_words: lower-case words to ignore; defaults to the script-level
#               `stopwords` vector, which is looked up when the function is
#               called without this argument
# Returns a one-dimensional table of counts, most frequent word first. Words
# keep their original spelling.
wordcount <- function(article, stop_words = stopwords){
  # strsplit() yields one token vector per line; flatten them into one vector.
  words <- unlist(strsplit(article, ' '))
  # A stopword that starts a sentence is capitalised ("The", "It"), so its
  # capitalised form is ignored as well. All-caps tokens such as "WHO" are
  # deliberately left alone because they are usually abbreviations.
  capitalised <- paste0(toupper(substr(stop_words, 1, 1)), substring(stop_words, 2))
  ignored <- c(stop_words, capitalised)
  # Leading or repeated spaces produce empty tokens, which are not words.
  keep <- nzchar(words) & !(words %in% ignored)
  sort(table(words[keep]), decreasing = TRUE)
}
# Word frequencies of the downloaded article, most frequent first.
wordcount(article = article)
##
## Disease new said X
## 6 6 6 6
## deadly diseases likely pathogen
## 4 4 4 4
## creation disease emerge
## 3 3 3 3
## epidemic health human list
## 3 3 3 3
## Mr virus WHO year
## 3 3 3 3
## adviser alert animals currently
## 2 2 2 2
## Ebola humans international It
## 2 2 2 2
## killed major make people
## 2 2 2 2
## pose public risk Rottingen,
## 2 2 2 2
## scientific scientists This time.
## 2 2 2 2
## use world zoonotic 'Disease
## 2 2 2 1
## [WHO] ‘plug ‘X’ “As
## 1 1 1 1
## “Disease “History “It “It’s
## 1 1 1 1
## “Synthetic “The “These 100-year
## 1 1 1 1
## 11,000 1980s. 2009. 2013
## 1 1 1 1
## 2016. 35 A accident
## 1 1 1 1
## act added added: adding
## 1 1 1 1
## Advances Africa allow allows
## 1 1 1 1
## And animal any, appearing
## 1 1 1 1
## aware before”, believed big
## 1 1 1 1
## biological biology brazen broken
## 1 1 1 1
## bushmeat Canada case cause
## 1 1 1 1
## caused Center change charged
## 1 1 1 1
## chemical chief chimpanzees close
## 1 1 1 1
## colorized come committee. confined
## 1 1 1 1
## contact convenes Council countermeasures
## 1 1 1 1
## create department depiction developed
## 1 1 1 1
## development develops. diagnostic diagnostics”,
## 1 1 1 1
## did digitally disease”. diseases;
## 1 1 1 1
## down. Each early eating
## 1 1 1 1
## Ebola, ecosystem editing electron
## 1 1 1 1
## emergency. entirely Erasmus executive
## 1 1 1 1
## experts Experts far fast”.
## 1 1 1 1
## fears fever, filamentous flexibly
## 1 1 1 1
## flu Fmake frequency gas
## 1 1 1 1
## gene Geneva-based globe greater
## 1 1 1 1
## greatest H1N1 habitats havoc
## 1 1 1 1
## head Health health, heightened
## 1 1 1 1
## high-level HIV, horsepox However,
## 1 1 1 1
## humans”, In include including
## 1 1 1 1
## infections intensity John-Arne jumped
## 1 1 1 1
## jumping jumps just killers
## 1 1 1 1
## knowledge known Koopmans, laboratory.
## 1 1 1 1
## Lassa likely, makes man-made
## 1 1 1 1
## manipulation Marion mean means
## 1 1 1 1
## Medical meeting micrograph million
## 1 1 1 1
## modern monitoring More mystery
## 1 1 1 1
## natural naturally needed, nerve
## 1 1 1 1
## Nigeria, ninth Norway number
## 1 1 1 1
## occuring Organisation organisation, outbreak
## 1 1 1 1
## pandemic panel past plan
## 1 1 1 1
## platforms play’ point population
## 1 1 1 1
## possible, potentially prepare prepare.
## 1 1 1 1
## previous probably process Professor
## 1 1 1 1
## rapid. relative renegade represents
## 1 1 1 1
## research Research resistance risk.”
## 1 1 1 1
## risks, Rotterdam Rottingen Rottingen.
## 1 1 1 1
## safeguarding Salisbury say scanning
## 1 1 1 1
## Scientists seen senior She
## 1 1 1 1
## shows Smallpox sources sparked
## 1 1 1 1
## sparking speed.” spread spread."
## 1 1 1 1
## spreads statement. strange strike
## 1 1 1 1
## sure surveillance sweeping Swine
## 1 1 1 1
## systems taboo technology, tells
## 1 1 1 1
## terms terror. tests. The
## 1 1 1 1
## trade travel underlines unknown
## 1 1 1 1
## vaccines variety Viroscience viruses
## 1 1 1 1
## viruses. vital want warned:
## 1 1 1 1
## watch way We weapons
## 1 1 1 1
## week’s West WHO, wide
## 1 1 1 1
## work workers World wreaked
## 1 1 1 1
## X' X. years zoonosis.
## 1 1 1 1
## Zoonotic
## 1
# install.packages('wordcloud2')
library(wordcloud2)
# Draw a word cloud from the article's word frequency table.
wordcloud2(data = wordcount(article))