pacman::p_load(readr, tidyr, ggplot2, dplyr)
# Exercises-06 ----
# Read a two-column fixed-width file (city/state label and population figure)
# and draw one bar per city, ordered by decreasing population and filled by
# state.
# NOTE(review): `IDPW` is not defined in this section -- presumably a
# credentials prefix for the course server set elsewhere; confirm it exists
# before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/cities10.txt")

# Let readr guess the column boundaries from the empty columns in the file.
fwf_empty(fL)[1:2]
## $begin
## [1] 0 19
##
## $end
## [1] 17 NA

# Read the first 10 records as two fixed-width fields of 19 and 8 characters.
dta <- read.fwf(fL, widths = c(19, 8), col.names = c("city", "population"),
                n = 10)
head(dta, 10)
## city population
## 1 New York, NY 66,834.6
## 2 Kings, NY 34,722.9
## 3 Bronx, NY 31,729.8
## 4 Queens, NY 20,453.0
## 5 San Francisco, CA 16,526.2
## 6 Hudson, NJ 12,956.9
## 7 Suffolk, MA 11,691.6
## 8 Philadelphia, PA 11,241.1
## 9 Washington, DC 9,378.0
## 10 Alexandria IC, VA 8,552.2

# Split "city, state" into two columns and convert population to a number.
ex6_dta <- dta %>%
  separate(city, into = c("city", "state"), sep = ",") %>%
  mutate(
    city = factor(city),
    # population arrives as text with thousands separators ("66,834.6");
    # as.numeric() on that yields NA (or factor codes on R < 4.0), so the
    # commas have to be removed before the conversion.
    population = as.numeric(gsub(",", "", population)),
    # the state code carries padding from the fixed-width field
    state = as.factor(gsub(" ", "", state))
  )

ggplot(ex6_dta, aes(reorder(city, -population), population, fill = state)) +
  geom_col() +
  labs(x = "city", y = "population")

# Exercises-07 ----
# Read the reaction-time table (17 subjects x 30 trials), reshape it to long
# form, summarise the times by group, plot them per trial and fit a linear
# model with trial and group as predictors.
fL <- "http://www.stat.columbia.edu/~gelman/book/data/schiz.asc"
trial_cols <- paste0("T", 1:30)
dta <- read.table(fL, skip = 4, col.names = trial_cols)
head(dta)
## T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15 T16 T17 T18
## 1 312 272 350 286 268 328 298 356 292 308 296 372 396 402 280 330 254 282
## 2 354 346 384 342 302 312 322 376 306 402 320 298 308 414 304 422 388 422
## 3 256 284 320 274 324 268 370 430 314 312 362 256 342 388 302 366 298 396
## 4 260 294 306 292 264 290 272 268 344 362 330 280 354 320 334 276 418 288
## 5 204 272 250 260 314 308 246 236 208 268 272 264 308 236 238 350 272 252
## 6 590 312 286 310 778 364 318 316 316 298 344 262 274 330 312 310 376 326
## T19 T20 T21 T22 T23 T24 T25 T26 T27 T28 T29 T30
## 1 350 328 332 308 292 258 340 242 306 328 294 272
## 2 426 338 332 426 478 372 392 374 430 388 354 368
## 3 274 226 328 274 258 220 236 272 322 284 274 356
## 4 338 350 350 324 286 322 280 256 218 256 220 356
## 5 252 236 306 238 350 206 260 280 274 318 268 210
## 6 346 334 282 292 282 300 290 302 300 306 294 444

# Group sizes, in row order. The data set is described (Gelman et al.,
# Bayesian Data Analysis) as 11 non-schizophrenic subjects followed by 6
# schizophrenic subjects, so the first 11 rows are "N" and the last 6 are "Y".
# The previous labelling had these two groups swapped.
# NOTE(review): confirm the row order against the header lines skipped above.
n_non_schizoid <- 11
n_schizoid <- 6

ex07_dta <- dta %>%
  mutate(
    schizoid = rep(c("N", "Y"), times = c(n_non_schizoid, n_schizoid)),
    id = seq_len(nrow(dta))
  ) %>%
  pivot_longer(
    cols = all_of(trial_cols),
    names_to = "measure",
    values_to = "ms"
  ) %>%
  # keep trials in numeric order (T1, T2, ...) instead of T1, T10, T11, ...
  mutate(measure = factor(measure, levels = trial_cols))

aggregate(ms ~ schizoid, ex07_dta, mean)
## schizoid ms
## 1 N 310.1697
## 2 Y 506.8667
aggregate(ms ~ schizoid, ex07_dta, sd)
## schizoid ms
## 1 N 64.8805
## 2 Y 262.8473

# One box per trial and group; flipped so the trial labels stay readable.
ggplot(ex07_dta, aes(measure, ms, color = schizoid)) +
  geom_boxplot() +
  coord_flip()

# Additive model: trial effect plus group effect.
m0 <- lm(ms ~ measure + schizoid, data = ex07_dta)
anova(m0)
## Analysis of Variance Table
##
## Response: ms
## Df Sum Sq Mean Sq F value Pr(>F)
## measure 29 638735 22025 0.8046 0.7571
## schizoid 1 4506212 4506212 164.6050 <2e-16 ***
## Residuals 479 13113064 27376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exercises-08 ----
# Read the class roster, take the department from the first word of column 2
# and draw the number of roster rows per department.
# NOTE(review): `IDPW` is not defined in this section -- presumably a
# credentials prefix for the course server set elsewhere; confirm it exists
# before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/ncku_roster.csv")

# The first line supplies the column names; the records are read separately,
# starting after the first two lines of the file.
headers <- read.csv(fL, header = FALSE, nrows = 1, as.is = TRUE)
ex08_dta <- read.csv(fL, skip = 2, header = FALSE)
# `headers` is a one-row data frame; flatten it to a plain vector of names.
colnames(ex08_dta) <- unlist(headers, use.names = FALSE)

N_ex08_dta <- ex08_dta %>%
  # Column 2 is split on spaces. Only the first two pieces are kept;
  # extra = "drop" states that on purpose instead of relying on the default,
  # which discards the same pieces but emits a warning for every such row.
  separate(2, into = c("Department", "info"), sep = " ", extra = "drop") %>%
  mutate(Department = as.factor(Department))

# geom_bar() counts rows per x value by default, so no explicit y is needed.
ggplot(N_ex08_dta, aes(Department)) +
  geom_bar() +
  labs(x = "Department", y = "Count", subtitle = "105-2資料管理選課名單")

# Exercises-09 ----
# Count the PubMed records matching "deep learning" for each year in `Span`
# and plot the yearly counts as a bar chart.
pacman::p_load(RISmed)
Span <- 2001:2017

# One query per year (mindate == maxdate restricts the search to that year).
# vapply() guarantees a numeric vector with one count per year and fails
# loudly if a query returns anything else.
Tally <- vapply(
  Span,
  function(yr) {
    QueryCount(EUtilsSummary("deep learning",
                             type = "esearch", db = "pubmed",
                             mindate = yr, maxdate = yr))
  },
  numeric(1)
)
names(Tally) <- Span

# Keep the y axis at 2000 or more, but let it grow so that no bar is drawn
# past the end of the axis when a yearly count exceeds 2000.
barplot(Tally, las = 2, ylim = c(0, max(2000, Tally)), main = "",
        ylab = "Number of Articles")
