pacman::p_load(readr, tidyr, ggplot2, dplyr)

# Exercises-06

# Exercise 6: read a two-column fixed-width file (place name, figure), split
# the place into city and state, and draw one bar per city filled by state.
# NOTE(review): `IDPW` is not defined in this section; presumably a
# "user:password@" prefix set elsewhere -- confirm before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/cities10.txt")

# Let readr guess the column boundaries from the blank columns in the file.
fwf_empty(fL)[1:2]
## $begin
## [1]  0 19
## 
## $end
## [1] 17 NA
# Read the first 10 records: 19 characters of "City, ST", then 8 characters
# holding the figure (which still contains a thousands separator).
dta <- read.fwf(fL, widths = c(19, 8), col.names = c("city", "population"),
                n = 10)
head(dta, 10)
##                   city population
## 1  New York, NY          66,834.6
## 2  Kings, NY             34,722.9
## 3  Bronx, NY             31,729.8
## 4  Queens, NY            20,453.0
## 5  San Francisco, CA     16,526.2
## 6  Hudson, NJ            12,956.9
## 7  Suffolk, MA           11,691.6
## 8  Philadelphia, PA      11,241.1
## 9  Washington, DC         9,378.0
## 10 Alexandria IC, VA      8,552.2
ex6_dta <- dta %>%
  separate(city, into = c("city", "state"), sep = ",") %>%
  mutate(
    city = factor(city),
    # "66,834.6" is text: as.numeric() alone yields NA (character input) or
    # the factor level codes (factor input). Strip the commas first.
    population = as.numeric(gsub(",", "", as.character(population),
                                 fixed = TRUE)),
    # The state piece keeps the blank after the comma plus field padding.
    state = as.factor(gsub(" ", "", state))
  )
# Bars ordered from the largest to the smallest value.
ggplot(ex6_dta, aes(reorder(city, -population), population, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "city", y = "population")

# Exercises-07

# Exercise 7: read 30 repeated measurements per subject (one subject per
# row), reshape to long format, and compare the two subject groups.
fL <- "http://www.stat.columbia.edu/~gelman/book/data/schiz.asc"
# Skip the four leading lines of the file; name the 30 columns T1..T30.
dta <- read.table(fL, skip = 4, col.names = paste0("T", 1:30))
head(dta)
##    T1  T2  T3  T4  T5  T6  T7  T8  T9 T10 T11 T12 T13 T14 T15 T16 T17 T18
## 1 312 272 350 286 268 328 298 356 292 308 296 372 396 402 280 330 254 282
## 2 354 346 384 342 302 312 322 376 306 402 320 298 308 414 304 422 388 422
## 3 256 284 320 274 324 268 370 430 314 312 362 256 342 388 302 366 298 396
## 4 260 294 306 292 264 290 272 268 344 362 330 280 354 320 334 276 418 288
## 5 204 272 250 260 314 308 246 236 208 268 272 264 308 236 238 350 272 252
## 6 590 312 286 310 778 364 318 316 316 298 344 262 274 330 312 310 376 326
##   T19 T20 T21 T22 T23 T24 T25 T26 T27 T28 T29 T30
## 1 350 328 332 308 292 258 340 242 306 328 294 272
## 2 426 338 332 426 478 372 392 374 430 388 354 368
## 3 274 226 328 274 258 220 236 272 322 284 274 356
## 4 338 350 350 324 286 322 280 256 218 256 220 356
## 5 252 236 306 238 350 206 260 280 274 318 268 210
## 6 346 334 282 292 282 300 290 302 300 306 294 444
# NOTE(review): the data set is described as 11 non-schizophrenic subjects
# followed by 6 schizophrenic subjects, so the first 11 rows are labelled "N"
# and the last 6 "Y". The earlier version of this code had the two labels the
# other way round -- confirm against the header lines of schiz.asc.
n_non_schizoid <- 11
n_schizoid <- 6
ex07_dta <- dta %>%
  mutate(
    schizoid = rep(c("N", "Y"), times = c(n_non_schizoid, n_schizoid)),
    id = seq_len(n())
  ) %>%
  # Long format: one row per subject x measurement (columns T1..T30).
  gather(key = "measure", value = "ms", 1:30) %>%
  mutate(measure = as.factor(measure))
# Group means and standard deviations of the measurements.
aggregate(ms ~ schizoid, ex07_dta, mean)
##   schizoid       ms
## 1        N 310.1697
## 2        Y 506.8667
aggregate(ms ~ schizoid, ex07_dta, sd)
##   schizoid       ms
## 1        N  64.8805
## 2        Y 262.8473
# One horizontal box per measurement occasion and group.
ggplot(ex07_dta, aes(measure, ms, color = schizoid)) +
  coord_flip() +
  geom_boxplot()

# Additive linear model: measurement occasion plus group.
# NOTE(review): this treats the 30 measurements of a subject as independent;
# `id` is created above but not used in the model.
m0 <- lm(ms ~ measure + schizoid, data = ex07_dta)
anova(m0)
## Analysis of Variance Table
## 
## Response: ms
##            Df   Sum Sq Mean Sq  F value Pr(>F)    
## measure    29   638735   22025   0.8046 0.7571    
## schizoid    1  4506212 4506212 164.6050 <2e-16 ***
## Residuals 479 13113064   27376                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Exercises-08

# Exercise 8: read a class roster CSV, take the first space-separated piece
# of its second column as the department, and plot the head count per
# department.
# NOTE(review): `IDPW` is not defined in this section; presumably a
# "user:password@" prefix set elsewhere -- confirm before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/ncku_roster.csv")
# The column names live on line 1; the data are read starting from line 3.
headers <- read.csv(fL, header = FALSE, nrows = 1, as.is = TRUE)
ex08_dta <- read.csv(fL, skip = 2, header = FALSE)
# `headers` is a one-row data frame; flatten it to a plain character vector
# before using it as column names.
colnames(ex08_dta) <- as.character(unlist(headers, use.names = FALSE))
N_ex08_dta <- ex08_dta %>%
  # Column 2 holds more than two space-separated pieces; only the first two
  # are kept. `extra = "drop"` makes that explicit instead of relying on the
  # default, which drops them with a warning for every affected row.
  separate(2, into = c("Department", "info"), sep = " ", extra = "drop") %>%
  mutate(Department = as.factor(Department))
# geom_bar() counts rows per x value by default, so no y aesthetic is needed.
ggplot(N_ex08_dta, aes(Department)) +
  geom_bar() +
  labs(x = "Department", y = "Count", subtitle = "105-2資料管理選課名單")

# Exercises-09

# Exercise 9: count PubMed records matching 'deep learning' for each year in
# `Span` and show the yearly counts as a bar chart.
pacman::p_load(RISmed)
Span <- 2001:2017
# One esearch query per year (mindate == maxdate restricts it to that year).
# vapply() guarantees a numeric vector with one count per year and fails
# loudly if a query returns anything else.
Tally <- vapply(
  Span,
  function(yr) {
    QueryCount(EUtilsSummary("deep learning",
                             type = "esearch", db = "pubmed",
                             mindate = yr, maxdate = yr))
  },
  numeric(1)
)
names(Tally) <- Span
# Keep the 0-2000 axis as a minimum, but extend it when a yearly count is
# larger so that no bar runs past the axis.
barplot(Tally, las = 2, ylim = c(0, max(2000, Tally)), main = "",
        ylab = "Number of Articles")