# EX2 ----

# Read the tab-separated P005 data set (the first line holds the column
# names) and preview its first six rows.
dta2 <- read.table(
  "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt",
  sep = "\t",
  # Spelled out in full: `h = T` relied on partial argument matching and on
  # the reassignable shorthand `T`.
  header = TRUE
)
head(dta2)
##          City COL   PD URate     Pop Taxes Income RTWL
## 1     Atlanta 169  414  13.6 1790128  5128   2961    1
## 2      Austin 143  239  11.0  396891  4303   1711    1
## 3 Bakersfield 339   43  23.7  349874  4166   2122    0
## 4   Baltimore 173  951  21.0 2147850  5001   4654    0
## 5 Baton Rouge  99  255  16.0  411725  3965   1620    1
## 6      Boston 363 1257  24.4 3914071  4928   5634    0

# EX3 ----

# Read the junior-schools data (whitespace-separated, with a header row) and
# preview it.
# NOTE(review): IDPW is not defined in this part of the file; it is pasted
# between "http://" and the host, so it is presumably a credential prefix
# defined elsewhere -- confirm before running.
dta <- read.table(
  paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/juniorSchools.txt"),
  header = TRUE
)
head(dta)
##   school class sex soc ravens pupil english math year
## 1     S1    C1   G   9     23    P1      72   23    0
## 2     S1    C1   G   9     23    P1      80   24    1
## 3     S1    C1   G   9     23    P1      39   23    2
## 4     S1    C1   B   2     15    P2       7   14    0
## 5     S1    C1   B   2     15    P2      17   11    1
## 6     S1    C1   B   2     22    P3      88   36    0

# EX3-1: rename the third column ("sex") to "Gender".
names(dta)[3] <- "Gender"
colnames(dta)
## [1] "school"  "class"   "Gender"  "soc"     "ravens"  "pupil"   "english"
## [8] "math"    "year"

# EX3-2: recode the numeric soc codes 1-9 as a labelled factor, then plot the
# math scores against it.
dta$new_soc <- factor(
  dta$soc,
  levels = 1:9,
  labels = c(
    "I", "II", "III_0man", "III_man", "IV", "V",
    "VI_Unemp_L", "VII_emp_NC", "VIII_Miss_Dad"
  )
)
levels(dta$new_soc)
## [1] "I"             "II"            "III_0man"      "III_man"
## [5] "IV"            "V"             "VI_Unemp_L"    "VII_emp_NC"
## [9] "VIII_Miss_Dad"
plot(x = dta$new_soc, y = dta$math, ylab = "Math", xlab = "SOC")

# EX3-3: save the modified data frame as CSV in the working directory.
write.csv(dta, "1070319EX03HW.csv")

# EX4 ----

library(readr)

# Location of the fixed-width text file read in this exercise.
fL <- "http://www.amstat.org/publications/jse/datasets/aaup2.dat.txt"

# Let readr guess the field boundaries from the blank columns; only the
# begin/end positions are shown.
readr::fwf_empty(fL)[1:2]
## $begin
##  [1]  0  6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
##
## $end
##  [1]  5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA

# Read the file with an explicit 17-field layout given as consecutive field
# widths; "*" and "NA" are both treated as missing values.
dta2 <- read_fwf(
  fL,
  fwf_widths(
    widths = c(5, 32, 3, 4, 4, 4, 4, 5, 4, 4, 5, 4, 4, 4, 4, 4, 5),
    col_names = c("ID", "Uni", paste0("var", 3:17))
  ),
  na = c("*", "NA")
)
head(dta2)
## Parsed with column specification:
## cols(
##   ID = col_integer(),
##   Uni = col_character(),
##   var3 = col_character(),
##   var4 = col_character(),
##   var5 = col_integer(),
##   var6 = col_integer(),
##   var7 = col_integer(),
##   var8 = col_integer(),
##   var9 = col_integer(),
##   var10 = col_integer(),
##   var11 = col_integer(),
##   var12 = col_integer(),
##   var13 = col_integer(),
##   var14 = col_integer(),
##   var15 = col_integer(),
##   var16 = col_integer(),
##   var17 = col_integer()
## )
## # A tibble: 6 x 17
##      ID Uni    var3  var4   var5  var6  var7  var8  var9 var10 var11 var12
##   <int> <chr>  <chr> <chr> <int> <int> <int> <int> <int> <int> <int> <int>
## 1  1061 Alask~ AK    IIB     454   382   362   382   567   485   471   487
## 2  1063 Univ.~ AK    I       686   560   432   508   914   753   572   677
## 3  1065 Univ.~ AK    IIA     533   494   329   415   716   663   442   559
## 4 11462 Univ.~ AK    IIA     612   507   414   498   825   681   557   670
## 5  1002 Alaba~ AL    IIA     442   369   310   350   530   444   376   423
## 6  1004 Unive~ AL    IIA     441   385   310   388   542   473   383   477
## # ... with 5 more variables: var13 <int>, var14 <int>, var15 <int>,
## #   var16 <int>, var17 <int>

# EX5 ----

pacman::p_load(ggplot2, data.table, dplyr, magrittr, tidyr)

# Download the zipped data into the working directory and unpack it.
# NOTE(review): IDPW is not defined in this part of the file; presumably a
# credential prefix defined elsewhere -- confirm before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/Subject1.zip")
download.file(fL, "Subject1.zip", mode = "wb")
unzip("Subject1.zip")

# Paths of the four data files: Subject1/1w.dat ... Subject1/4w.dat.
# paste0() has no `sep` argument, so the suffix is passed as an ordinary
# piece to concatenate rather than as `sep = "w.dat"`.
fLs <- paste0("Subject1/", 1:4, "w.dat")

# Read each tab-separated file (skipping its first line), stack them into one
# table and drop the column named X.
dta <- lapply(fLs, read.table, header = TRUE, skip = 1, sep = "\t") %>%
  rbindlist() %>%
  select(-X)

# Clean the column names: remove "X" plus the six characters following it,
# then remove any remaining dots.
names(dta) <- gsub("X......", "", names(dta))
names(dta) <- gsub("[.]", "", names(dta))

# Number the rows (derived from the data rather than a hard-coded 1:1804) and
# reshape the first 30 columns to long format for plotting.
new_dta <- dta %>%
  mutate(ID = seq_len(n())) %>%
  gather(key = "Attribute", value = "score", 1:30) %>%
  mutate(Attribute_f = as.factor(Attribute))

# One box plot of score per attribute.
ggplot(new_dta, aes(Attribute_f, score)) +
  geom_boxplot() +
  labs(x = "腦位置", y = "毫秒")

# EX6 ----

# NOTE(review): IDPW is not defined in this part of the file; presumably a
# credential prefix defined elsewhere -- confirm before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/cities10.txt")

# Let readr guess the field boundaries of the fixed-width file.
readr::fwf_empty(fL)[1:2]
## $begin
## [1]  0 19
##
## $end
## [1] 17 NA

# Read the first 10 records as two fixed-width fields. The argument is
# spelled `widths` in full; `width` only worked through partial matching.
dta <- read.fwf(
  fL,
  widths = c(19, 8),
  col.names = c("city", "population"),
  n = 10
)
head(dta)
##                  city population
## 1 New York, NY          66,834.6
## 2 Kings, NY             34,722.9
## 3 Bronx, NY             31,729.8
## 4 Queens, NY            20,453.0
## 5 San Francisco, CA     16,526.2
## 6 Hudson, NJ            12,956.9

# Split "city, state" at the comma and convert population to numeric.
# The population values contain thousands separators ("66,834.6"), which
# as.numeric() cannot parse, so the commas are removed first.
new_dta <- dta %>%
  separate(city, into = c("city", "state"), sep = ",") %>%
  mutate(
    city = factor(city),
    population = as.numeric(gsub(",", "", population)),
    state = as.factor(gsub(" ", "", state))
  )

# Bar chart of population per city, ordered by population, coloured by state.
ggplot(new_dta, aes(reorder(city, population), population, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "city", y = "population")

# EX7 ----

# Reaction-time data: after a 4-line header, each row holds 30 measurements
# for one person; the columns are named T1 ... T30.
fL <- "http://www.stat.columbia.edu/~gelman/book/data/schiz.asc"
dta <- read.table(fL, skip = 4, col.names = paste0("T", 1:30))
head(dta)
##    T1  T2  T3  T4  T5  T6  T7  T8  T9 T10 T11 T12 T13 T14 T15 T16 T17 T18
## 1 312 272 350 286 268 328 298 356 292 308 296 372 396 402 280 330 254 282
## 2 354 346 384 342 302 312 322 376 306 402 320 298 308 414 304 422 388 422
## 3 256 284 320 274 324 268 370 430 314 312 362 256 342 388 302 366 298 396
## 4 260 294 306 292 264 290 272 268 344 362 330 280 354 320 334 276 418 288
## 5 204 272 250 260 314 308 246 236 208 268 272 264 308 236 238 350 272 252
## 6 590 312 286 310 778 364 318 316 316 298 344 262 274 330 312 310 376 326
##   T19 T20 T21 T22 T23 T24 T25 T26 T27 T28 T29 T30
## 1 350 328 332 308 292 258 340 242 306 328 294 272
## 2 426 338 332 426 478 372 392 374 430 388 354 368
## 3 274 226 328 274 258 220 236 272 322 284 274 356
## 4 338 350 350 324 286 322 280 256 218 256 220 356
## 5 252 236 306 238 350 206 260 280 274 318 268 210
## 6 346 334 282 292 282 300 290 302 300 306 294 444

# Label each person and reshape the 30 measurement columns to long format.
# The data set lists the 11 non-schizophrenic persons first, followed by the
# 6 schizophrenic persons, so the first 11 rows are "N" and the last 6 are
# "Y" (the labels were previously the other way round).
# NOTE(review): confirm the group order against the header of schiz.asc.
new_dta <- dta %>%
  mutate(
    schizoid = rep(c("N", "Y"), times = c(11, 6)),
    id = seq_len(n())
  ) %>%
  gather(key = "measure", value = "ms", 1:30) %>%
  mutate(measure = as.factor(measure))

# Mean and standard deviation of the reaction time per group.
aggregate(ms ~ schizoid, new_dta, mean)
##   schizoid       ms
## 1        N 310.1697
## 2        Y 506.8667
aggregate(ms ~ schizoid, new_dta, sd)
##   schizoid       ms
## 1        N  64.8805
## 2        Y 262.8473

# Box plots per measurement occasion, split by group, drawn horizontally.
ggplot(new_dta, aes(measure, ms, color = schizoid)) +
  coord_flip() +
  geom_boxplot()

# Linear model with measurement occasion and group as predictors.
anova(m0 <- lm(ms ~ measure + schizoid, data = new_dta))
## Analysis of Variance Table
##
## Response: ms
##            Df   Sum Sq Mean Sq  F value Pr(>F)
## measure    29   638735   22025   0.8046 0.7571
## schizoid    1  4506212 4506212 164.6050 <2e-16 ***
## Residuals 479 13113064   27376
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# EX8 ----

# NOTE(review): IDPW is not defined in this part of the file; presumably a
# credential prefix defined elsewhere -- confirm before running.
fL <- paste0("http://", IDPW, "140.116.183.121/~sheu/dataM/Data/ncku_roster.csv")

# The first line of the file holds the column names; the data are read
# separately, skipping the first two lines.
headers <- read.csv(fL, header = FALSE, nrows = 1, as.is = TRUE)
dta <- read.csv(fL, skip = 2, header = FALSE)
# headers is a one-row data frame whose cells are the column names.
colnames(dta) <- headers

# Split the second column at the first space into "major" and "info";
# anything after a second space is discarded (hence the warning below).
new_dta <- dta %>%
  separate(2, into = c("major", "info"), sep = " ") %>%
  mutate(major = as.factor(major))
## Warning: Expected 2 pieces. Additional pieces discarded in 15 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].

# Number of students per major. geom_bar() counts rows by default, so the
# deprecated `..count..` y aesthetic is not needed.
ggplot(new_dta, aes(major)) +
  geom_bar() +
  labs(x = "修課學生系所", y = "人數")

# EX9 ----

pacman::p_load(RISmed)
## Installing package into 'C:/Users/123/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## package 'RISmed' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
##  C:\Users\123\AppData\Local\Temp\RtmpsjwJUy\downloaded_packages
##
## RISmed installed

# Years to query, one PubMed search per year.
Span <- 2001:2017

# Number of PubMed hits for "deep learning" in each year. vapply() is used
# instead of sapply() so the result is guaranteed to be one number per year;
# a failed or malformed query raises an error instead of silently changing
# the shape of Tally.
Tally <- vapply(
  Span,
  function(i) {
    QueryCount(EUtilsSummary("deep learning",
                             type = "esearch", db = "pubmed",
                             mindate = i, maxdate = i))
  },
  FUN.VALUE = numeric(1)
)
names(Tally) <- Span

# Bar chart of the yearly counts with axis labels perpendicular to the axes.
barplot(Tally, las = 2, ylim = c(0, 1500), main = "", ylab = "Number of Articles")