2

把網路資料的網址存在fl,下載後的存放路徑存在fd。使用mode='wb'可以讓資料以binary mode下載,這台電腦才不會出現error。

# Source URL (fl) and local destination path (fd) for the P005 data set.
fl <- "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt"
fd <- "C:/Users/user/Dropbox/1062-Data_manage/0319/P005.txt"
# mode = "wb" requests a binary-mode transfer of the file.
download.file(url = fl, destfile = fd, mode = "wb")
# Read and print the downloaded table; fill = TRUE tolerates rows that have
# fewer fields than the widest row.
data.table::fread(fd, fill = TRUE)
##                      City COL   PD URate     Pop Taxes Income RTWL
##  1:               Atlanta 169  414  13.6 1790128  5128   2961    1
##  2:                Austin 143  239  11.0  396891  4303   1711    1
##  3:           Bakersfield 339   43  23.7  349874  4166   2122    0
##  4:             Baltimore 173  951  21.0 2147850  5001   4654    0
##  5:           Baton Rouge  99  255  16.0  411725  3965   1620    1
##  6:                Boston 363 1257  24.4 3914071  4928   5634    0
##  7:               Buffalo 253  834  39.2 1326848  4471   7213    0
##  8:      Champaign-Urbana 117  162  31.5  162304  4813   5535    0
##  9:          Cedar Rapids 294  229  18.2  164145  4839   7224    1
## 10:               Chicago 291 1886  31.5 7015251  5408   6113    0
## 11:            Cincinnati 170  643  29.5 1381196  4637   4806    0
## 12:             Cleveland 239 1295  29.5 1966725  5138   6432    0
## 13:                 Dalas 174  302  11.0 2527224  4923   2363    1
## 14:                Dayton 183  489  29.5  835708  4787   5606    0
## 15:                Denvar 227  304  15.2 1413318  5386   5982    0
## 16:               Detriot 255 1130  34.6 4424382  5246   6275    0
## 17:             Green Bay 249  323  27.8  169467  4289   8214    0
## 18:              Hartford 326  696  21.9 1062565  5134   6235    0
## 19:               Houston 194  337  11.0 2286247  5084   1278    1
## 20:          Indianapolis 251  371  29.3 1138753  4837   5699    0
## 21:           Kansas City 201  386  30.0 1290110  5052   4868    0
## 22:         Lancaster, PA 124  362  34.2  342797  4377   5205    0
## 23:           Los Angeles 340 1717  23.7 6986898  5281   1349    0
## 24:             Milwaukee 328  968  27.8 1409363  5176   7635    0
## 25: Minneapolis, St. Paul 265  433  24.4 2010841  5206   8392    0
## 26:             Nashville 120  183  17.7  748493  4454   3578    1
## 27:              New York 323 6908  39.2 9561089  5260   4862    0
## 28:               Orlando 117  230  11.7  582664  4613    782    1
## 29:          Philadelphia 182 1353  34.2 4807001  4877   5144    0
## 30:            Pittsburgh 169  762  34.2 2322224  4677   5987    0
## 31:              Portland 267  201  23.1  228417  4123   7511    0
## 32:             St. Louis 184  480  30.0 2366542  4721   4809    0
## 33:             San Diego 256  372  23.7 1584583  4837   1458    0
## 34:         San Francisco 381 1266  23.7 3140306  5940   3015    0
## 35:               Seattle 195  333  33.1 1406746  5416   4424    0
## 36:            Washington 205 1073  21.0 3021801  6404   4224    0
## 37:               Wichita 206  157  12.8  384920  4796   4620    1
## 38:        Raleigh-Durham 126  302   6.5  468512  4614   3393    1
##                      City COL   PD URate     Pop Taxes Income RTWL

3

使用read.delim讀進下載好的juniorschool.txt,會先下載好是因為老師的網站有需要輸入密碼,我怕這樣子沒辦法直接從網站上下載,所以才會手動下載下來。

# Load the manually downloaded, tab-delimited juniorschool.txt into jsp.
jsp <- read.delim(
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/juniorschool.txt"
)
# Show the first rows as a formatted table.
knitr::kable(x = head(jsp))
school class sex soc ravens pupil english math year
S1 C1 G 9 23 P1 72 23 0
S1 C1 G 9 23 P1 80 24 1
S1 C1 G 9 23 P1 39 23 2
S1 C1 B 2 15 P2 7 14 0
S1 C1 B 2 15 P2 17 11 1
S1 C1 B 2 22 P3 88 36 0

參考Stack overflow的Code更改sex為Gender

# Rename the column called "sex" to "Gender"; all other names are untouched.
names(jsp) <- replace(names(jsp), names(jsp) == "sex", "Gender")
# Show the first rows again to confirm the new column name.
knitr::kable(x = head(jsp))
school class Gender soc ravens pupil english math year
S1 C1 G 9 23 P1 72 23 0
S1 C1 G 9 23 P1 80 24 1
S1 C1 G 9 23 P1 39 23 2
S1 C1 B 2 15 P2 7 14 0
S1 C1 B 2 15 P2 17 11 1
S1 C1 B 2 22 P3 88 36 0

將social class(soc)的數字代碼1到9改為有標籤的factor後,用plot()畫出各類別的數學成績分布(x為factor時會畫成盒鬚圖,而非散布圖)

# Turn the numeric social-class codes 1-9 into a labelled factor.
jsp$soc <- factor(
  jsp$soc,
  levels = 1:9,
  labels = c(
    "I", "II", "III_0man", "III_man", "IV", "V",
    "VI_Unemp_L", "VII_emp_NC", "VII_Miss_Dad"
  )
)
# x is a factor and y is numeric, so plot() draws one box per social class.
plot(jsp$soc, jsp$math)

Write the edited jsp data object out as a comma-separated-value file to a data folder.

# Export jsp as comma-separated text, without quoting and without row names.
# Note that the destination file carries a .txt extension.
write.csv(
  jsp,
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/jsp.txt",
  quote = FALSE,
  row.names = FALSE
)

4

先使用fwf_empty看每欄裡佔多少格,然後使用read_fwf讀進資料,再把資料中的*用NA取代

# Print the guessed begin/end offsets of each fixed-width field in AAUP.txt.
readr::fwf_empty(
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/AAUP.txt"
)[c("begin", "end")]
## $begin
##  [1]  0  6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
## 
## $end
##  [1]  5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
# Read the fixed-width AAUP file using explicit column widths.
# "*" is the file's missing-value marker, so it is declared as an NA string at
# parse time. This replaces the earlier post-hoc replacement via the deprecated
# dplyr::funs(), and lets columns that contain "*" be parsed as numbers rather
# than being kept as character.
AAUP <- readr::read_fwf(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/AAUP.txt",
  col_positions = readr::fwf_cols(
    v1 = 6, v2 = 31, v22 = 3, v3 = 4, v4 = 4, v5 = 4, v6 = 4, v7 = 4,
    v8 = 5, v9 = 4, v10 = 4, v11 = 5, v12 = 4, v13 = 4, v14 = 4, v15 = 4,
    v16 = 5
  ),
  skip = 0,
  na = c("", "NA", "*")
)
# Spot-check a row in which several fields were "*" in the raw file.
AAUP[38, ]
## # A tibble: 1 x 17
##      v1 v2     v22   v3    v4    v5    v6       v7 v8    v9    v10     v11
##   <int> <chr>  <chr> <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr> <int>
## 1  1106 Willi~ AR    IIB   <NA>  <NA>  264     266 <NA>  <NA>  327     331
## # ... with 5 more variables: v12 <int>, v13 <int>, v14 <int>, v15 <int>,
## #   v16 <int>

5

首先把資料下載後解壓縮

# Path of the downloaded archive.
fl <- "C:/Users/user/Dropbox/1062-Data_manage/0319/Subject1.zip"
# Extract next to the archive rather than into a path relative to the current
# working directory, so the extracted files always end up where the later
# list.files() call (which uses an absolute path) looks for them.
unzip(fl, exdir = file.path(dirname(fl), "Subject1"))

把四個.dat檔抓到R裡面

# Folder that holds the extracted recordings.
subject_dir <- "C:/Users/user/Dropbox/1062-Data_manage/0319/Subject1/Subject1"
# Only pick up the .dat files; without a pattern, all.files = TRUE would also
# return any hidden or unrelated file in the folder and break read.csv().
fls <- list.files(
  path = subject_dir,
  pattern = "\\.dat$",
  ignore.case = TRUE,
  all.files = TRUE,
  no.. = TRUE
)
# Full path of every file to read.
fl <- file.path(subject_dir, fls)
# Each file is tab-separated; the first line is skipped and the second line is
# used as the header. The per-file tables are then stacked row-wise.
dta <- lapply(fl, read.csv, sep = "\t", header = TRUE, skip = 1) %>%
  bind_rows()
dta <- as.data.frame(dta)
str(dta)
## 'data.frame':    1804 obs. of  31 variables:
##  $ X.......F7.: num  -0.9733 -0.7079 -0.3732 -0.0225 0.3523 ...
##  $ X......FT7.: num  -1.007 -1.022 -0.981 -0.878 -0.71 ...
##  $ X.......T7.: num  -0.1834 -0.1705 -0.1544 -0.1239 -0.0611 ...
##  $ X......TP7.: num  -1.05 -1.14 -1.18 -1.09 -0.88 ...
##  $ X.......P7.: num  -0.705 -0.791 -0.821 -0.817 -0.697 ...
##  $ X......Fp1.: num  -1.15 -1.084 -1.007 -0.922 -0.817 ...
##  $ X.......F3.: num  -1.042 -1.017 -0.956 -0.864 -0.748 ...
##  $ X......FC3.: num  -0.521 -0.52 -0.499 -0.457 -0.36 ...
##  $ X.......C3.: num  -0.248 -0.264 -0.249 -0.204 -0.14 ...
##  $ X......CP3.: num  -0.0064 0.0064 0.0595 0.1351 0.2059 ...
##  $ X.......P3.: num  0.235 0.244 0.304 0.385 0.492 ...
##  $ X.......O1.: num  0.623 0.484 0.389 0.31 0.269 ...
##  $ X.......Fz.: num  -0.362 -0.301 -0.236 -0.177 -0.138 ...
##  $ X......FCz.: num  0.203 0.224 0.249 0.285 0.293 ...
##  $ X.......Cz.: num  0.322 0.275 0.22 0.195 0.167 ...
##  $ X......CPz.: num  0.529 0.462 0.441 0.433 0.404 ...
##  $ X.......Pz.: num  0.0692 -0.0322 -0.0949 -0.1078 -0.1046 ...
##  $ X.......Oz.: num  0.922 0.821 0.758 0.724 0.726 ...
##  $ X......Fp2.: num  -1.234 -1.121 -0.931 -0.706 -0.516 ...
##  $ X.......F4.: num  -0.689 -0.643 -0.579 -0.532 -0.52 ...
##  $ X......FC4.: num  0.756 0.753 0.75 0.735 0.677 ...
##  $ X.......C4.: num  0.793 0.714 0.656 0.602 0.536 ...
##  $ X......CP4.: num  0.571 0.631 0.66 0.648 0.59 ...
##  $ X.......P4.: num  0.956 0.933 0.917 0.896 0.821 ...
##  $ X.......O2.: num  0.909 0.763 0.645 0.537 0.47 ...
##  $ X.......F8.: num  0.0611 0.1673 0.2461 0.2687 0.2059 ...
##  $ X......FT8.: num  -0.3459 -0.2864 -0.1657 -0.0322 0.0676 ...
##  $ X.......T8.: num  0.507 0.809 1.097 1.35 1.511 ...
##  $ X......TP8.: num  1.44 1.82 2.16 2.41 2.53 ...
##  $ X.......P8.: num  1.45 1.47 1.47 1.42 1.34 ...
##  $ X          : logi  NA NA NA NA NA NA ...
# Drop the extra column named "X" (logical, NA in the rows shown by str()).
# Selecting by name instead of by position 31 keeps this correct if the number
# of channel columns ever changes.
dta <- dta[names(dta) != "X"]

修改變項名稱並畫圖

# Clean the column names produced by read.csv(), e.g. "X.......F7." -> "F7".
# The leading "X" and every "." are removed in a single pass. Previously the
# second sub() restarted from names(dta), discarding the first result, so the
# "X......" prefix stayed on every name.
namelist <- gsub("^X|[.]", "", names(dta))
colnames(dta) <- namelist
# Reshape to long format: one row per (ID, Attribute) pair with its score.
# The row id and the set of gathered columns are derived from the data instead
# of the hard-coded 1:1804 and 1:30, so this keeps working if the number of
# rows or channel columns changes.
dta2 <- dta %>%
  mutate(ID = seq_len(nrow(dta))) %>%
  gather(key = "Attribute", value = "score", -ID) %>%
  mutate(Attribute_f = as.factor(Attribute))
# One box per column of the original table.
ggplot(dta2, aes(Attribute_f, score)) +
  geom_boxplot() +
  labs(x = "腦位置", y = "毫秒")

6

讀進資料後畫圖

# Print the guessed begin/end offsets of each fixed-width field in city.txt.
readr::fwf_empty(
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/city.txt"
)[c("begin", "end")]
## $begin
## [1]  0 19
## 
## $end
## [1] 17 NA
# Read city.txt as two fixed-width fields: an 18-character city name followed
# by a 10-character density value.
city <- read_fwf(
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/city.txt",
  col_positions = fwf_cols(cityname = 18, density = 10),
  skip = 0
)
## Parsed with column specification:
## cols(
##   cityname = col_character(),
##   density = col_number()
## )
# Simplify the structure of city by re-selecting its first two columns.
city <- city[, c(1, 2)]
# Convert cityname to a factor so that it can be used on the x axis.
city[["cityname"]] <- factor(city[["cityname"]])
# Plot density against city name.
plot(city$cityname, city$density)

7

# Read the headerless schiz.csv. The stray empty positional argument from the
# earlier call (`,,`) is removed and FALSE is spelled out.
schiz <- read.csv(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/schiz.csv",
  header = FALSE
)
# Label the first 11 rows "non" and the last 6 rows "schiz", number the rows
# 1-17, then gather the 30 original columns into long format (measure, ms).
schiz2 <- schiz %>%
  mutate(
    schiztab = rep(c("non", "schiz"), times = c(11, 6)),
    id = 1:17
  ) %>%
  gather(key = "measure", value = "ms", 1:30) %>%
  mutate(measure = as.factor(measure))
# Plot every ms value against its row id.
plot(schiz2$id, schiz2$ms)

用anova檢驗正常人與患者間是否有差異,發現存在顯著差異。

# One-way ANOVA of ms on group membership (schiztab).
# NOTE(review): schiz2 holds 30 rows per id (17 ids x 30 measures) and this
# model treats all rows as independent observations - confirm that is intended.
aov(ms ~ schiztab, data = schiz2) %>%
  summary()
##              Df   Sum Sq Mean Sq F value Pr(>F)    
## schiztab      1  4506212 4506212   166.5 <2e-16 ***
## Residuals   508 13751799   27070                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8

Make a barplot to display the number of students from each major.

# Read the Big5-encoded roster, skipping the first line and using the next one
# as the header.
roster <- read.csv(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/ncku_roster.csv",
  skip = 1,
  header = TRUE,
  fileEncoding = "big5"
)
# Replace the original headers with short names.
names(roster) <- c("id", "major", "code", "ID", "name", "NA", "time")
# Split the second column at spaces into major and info, then make major a
# factor.
roster <- roster %>%
  separate(col = 2, into = c("major", "info"), sep = " ") %>%
  mutate(major = as.factor(major))
# Take a look at the cleaned data.
head(roster)
##   id  major info  code      ID name NA                  time
## 1  1 心理系      U7031 D840239   蘇 NA 02/17/2016 09:17:40  
## 2  2 心理系      U7031 D840057   吳 NA 02/17/2016 09:17:28  
## 3  3 心理系      U7031 D841311   余 NA 02/17/2016 09:09:10  
## 4  4 心理系      U7031 D840140   王 NA 02/17/2016 09:09:34  
## 5  5 教育所      U3006 U360098   劉 NA 01/18/2016 14:56:35  
## 6  6 教育所      U3006 U380416   陳 NA 01/25/2016 16:01:08
# Bar chart of the number of roster rows per major.
barplot(height = table(roster$major))

9

Search PubMed for the number of articles related to “deep learning” published in each year from 2001 to 2017. Plot the results.

# Load RISmed (pacman installs it first when it is missing).
pacman::p_load(RISmed)
# Years to query, one PubMed search per year.
Span <- 2001:2017
# For each year, run an esearch limited to that year and keep the hit count.
Tally <- sapply(Span, function(yr) {
  hits <- EUtilsSummary(
    "deep learning",
    type = "esearch",
    db = "pubmed",
    mindate = yr,
    maxdate = yr
  )
  QueryCount(hits)
})
# Label each count with its year so the bars are named.
Tally <- setNames(Tally, Span)
barplot(Tally, las = 2, ylab = "Number of Articles")