2
把網路資料的網址放在fl裡,並把下載後的存放位置放在新創的fd中。使用mode='wb'可以讓資料用binary mode被抓下來,這台電腦才不會出現error。
# Remote address of the P005 data set and the local file it is saved to.
fl <- "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt"
fd <- "C:/Users/user/Dropbox/1062-Data_manage/0319/P005.txt"
# Binary mode ("wb") writes the bytes exactly as they are served.
download.file(url = fl, destfile = fd, mode = "wb")
# fill = TRUE pads rows that have fewer fields than the widest row.
data.table::fread(fd, fill = TRUE)
## City COL PD URate Pop Taxes Income RTWL
## 1: Atlanta 169 414 13.6 1790128 5128 2961 1
## 2: Austin 143 239 11.0 396891 4303 1711 1
## 3: Bakersfield 339 43 23.7 349874 4166 2122 0
## 4: Baltimore 173 951 21.0 2147850 5001 4654 0
## 5: Baton Rouge 99 255 16.0 411725 3965 1620 1
## 6: Boston 363 1257 24.4 3914071 4928 5634 0
## 7: Buffalo 253 834 39.2 1326848 4471 7213 0
## 8: Champaign-Urbana 117 162 31.5 162304 4813 5535 0
## 9: Cedar Rapids 294 229 18.2 164145 4839 7224 1
## 10: Chicago 291 1886 31.5 7015251 5408 6113 0
## 11: Cincinnati 170 643 29.5 1381196 4637 4806 0
## 12: Cleveland 239 1295 29.5 1966725 5138 6432 0
## 13: Dalas 174 302 11.0 2527224 4923 2363 1
## 14: Dayton 183 489 29.5 835708 4787 5606 0
## 15: Denvar 227 304 15.2 1413318 5386 5982 0
## 16: Detriot 255 1130 34.6 4424382 5246 6275 0
## 17: Green Bay 249 323 27.8 169467 4289 8214 0
## 18: Hartford 326 696 21.9 1062565 5134 6235 0
## 19: Houston 194 337 11.0 2286247 5084 1278 1
## 20: Indianapolis 251 371 29.3 1138753 4837 5699 0
## 21: Kansas City 201 386 30.0 1290110 5052 4868 0
## 22: Lancaster, PA 124 362 34.2 342797 4377 5205 0
## 23: Los Angeles 340 1717 23.7 6986898 5281 1349 0
## 24: Milwaukee 328 968 27.8 1409363 5176 7635 0
## 25: Minneapolis, St. Paul 265 433 24.4 2010841 5206 8392 0
## 26: Nashville 120 183 17.7 748493 4454 3578 1
## 27: New York 323 6908 39.2 9561089 5260 4862 0
## 28: Orlando 117 230 11.7 582664 4613 782 1
## 29: Philadelphia 182 1353 34.2 4807001 4877 5144 0
## 30: Pittsburgh 169 762 34.2 2322224 4677 5987 0
## 31: Portland 267 201 23.1 228417 4123 7511 0
## 32: St. Louis 184 480 30.0 2366542 4721 4809 0
## 33: San Diego 256 372 23.7 1584583 4837 1458 0
## 34: San Francisco 381 1266 23.7 3140306 5940 3015 0
## 35: Seattle 195 333 33.1 1406746 5416 4424 0
## 36: Washington 205 1073 21.0 3021801 6404 4224 0
## 37: Wichita 206 157 12.8 384920 4796 4620 1
## 38: Raleigh-Durham 126 302 6.5 468512 4614 3393 1
## City COL PD URate Pop Taxes Income RTWL
3
使用read.delim讀進下載好的juniorschool.txt,會先下載好是因為老師的網站有需要輸入密碼,我怕這樣子沒辦法直接從網站上下載,所以才會手動下載下來。
# Read the manually downloaded junior-school file (tab-delimited) into `jsp`.
jsp <- read.delim(
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/juniorschool.txt"
)
# Render the first rows of `jsp` as a table.
knitr::kable(x = head(jsp))
| school | class | sex | soc | ravens | pupil | english | math | year |
|---|---|---|---|---|---|---|---|---|
| S1 | C1 | G | 9 | 23 | P1 | 72 | 23 | 0 |
| S1 | C1 | G | 9 | 23 | P1 | 80 | 24 | 1 |
| S1 | C1 | G | 9 | 23 | P1 | 39 | 23 | 2 |
| S1 | C1 | B | 2 | 15 | P2 | 7 | 14 | 0 |
| S1 | C1 | B | 2 | 15 | P2 | 17 | 11 | 1 |
| S1 | C1 | B | 2 | 22 | P3 | 88 | 36 | 0 |
參考Stack overflow的Code更改sex為Gender
# Rename the `sex` column to `Gender`; all other column names stay as they are.
names(jsp) <- replace(names(jsp), names(jsp) == "sex", "Gender")
# Render the first rows again to confirm the new column name.
knitr::kable(x = head(jsp))
| school | class | Gender | soc | ravens | pupil | english | math | year |
|---|---|---|---|---|---|---|---|---|
| S1 | C1 | G | 9 | 23 | P1 | 72 | 23 | 0 |
| S1 | C1 | G | 9 | 23 | P1 | 80 | 24 | 1 |
| S1 | C1 | G | 9 | 23 | P1 | 39 | 23 | 2 |
| S1 | C1 | B | 2 | 15 | P2 | 7 | 14 | 0 |
| S1 | C1 | B | 2 | 15 | P2 | 17 | 11 | 1 |
| S1 | C1 | B | 2 | 22 | P3 | 88 | 36 | 0 |
把social class的數值代碼(1到9)改成類別標籤後,畫出各類別數學成績的盒狀圖
# Recode the social-class codes 1-9 into a factor with descriptive labels.
# NOTE(review): "III_0man" and the second "VII_" label look like typos --
# confirm against the codebook before relying on these level names.
jsp$soc <- factor(
  jsp$soc,
  levels = 1:9,
  labels = c(
    "I", "II", "III_0man", "III_man", "IV", "V",
    "VI_Unemp_L", "VII_emp_NC", "VII_Miss_Dad"
  )
)
# With a factor on the x-axis, plot() draws one box plot of math per level.
plot(x = jsp$soc, y = jsp$math)
Write the edited jsp. data object out as a comma-separated-value file to a data folder
# Write comma-separated values without quoting and without a row-name column.
write.csv(
  jsp,
  file = "C:/Users/user/Dropbox/1062-Data_manage/0319/jsp.txt",
  quote = FALSE,
  row.names = FALSE
)
4
先使用fwf_empty看每欄裡佔多少格,然後使用read_fwf讀進資料,再把資料中的*用NA取代
# Inspect where the blank-separated columns of AAUP.txt begin and end.
readr::fwf_empty("C:/Users/user/Dropbox/1062-Data_manage/0319/AAUP.txt")[1:2]
## $begin
## [1] 0 6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
##
## $end
## [1] 5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
# Read the fixed-width file with explicit column widths. The readr functions
# are namespace-qualified so this call matches the fwf_empty() call above and
# does not depend on readr being attached.
AAUP <- readr::read_fwf(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/AAUP.txt",
  col_positions = readr::fwf_cols(
    v1 = 6, v2 = 31, v22 = 3, v3 = 4, v4 = 4, v5 = 4, v6 = 4, v7 = 4,
    v8 = 5, v9 = 4, v10 = 4, v11 = 5, v12 = 4, v13 = 4, v14 = 4, v15 = 4,
    v16 = 5
  ),
  skip = 0
)
# Replace the "*" placeholder with NA in every column. A plain function is
# passed instead of the deprecated funs() wrapper.
AAUP <- mutate_all(AAUP, function(column) replace(column, column == "*", NA))
# Show row 38 to check that the "*" entries became NA.
AAUP[38, ]
## # A tibble: 1 x 17
## v1 v2 v22 v3 v4 v5 v6 v7 v8 v9 v10 v11
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr> <int>
## 1 1106 Willi~ AR IIB <NA> <NA> 264 266 <NA> <NA> 327 331
## # ... with 5 more variables: v12 <int>, v13 <int>, v14 <int>, v15 <int>,
## # v16 <int>
5
首先把資料下載後解壓縮
# Location of the downloaded archive.
fl <- "C:/Users/user/Dropbox/1062-Data_manage/0319/Subject1.zip"
# Extract next to the zip file. A relative exdir would depend on the current
# working directory, while the files are read back from an absolute path below.
subject_dir <- file.path(dirname(fl), "Subject1")
unzip(fl, exdir = subject_dir)
把四個.dat檔抓到R裡面
# The data files sit in a "Subject1" folder inside the extraction directory.
fls <- list.files(
  path = file.path(subject_dir, "Subject1"),
  all.files = TRUE,
  no.. = TRUE
)
fl <- file.path(subject_dir, "Subject1", fls)
# Read each tab-separated file (skipping its first line) and stack the rows.
dta <- lapply(fl, read.csv, sep = "\t", header = TRUE, skip = 1) %>%
  bind_rows()
str(dta <- as.data.frame(dta))
## 'data.frame': 1804 obs. of 31 variables:
## $ X.......F7.: num -0.9733 -0.7079 -0.3732 -0.0225 0.3523 ...
## $ X......FT7.: num -1.007 -1.022 -0.981 -0.878 -0.71 ...
## $ X.......T7.: num -0.1834 -0.1705 -0.1544 -0.1239 -0.0611 ...
## $ X......TP7.: num -1.05 -1.14 -1.18 -1.09 -0.88 ...
## $ X.......P7.: num -0.705 -0.791 -0.821 -0.817 -0.697 ...
## $ X......Fp1.: num -1.15 -1.084 -1.007 -0.922 -0.817 ...
## $ X.......F3.: num -1.042 -1.017 -0.956 -0.864 -0.748 ...
## $ X......FC3.: num -0.521 -0.52 -0.499 -0.457 -0.36 ...
## $ X.......C3.: num -0.248 -0.264 -0.249 -0.204 -0.14 ...
## $ X......CP3.: num -0.0064 0.0064 0.0595 0.1351 0.2059 ...
## $ X.......P3.: num 0.235 0.244 0.304 0.385 0.492 ...
## $ X.......O1.: num 0.623 0.484 0.389 0.31 0.269 ...
## $ X.......Fz.: num -0.362 -0.301 -0.236 -0.177 -0.138 ...
## $ X......FCz.: num 0.203 0.224 0.249 0.285 0.293 ...
## $ X.......Cz.: num 0.322 0.275 0.22 0.195 0.167 ...
## $ X......CPz.: num 0.529 0.462 0.441 0.433 0.404 ...
## $ X.......Pz.: num 0.0692 -0.0322 -0.0949 -0.1078 -0.1046 ...
## $ X.......Oz.: num 0.922 0.821 0.758 0.724 0.726 ...
## $ X......Fp2.: num -1.234 -1.121 -0.931 -0.706 -0.516 ...
## $ X.......F4.: num -0.689 -0.643 -0.579 -0.532 -0.52 ...
## $ X......FC4.: num 0.756 0.753 0.75 0.735 0.677 ...
## $ X.......C4.: num 0.793 0.714 0.656 0.602 0.536 ...
## $ X......CP4.: num 0.571 0.631 0.66 0.648 0.59 ...
## $ X.......P4.: num 0.956 0.933 0.917 0.896 0.821 ...
## $ X.......O2.: num 0.909 0.763 0.645 0.537 0.47 ...
## $ X.......F8.: num 0.0611 0.1673 0.2461 0.2687 0.2059 ...
## $ X......FT8.: num -0.3459 -0.2864 -0.1657 -0.0322 0.0676 ...
## $ X.......T8.: num 0.507 0.809 1.097 1.35 1.511 ...
## $ X......TP8.: num 1.44 1.82 2.16 2.41 2.53 ...
## $ X.......P8.: num 1.45 1.47 1.47 1.42 1.34 ...
## $ X : logi NA NA NA NA NA NA ...
# Drop the extra 31st column (`X`, all NA in the str() output).
dta <- dta[-31]
修改變項名稱並畫圖
# Strip the leading "X" plus dots and the trailing dot from every column name,
# leaving the bare label (e.g. "X.......F7." becomes "F7"). The earlier
# two-step version recomputed from names(dta) in its second step and so
# discarded the prefix removal.
namelist <- gsub("^X\\.+|\\.+$", "", names(dta))
colnames(dta) <- namelist
# Reshape to long form: one row per (ID, Attribute) pair. Row ids come from the
# actual row count instead of a hard-coded 1:1804.
dta2 <- dta %>%
  mutate(ID = seq_len(n())) %>%
  gather(key = "Attribute", value = "score", 1:30) %>%
  mutate(Attribute_f = as.factor(Attribute))
# One box plot of score per attribute.
ggplot(dta2, aes(Attribute_f, score)) +
  geom_boxplot() +
  labs(x = "腦位置", y = "毫秒")
6
讀進資料後畫圖
# Locate the blank column that separates the two fields of city.txt.
readr::fwf_empty("C:/Users/user/Dropbox/1062-Data_manage/0319/city.txt")[1:2]
## $begin
## [1] 0 19
##
## $end
## [1] 17 NA
# Read the file as two fixed-width fields: an 18-character city name followed
# by a 10-character density value.
city <- readr::read_fwf(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/city.txt",
  col_positions = readr::fwf_cols(cityname = 18, density = 10),
  skip = 0
)
## Parsed with column specification:
## cols(
## cityname = col_character(),
## density = col_number()
## )
# Keep only the first two columns to simplify the structure of `city`.
city <- city[, 1:2]
# Turn cityname into a factor so it can be used for plotting.
city$cityname <- as.factor(city$cityname)
plot(x = city$cityname, y = city$density)
7
# Read schiz.csv, which has no header row. The stray empty argument of the
# earlier call is removed.
schiz <- read.csv(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/schiz.csv",
  header = FALSE
)
# Label the first 11 rows "non" and the last 6 "schiz", number the 17 rows,
# then stack the 30 measurement columns into long form.
schiz2 <- schiz %>%
  mutate(schiztab = c(rep("non", 11), rep("schiz", 6)), id = 1:17) %>%
  gather(key = "measure", value = "ms", 1:30) %>%
  mutate(measure = as.factor(measure))
plot(schiz2$id, schiz2$ms)
用anova檢驗正常人與患者間是否有差異,發現存在顯著差異。
# NOTE(review): every id contributes 30 rows, so this one-way ANOVA treats
# repeated measurements of one person as independent -- confirm that is intended.
summary(aov(ms ~ schiztab, data = schiz2))
## Df Sum Sq Mean Sq F value Pr(>F)
## schiztab 1 4506212 4506212 166.5 <2e-16 ***
## Residuals 508 13751799 27070
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
8
Make a barplot to display the number of students from each major.
# Look at what the data look like.
# Read the Big5-encoded roster, skipping the first line of the file.
roster <- read.csv(
  "C:/Users/user/Dropbox/1062-Data_manage/0319/ncku_roster.csv",
  skip = 1,
  header = TRUE,
  fileEncoding = "big5"
)
colnames(roster) <- c("id", "major", "code", "ID", "name", "NA", "time")
# Split the second column at the space into `major` and `info`, then make
# `major` a factor.
roster <- separate(roster, 2, into = c("major", "info"), sep = " ")
roster <- mutate(roster, major = as.factor(major))
head(roster)
## id major info code ID name NA time
## 1 1 心理系 U7031 D840239 蘇 NA 02/17/2016 09:17:40
## 2 2 心理系 U7031 D840057 吳 NA 02/17/2016 09:17:28
## 3 3 心理系 U7031 D841311 余 NA 02/17/2016 09:09:10
## 4 4 心理系 U7031 D840140 王 NA 02/17/2016 09:09:34
## 5 5 教育所 U3006 U360098 劉 NA 01/18/2016 14:56:35
## 6 6 教育所 U3006 U380416 陳 NA 01/25/2016 16:01:08
# Count the students per major and draw the counts as bars.
barplot(table(roster$major))
9
Search PubMed for the number of articles related to “deep learning” published in each year from 2001 to 2017, and plot the results.
# Load RISmed (installing it first if needed) for the PubMed query helpers.
pacman::p_load(RISmed)
Span <- 2001:2017
# Run one PubMed esearch per year and keep the hit count. vapply() makes the
# result a numeric vector with one count per year, and fails loudly if a query
# returns anything else.
Tally <- vapply(
  Span,
  function(year) {
    QueryCount(EUtilsSummary(
      "deep learning",
      type = "esearch", db = "pubmed",
      mindate = year, maxdate = year
    ))
  },
  FUN.VALUE = numeric(1)
)
# Label each count with its year so the bars are named.
names(Tally) <- Span
barplot(Tally, las = 2, ylab = "Number of Articles")