EX2
# Read the tab-separated data file from the RABE5 site.
# header = TRUE takes the column names from the first line; as.is = TRUE
# keeps text columns as character instead of converting them to factors.
link1 <- "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt"
dta <- read.table(link1, header = TRUE, sep = "\t", as.is = TRUE)
# Show the first six rows
head(dta)
## City COL PD URate Pop Taxes Income RTWL
## 1 Atlanta 169 414 13.6 1790128 5128 2961 1
## 2 Austin 143 239 11.0 396891 4303 1711 1
## 3 Bakersfield 339 43 23.7 349874 4166 2122 0
## 4 Baltimore 173 951 21.0 2147850 5001 4654 0
## 5 Baton Rouge 99 255 16.0 411725 3965 1620 1
## 6 Boston 363 1257 24.4 3914071 4928 5634 0
# Check the structure of the data (number of rows and the type of each column)
str(dta)
## 'data.frame': 38 obs. of 8 variables:
## $ City : chr "Atlanta" "Austin" "Bakersfield" "Baltimore" ...
## $ COL : int 169 143 339 173 99 363 253 117 294 291 ...
## $ PD : int 414 239 43 951 255 1257 834 162 229 1886 ...
## $ URate : num 13.6 11 23.7 21 16 24.4 39.2 31.5 18.2 31.5 ...
## $ Pop : int 1790128 396891 349874 2147850 411725 3914071 1326848 162304 164145 7015251 ...
## $ Taxes : int 5128 4303 4166 5001 3965 4928 4471 4813 4839 5408 ...
## $ Income: int 2961 1711 2122 4654 1620 5634 7213 5535 7224 6113 ...
## $ RTWL : int 1 1 0 0 1 0 0 0 1 0 ...
EX3
EX3.1 Re-name the variable ‘sex’ as ‘Gender’.
# Build the URL with the account/password string embedded in it.
# NOTE(review): IDPW is not defined in this section; presumably it holds a
# "user:password@" prefix -- confirm where it is set.
link2 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/juniorSchools.txt"
)
# Read the data; the first line of the file holds the column names
jsp <- read.table(link2, header = TRUE)
# Show the first six rows
head(jsp)
## school class sex soc ravens pupil english math year
## 1 S1 C1 G 9 23 P1 72 23 0
## 2 S1 C1 G 9 23 P1 80 24 1
## 3 S1 C1 G 9 23 P1 39 23 2
## 4 S1 C1 B 2 15 P2 7 14 0
## 5 S1 C1 B 2 15 P2 17 11 1
## 6 S1 C1 B 2 22 P3 88 36 0
# Rename the variable "sex" to "Gender".
# The column is located by name rather than by position, so the rename
# still hits the right column if the column order ever changes.
names(jsp)[names(jsp) == "sex"] <- "Gender"
# Look at the data again to confirm the new name
head(jsp)
## school class Gender soc ravens pupil english math year
## 1 S1 C1 G 9 23 P1 72 23 0
## 2 S1 C1 G 9 23 P1 80 24 1
## 3 S1 C1 G 9 23 P1 39 23 2
## 4 S1 C1 B 2 15 P2 7 14 0
## 5 S1 C1 B 2 15 P2 17 11 1
## 6 S1 C1 B 2 22 P3 88 36 0
3.2 Re-label the values of the social class variable using the (long character strings) descriptive terms to produce the following plot.
# Convert soc to a factor: codes 1-9 become the descriptive level names below
jsp$soc <- factor(
  jsp$soc,
  levels = 1:9,
  labels = c(
    "I", "II", "III_0man", "III_man", "IV", "V",
    "VI_Unemp_L", "VII_emp_NC", "VIII_Miss_Dad"
  )
)
# Check the type of soc after conversion
str(jsp$soc)
## Factor w/ 9 levels "I","II","III_0man",..: 9 9 9 2 2 2 2 2 9 9 ...
# List the level names of soc
levels(jsp$soc)
## [1] "I" "II" "III_0man" "III_man"
## [5] "IV" "V" "VI_Unemp_L" "VII_emp_NC"
## [9] "VIII_Miss_Dad"
# Box plot of math score for each social class; axis text is shrunk
# (cex.axis = 0.7) so the long level names fit
boxplot(
  math ~ soc,
  data = jsp,
  xlab = "SOC",
  ylab = "math",
  cex.axis = 0.7
)

Ex3.3 Write the edited jsp. data object out as a comma-separated-value file to a data folder
# Write the edited jsp data frame as CSV into a "data" folder, as the
# exercise asks. The folder is created if it does not exist yet.
# row.names = FALSE avoids writing the automatic row numbers as an extra,
# unnamed first column.
dir.create("data", showWarnings = FALSE)
write.csv(jsp, file.path("data", "junior_school_project.csv"), row.names = FALSE)
EX4
library(readr)
# Location of the fixed-width data file
link3 <- "https://ww2.amstat.org/publications/jse/datasets/aaup2.dat.txt"
# Let readr guess the column boundaries from the empty columns in the file;
# keep only the first two elements of the result (begin and end positions)
readr::fwf_empty(file = link3)[1:2]
## $begin
## [1] 0 6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
##
## $end
## [1] 5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
# Read the file with manually adjusted column widths (17 fields).
# Both "*" and "NA" are treated as missing values.
dta <- read_fwf(
  link3,
  col_positions = fwf_cols(
    ID = 5, University = 32,
    var3 = 3, var4 = 4, var5 = 4, var6 = 4, var7 = 4,
    var8 = 5, var9 = 4, var10 = 4, var11 = 5, var12 = 4,
    var13 = 4, var14 = 4, var15 = 4, var16 = 4, var17 = 5
  ),
  na = c("*", "NA"),
  skip = 0
)
## Parsed with column specification:
## cols(
## ID = col_integer(),
## University = col_character(),
## var3 = col_character(),
## var4 = col_character(),
## var5 = col_integer(),
## var6 = col_integer(),
## var7 = col_integer(),
## var8 = col_integer(),
## var9 = col_integer(),
## var10 = col_integer(),
## var11 = col_integer(),
## var12 = col_integer(),
## var13 = col_integer(),
## var14 = col_integer(),
## var15 = col_integer(),
## var16 = col_integer(),
## var17 = col_integer()
## )
# Preview the first rows of the parsed table
head(dta)
## # A tibble: 6 x 17
## ID University var3 var4 var5 var6 var7 var8 var9 var10 var11
## <int> <chr> <chr> <chr> <int> <int> <int> <int> <int> <int> <int>
## 1 1061 Alaska Paci… AK IIB 454 382 362 382 567 485 471
## 2 1063 Univ.Alaska… AK I 686 560 432 508 914 753 572
## 3 1065 Univ.Alaska… AK IIA 533 494 329 415 716 663 442
## 4 11462 Univ.Alaska… AK IIA 612 507 414 498 825 681 557
## 5 1002 Alabama Agr… AL IIA 442 369 310 350 530 444 376
## 6 1004 University … AL IIA 441 385 310 388 542 473 383
## # ... with 6 more variables: var12 <int>, var13 <int>, var14 <int>,
## # var15 <int>, var16 <int>, var17 <int>
EX5
# Download the zip archive and extract it.
# mode = "wb" writes the download in binary mode.
# NOTE(review): IDPW is not defined in this section -- confirm where it is set.
link4 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/Subject1.zip"
)
download.file(url = link4, destfile = "Subject1.zip", mode = "wb")
unzip(zipfile = "Subject1.zip")
# Paths of the four extracted data files: Subject1/1w.dat ... Subject1/4w.dat.
# paste0() has no `sep` argument, so the file suffix is passed as an ordinary
# string to be concatenated.
link5 <- paste0("Subject1/", 1:4, "w.dat")
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Read every file in link5 as tab-separated text (first line skipped, next
# line used as header), stack the results, and drop the column named X.
dta2 <- lapply(link5, read.table, header = TRUE, skip = 1, sep = "\t") %>%
  rbindlist() %>%
  select(-X)
# Clean up the variable names: first remove "X" plus the six characters that
# follow it, then remove any remaining literal dots
names(dta2) <- gsub("[.]", "", gsub("X......", "", names(dta2)))
#繪圖
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
##
## expand
# Reshape to long format for plotting.
# ID is a running row number (derived from the data instead of a hard-coded
# row count); columns 1-30 are stacked into Attribute/score pairs, and
# Attribute_f is the factor version of Attribute used on the x axis.
dta3 <- dta2 %>%
  mutate(ID = row_number()) %>%
  gather(key = "Attribute", value = "score", 1:30) %>%
  mutate(Attribute_f = as.factor(Attribute))
# One box per attribute
ggplot(dta3, aes(Attribute_f, score)) +
  geom_boxplot() +
  labs(x = "腦位置", y = "毫秒")

EX6
# Read and tidy the file.
# NOTE(review): IDPW is not defined in this section -- confirm where it is set.
link6 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/cities10.txt"
)
# Guess the column boundaries; keep only the first two elements of the result
fwf_empty(file = link6)[1:2]
## $begin
## [1] 0 19
##
## $end
## [1] 17 NA
# Read the first 10 rows as two fixed-width fields (19 and 8 characters wide).
# The argument is spelled out as `widths` instead of relying on partial
# matching of `width`.
dta4 <- read.fwf(
  link6,
  widths = c(19, 8),
  col.names = c("city", "population"),
  n = 10
)
# Show the first six rows
head(dta4)
## city population
## 1 New York, NY 66,834.6
## 2 Kings, NY 34,722.9
## 3 Bronx, NY 31,729.8
## 4 Queens, NY 20,453.0
## 5 San Francisco, CA 16,526.2
## 6 Hudson, NJ 12,956.9
# Bar chart of population by city.
# `population` is read as text such as "66,834.6", which ggplot would treat as
# a discrete variable. The thousands separators are removed and the value is
# converted to a number so bar heights are proportional; the original
# formatted text is still used for the labels inside the bars.
ggplot(
  data = dta4,
  aes(x = city, y = as.numeric(gsub(",", "", population)))
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = population), vjust = 1.6, color = "white", size = 3.5) +
  labs(y = "population") +
  theme_minimal()

EX7
# Read the data: skip the first four lines of the file and name the
# thirty columns T101 ... T130
link7 <- "http://www.stat.columbia.edu/~gelman/book/data/schiz.asc"
dta5 <- read.table(
  file = link7,
  skip = 4,
  col.names = paste0("T", 101:130)
)
head(dta5)
## T101 T102 T103 T104 T105 T106 T107 T108 T109 T110 T111 T112 T113 T114
## 1 312 272 350 286 268 328 298 356 292 308 296 372 396 402
## 2 354 346 384 342 302 312 322 376 306 402 320 298 308 414
## 3 256 284 320 274 324 268 370 430 314 312 362 256 342 388
## 4 260 294 306 292 264 290 272 268 344 362 330 280 354 320
## 5 204 272 250 260 314 308 246 236 208 268 272 264 308 236
## 6 590 312 286 310 778 364 318 316 316 298 344 262 274 330
## T115 T116 T117 T118 T119 T120 T121 T122 T123 T124 T125 T126 T127 T128
## 1 280 330 254 282 350 328 332 308 292 258 340 242 306 328
## 2 304 422 388 422 426 338 332 426 478 372 392 374 430 388
## 3 302 366 298 396 274 226 328 274 258 220 236 272 322 284
## 4 334 276 418 288 338 350 350 324 286 322 280 256 218 256
## 5 238 350 272 252 252 236 306 238 350 206 260 280 274 318
## 6 312 310 376 326 346 334 282 292 282 300 290 302 300 306
## T129 T130
## 1 294 272
## 2 354 368
## 3 274 356
## 4 220 356
## 5 268 210
## 6 294 444
# Tidy the data: tag the first 11 rows "N" and the last 6 rows "Y" in schizo,
# give each row an ID S1 ... S17, then stack the 30 trial columns into
# trial/RT pairs. ID levels are kept in the order S1 ... S17 so the plot
# shows the subjects in that order.
dta5 <- dta5 %>%
  mutate(
    schizo = rep(c("N", "Y"), times = c(11, 6)),
    ID = paste0("S", 1:17)
  ) %>%
  gather(key = "trial", value = "RT", 1:30) %>%
  mutate(
    ID = factor(ID, levels = paste0("S", 1:17)),
    schizo = factor(schizo),
    trial = factor(trial)
  )
# Box plot of reaction time per subject, coloured by schizo
ggplot(dta5, aes(x = ID, y = RT, color = schizo)) +
  geom_boxplot() +
  labs(x = "Subject", y = "Reaction Time(ms)")

# Statistical analysis: analysis of variance of RT on schizo, ID and trial.
# NOTE(review): each ID belongs to exactly one schizo group, so ID is nested
# in schizo -- consider whether a subject-level error term is more
# appropriate.
aov_dta5 <- aov(formula = RT ~ schizo + ID + trial, data = dta5)
summary(aov_dta5)
## Df Sum Sq Mean Sq F value Pr(>F)
## schizo 1 4506212 4506212 204.034 <2e-16 ***
## ID 15 2865353 191024 8.649 <2e-16 ***
## trial 29 638735 22025 0.997 0.472
## Residuals 464 10247711 22086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
分析結果顯示個案是否為思覺失調症患者在反應時間上有達統計上顯著差異
EX8
# Read the Big5-encoded course file in two passes: the first line supplies
# the column names, and the data are read with the first two lines skipped.
headers <- read.csv(
  "course.csv",
  header = FALSE, nrows = 1, as.is = TRUE, fileEncoding = "big5"
)
dta6 <- read.csv(
  "course.csv",
  skip = 2, header = FALSE, fileEncoding = "big5"
)
# headers is a one-row data frame; flatten it to a plain vector before using
# it as the column names
colnames(dta6) <- unlist(headers, use.names = FALSE)
head(dta6)
## 座號 dpr 開課系序號 學號 姓名 成績 選課時間
## 1 1 心理系 U7031 D840239 蘇 NA 02/17/2016 09:17:40
## 2 2 心理系 U7031 D840057 吳 NA 02/17/2016 09:17:28
## 3 3 心理系 U7031 D841311 余 NA 02/17/2016 09:09:10
## 4 4 心理系 U7031 D840140 王 NA 02/17/2016 09:09:34
## 5 5 教育所 U3006 U360098 劉 NA 01/18/2016 14:56:35
## 6 6 教育所 U3006 U380416 陳 NA 01/25/2016 16:01:08
# Bar chart of the number of rows per dpr value; the font family is set
# explicitly for the Chinese axis labels
ggplot(dta6, aes(dpr, ..count..)) +
  geom_bar(fill = "#000099") +
  labs(x = "系所", y = "次數") +
  theme(text = element_text(family = "黑體-繁 中黑"))

EX9
使用老師ppt pg32 code
# Count PubMed records matching "deep learning" for each year in Span and
# draw the yearly counts as a bar chart.
pacman::p_load(RISmed)
Span <- 2001:2017
# vapply() guarantees one numeric count per year; a query that returns
# anything else raises an error instead of silently changing Tally's type.
Tally <- vapply(
  Span,
  function(i) {
    QueryCount(EUtilsSummary(
      "deep learning",
      type = "esearch", db = "pubmed",
      mindate = i, maxdate = i
    ))
  },
  numeric(1)
)
names(Tally) <- Span
barplot(
  Tally,
  las = 2, ylim = c(0, 1500), main = "", ylab = "Number of Articles"
)
