2016.03.21 ppt

ppt.3 Input data

fLoc<-“http://www.amstat.org/publications/jse/datasets/sat.dat.txt” dta <- read.table(fLoc, row.names=1) names(dta) <- c(“spend”, “PTR”, “pay”,“PE”,“verb”,“math”,“sat”) str(dta) #選擇名字,which選一行 row.names(dta[which(nchar(row.names(dta)) < 5), ])

ppt.4

attributes(state.region) #若希望針對四種區域做分別,給名字 dta\(region <- state.region table(dta\)region) row.names(dta[which(dta$region==“Northeast”),])

ppt.5

head(dta[ , c(“sat”, “region”)]) #針對兩項,看資料 unstack(dta[ , c(“sat”, “region”)]) #unstack,根據名字作清單(依據資料屬性,逕自執行)

ppt.6

subset(dta, pay > quantile(pay, prob=0.9)) #選擇90%以上有錢人(唯一個條件句) subset(dta, region==“Northeast”, select=c(verb, math))

ppt.7

split(dta, dta\(region)\)Northeast # with(dta, split(dta[,-c(1:4)], math < quantile(math, prob=0.1))$’TRUE’)

ppt.8

aggregate(dta[, -8], by=list(dta\(region), FUN=median) #aggregate(data,list(條件句,選取的方式) cbind(aggregate(dta[, "sat"], by=list(dta\)region), FUN=median), iqr=aggregate(dta[, “sat”], by=list(dta\(region), FUN=IQR)\)x) #iqr,中間50%;mean=

ppt.9

with(dta, tapply(sat, region, mean)) #ly() with(dta, by(sat, region, mean))

ppt.10

sapply(dta[, 5:7], mean) #對5~7 lapply(dta[, 5:7], sd)

ppt.11

set.seed(20141001) #定住起始值 print(rn <- sample(50, 9)) dtarn, 5:7

ppt.12

sort(dta\(sat)[1:6] #排序 sort(dta\)sat, dec=T)[1:6] head(dta[order(dta$sat), 7:8]) #order tail(dta[order(dta$sat), 7:8])

ppt.13

dta[order(dta\(verb, dta\)math), 5:7] data.frame(SAT=dta\(sat, Rank=rank(-dta\)sat)) #rank,名次

ppt.14

dta[1:9, c(“sat”, “math”, “verb”)]

ppt.15

dta\(payf <- ifelse(dta\)pay > mean(dta\(pay), "aa", "ba") dta\)payf <- ordered(dta$payf, levels=c(“ba”, “aa”)) histogram(~ sat | payf, data=dta, xlab=“SAT score”)

ppt.16

plot(sat ~ region, data=dta, xlab=“Region”, ylab=“SAT”) obysat <- with(dta, reorder(region, sat, median)) boxplot(sat ~ obysat, data=dta, ylab=“SAT”, col=“lightgray”, varwidth=T); grid() #右邊的圖較佳 #合狀圖中間是median

ppt.17

dta\(satf <- with(dta, cut(sat, ordered=T, breaks=c(0,900,1050,1210), labels=c("L", "M", "H"))) table(dta\)satf) with(dta, table(satf, region)) xtabs(~ satf + region, data=dta) prop.table(xtabs(~ satf + region, data=dta))

ppt.18

t0 <- with(dta, table(above=(sat > mean(sat)), region)) addmargins(t0, 2) data.frame(t0) #匯出後變成Data frame

ppt.19

print(nE <- subset(dta, region==“Northeast”, select=c(“verb”, “math”, “sat”))) #重點,寬及長 #寬==> 1 2 3 4 # S1 # s2 # s3 #長==> # S1 # S1 # S1 # S1 # S2 # S2 # S2 # S2 # S3 # S4

ppt.20

print(nEL <- reshape(nE, v.names=“score”,timevar=“Test”, varying=list(1:2), times=c(“verb”, “math”), ids=row.names(nE), idvar=“state”, direction=“long”))

ppt.21

reshape(nEL, idvar=“state”, timevar=“Test”, v.names=“score”, direction=“wide”)

ppt.22

head(state.x77[, 1:2]) dtax <- data.frame(state.x77[, 1:2]) dtax\(state <- row.names(state.x77) dtay <- data.frame(dta[, 2:3]) dtay\)state <- row.names(dta) merge(dtax, dtay, by=“state”)

ppt.23

any(is.na(dta)) dta[sample(dim(dta)[1], 5), “spend”] <- NA apply(apply(dta, 2, is.na), 2, sum) dta[which(is.na(dta$spend)), ] rbind(dim(dta), dim(na.omit(dta))) dta[which(is.na(dta$spend)), “spend”] <- mean(dta$spend, na.rm=T)

ppt.24

str(nE) require(reshape2) melt(nE, variable.name=“Test”, value.name=“Score”)

ppt.25

nEL <- melt(nE, variable.name=“Test”, value.name=“Score”) nEL$State <- rep(row.names(nE), 3) dcast(nEL, State ~ Test, value.var=“Score”)

ppt.26

dta <- UCBAdmissions str(dta) dim(dta) dta[,,1:2]

ppt.27

print(dtal <- melt(dta))

ppt.28

print(dtal <- dtal[order(dtal$Admit), ]) dtal.1 <- dtal[1:12, ] dtal.2 <- dtal[13:24, ] dtaw <- merge(dtal.1, dtal.2, by=c(“Dept”,“Gender”)) print(dtaw <- dtaw[, c(1,2,4,6)]) names(dtaw)[3:4] <- c(“Admit”, “Reject”) dtaw

ppt.29

dcast(dtal, Dept + Gender ~ Admit)

ppt.30

unstack(dtaw[, c(“Admit”, “Dept”)]) unstack(dtaw[, c(“Admit”, “Gender”)])

ppt.31

acast(dtal, Admit ~ Gender ~ Dept)[,,1:2]

ppt.32

acast(dtal, Admit ~ Dept, sum) dcast(dtal, Gender ~ Dept, sum)

ppt.33

coplot(len ~ dose | supp, data=ToothGrowth, panel=panel.smooth, xlab=“Dose”, ylab=“Length”)

ppt.34

?ToothGrowth str(ToothGrowth) head(ToothGrowth)

ppt.35

library(plyr) print(dta <- ddply(ToothGrowth, c(“dose”, “supp”), summarize, ll=mean(len)+c(-1.96,1.96)*sd(len)/sqrt(length(len))))

ppt.36

dta$bnd <- rep(c(“l”, “u”), 6) print(dtaw <- dcast(dta, supp + dose ~ bnd, value.var=“ll”))

ppt.37

s <- 1:6; d <- 2pi/1000 plot(c(0.25, 2.25), c(5, 30), type=“n”, xlab=“Dose”, ylab=“Length”) segments(dtaw\(dose[s]+d*(s-1), dtaw\)l[s], dtaw\(dose[s]+d*(s-1), dtaw\)u[s], col=dtaw\(supp, lwd=2) points(dtaw\)dose[s]+d(s-1), (dtaw\(l[s]+dtaw\)u[s])/2, col=dtaw$supp, pch=16) legend(“topleft”, legend=c(“OJ”, “VC”), text.col=c(1,2))