fLoc<-“http://www.amstat.org/publications/jse/datasets/sat.dat.txt” dta <- read.table(fLoc, row.names=1) names(dta) <- c(“spend”, “PTR”, “pay”,“PE”,“verb”,“math”,“sat”) str(dta) #選擇名字,which選一行 row.names(dta[which(nchar(row.names(dta)) < 5), ])
attributes(state.region) #若希望針對四種區域做分別,給名字 dta\(region <- state.region table(dta\)region) row.names(dta[which(dta$region==“Northeast”),])
head(dta[ , c(“sat”, “region”)]) #針對兩項,看資料 unstack(dta[ , c(“sat”, “region”)]) #unstack,根據名字作清單(依據資料屬性,逕自執行)
subset(dta, pay > quantile(pay, prob=0.9)) #選擇90%以上有錢人(唯一個條件句) subset(dta, region==“Northeast”, select=c(verb, math))
split(dta, dta\(region)\)Northeast # with(dta, split(dta[,-c(1:4)], math < quantile(math, prob=0.1))$’TRUE’)
aggregate(dta[, -8], by=list(dta\(region), FUN=median) #aggregate(data,list(條件句,選取的方式) cbind(aggregate(dta[, "sat"], by=list(dta\)region), FUN=median), iqr=aggregate(dta[, “sat”], by=list(dta\(region), FUN=IQR)\)x) #iqr,中間50%;mean=
with(dta, tapply(sat, region, mean)) #ly() with(dta, by(sat, region, mean))
sapply(dta[, 5:7], mean) #對5~7 lapply(dta[, 5:7], sd)
set.seed(20141001) #定住起始值 print(rn <- sample(50, 9)) dtarn, 5:7
sort(dta\(sat)[1:6] #排序 sort(dta\)sat, dec=T)[1:6] head(dta[order(dta$sat), 7:8]) #order tail(dta[order(dta$sat), 7:8])
dta[order(dta\(verb, dta\)math), 5:7] data.frame(SAT=dta\(sat, Rank=rank(-dta\)sat)) #rank,名次
dta[1:9, c(“sat”, “math”, “verb”)]
dta\(payf <- ifelse(dta\)pay > mean(dta\(pay), "aa", "ba") dta\)payf <- ordered(dta$payf, levels=c(“ba”, “aa”)) histogram(~ sat | payf, data=dta, xlab=“SAT score”)
plot(sat ~ region, data=dta, xlab=“Region”, ylab=“SAT”) obysat <- with(dta, reorder(region, sat, median)) boxplot(sat ~ obysat, data=dta, ylab=“SAT”, col=“lightgray”, varwidth=T); grid() #右邊的圖較佳 #合狀圖中間是median
dta\(satf <- with(dta, cut(sat, ordered=T, breaks=c(0,900,1050,1210), labels=c("L", "M", "H"))) table(dta\)satf) with(dta, table(satf, region)) xtabs(~ satf + region, data=dta) prop.table(xtabs(~ satf + region, data=dta))
t0 <- with(dta, table(above=(sat > mean(sat)), region)) addmargins(t0, 2) data.frame(t0) #匯出後變成Data frame
print(nE <- subset(dta, region==“Northeast”, select=c(“verb”, “math”, “sat”))) #重點,寬及長 #寬==> 1 2 3 4 # S1 # s2 # s3 #長==> # S1 # S1 # S1 # S1 # S2 # S2 # S2 # S2 # S3 # S4
print(nEL <- reshape(nE, v.names=“score”,timevar=“Test”, varying=list(1:2), times=c(“verb”, “math”), ids=row.names(nE), idvar=“state”, direction=“long”))
reshape(nEL, idvar=“state”, timevar=“Test”, v.names=“score”, direction=“wide”)
head(state.x77[, 1:2]) dtax <- data.frame(state.x77[, 1:2]) dtax\(state <- row.names(state.x77) dtay <- data.frame(dta[, 2:3]) dtay\)state <- row.names(dta) merge(dtax, dtay, by=“state”)
any(is.na(dta)) dta[sample(dim(dta)[1], 5), “spend”] <- NA apply(apply(dta, 2, is.na), 2, sum) dta[which(is.na(dta$spend)), ] rbind(dim(dta), dim(na.omit(dta))) dta[which(is.na(dta$spend)), “spend”] <- mean(dta$spend, na.rm=T)
str(nE) require(reshape2) melt(nE, variable.name=“Test”, value.name=“Score”)
nEL <- melt(nE, variable.name=“Test”, value.name=“Score”) nEL$State <- rep(row.names(nE), 3) dcast(nEL, State ~ Test, value.var=“Score”)
dta <- UCBAdmissions str(dta) dim(dta) dta[,,1:2]
print(dtal <- melt(dta))
print(dtal <- dtal[order(dtal$Admit), ]) dtal.1 <- dtal[1:12, ] dtal.2 <- dtal[13:24, ] dtaw <- merge(dtal.1, dtal.2, by=c(“Dept”,“Gender”)) print(dtaw <- dtaw[, c(1,2,4,6)]) names(dtaw)[3:4] <- c(“Admit”, “Reject”) dtaw
dcast(dtal, Dept + Gender ~ Admit)
unstack(dtaw[, c(“Admit”, “Dept”)]) unstack(dtaw[, c(“Admit”, “Gender”)])
acast(dtal, Admit ~ Gender ~ Dept)[,,1:2]
acast(dtal, Admit ~ Dept, sum) dcast(dtal, Gender ~ Dept, sum)
coplot(len ~ dose | supp, data=ToothGrowth, panel=panel.smooth, xlab=“Dose”, ylab=“Length”)
?ToothGrowth str(ToothGrowth) head(ToothGrowth)
library(plyr) print(dta <- ddply(ToothGrowth, c(“dose”, “supp”), summarize, ll=mean(len)+c(-1.96,1.96)*sd(len)/sqrt(length(len))))
dta$bnd <- rep(c(“l”, “u”), 6) print(dtaw <- dcast(dta, supp + dose ~ bnd, value.var=“ll”))
s <- 1:6; d <- 2pi/1000 plot(c(0.25, 2.25), c(5, 30), type=“n”, xlab=“Dose”, ylab=“Length”) segments(dtaw\(dose[s]+d*(s-1), dtaw\)l[s], dtaw\(dose[s]+d*(s-1), dtaw\)u[s], col=dtaw\(supp, lwd=2) points(dtaw\)dose[s]+d(s-1), (dtaw\(l[s]+dtaw\)u[s])/2, col=dtaw$supp, pch=16) legend(“topleft”, legend=c(“OJ”, “VC”), text.col=c(1,2))