true
Exercise 1
# Exercise 1, part 1: compare two ways of pulling one column out of a
# data frame and inspect the class of each result.
library(MASS)
# Show the structure of the anorexia data set.
str(anorexia)
## 'data.frame': 72 obs. of 3 variables:
## $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
## $ Prewt : num 80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
## $ Postwt: num 80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...
# Matrix-style indexing by position: all rows, second column.
X1<-anorexia[,2]
head(X1)
## [1] 80.7 89.4 91.8 74.0 78.1 88.3
class(X1)
## [1] "numeric"
# The result is a plain numeric vector holding the Prewt values.
# List-style single-bracket indexing by column name.
X2<-anorexia["Prewt"]
head(X2)
## Prewt
## 1 80.7
## 2 89.4
## 3 91.8
## 4 74.0
## 5 78.1
## 6 88.3
class(X2)
## [1] "data.frame"
# The result is still a data frame (a single column named Prewt).
# Exercise 1, part 2: fit Postwt on Treat twice, once as a factor and once
# after coercing the factor to numbers, and compare the coefficients.
lm(Postwt ~ Treat, data=anorexia)
##
## Call:
## lm(formula = Postwt ~ Treat, data = anorexia)
##
## Coefficients:
## (Intercept) TreatCont TreatFT
## 85.697 -4.589 4.798
# Treat is a categorical predictor here: the fit reports a separate
# coefficient for each non-reference level (TreatCont, TreatFT).
lm(Postwt ~ as.numeric(Treat), data=anorexia)
##
## Call:
## lm(formula = Postwt ~ as.numeric(Treat), data = anorexia)
##
## Coefficients:
## (Intercept) as.numeric(Treat)
## 82.036 1.711
# Treat has been converted to a continuous predictor, so a single slope
# is estimated instead of one coefficient per level.
# The slope could be read as the change in the post-test values as
# treatment intensity increases or decreases,
# but that requires treatment intensity to be quantifiable and the levels
# to be equally spaced.
Exercise 2
library(dplyr)
##
## 载入程辑包:'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Exercise 2: simulate 10,000 throws of three fair dice and draw the
# empirical histogram of their sum.
set.seed(1)  # fixed seed so the simulated draws are reproducible
n_rolls <- 10000
faces <- 1:6
# Equal face probabilities are passed explicitly, as in the original call,
# so the sequence of random draws stays the same.
face_prob <- rep(1 / 6, 6)
x1 <- sample(faces, n_rolls, replace = TRUE, prob = face_prob)
x2 <- sample(faces, n_rolls, replace = TRUE, prob = face_prob)
x3 <- sample(faces, n_rolls, replace = TRUE, prob = face_prob)
x <- x1 + x2 + x3
# Call hist() directly instead of piping into it: through `%>%` the
# argument is seen as `.`, which produces the title "Histogram of ." and
# the x-axis label ".".
# The sum is an integer between 3 and 18, so half-integer breaks give each
# possible sum its own bar instead of merging neighbouring values.
hist(
  x,
  breaks = seq(2.5, 18.5, by = 1),
  main = "Empirical histogram of the sum of three dice",
  xlab = "Sum of three dice"
)
Exercise 3
#
# An R script for IQ_Beh data set
#
#
# Read the IQ_Beh data: the first line of the file supplies the variable
# names and the first column is used as row names.
# NOTE(review): absolute, machine-specific path; the script only runs
# where this exact file location exists.
dta <- read.table(
  "C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\IQ_Beh.txt",
  header = TRUE, row.names = 1
)
# Show the structure of the object.
str(dta)
## 'data.frame': 94 obs. of 3 variables:
## $ Dep: chr "N" "N" "N" "N" ...
## $ IQ : int 103 124 124 104 96 92 124 99 92 116 ...
## $ BP : int 4 12 9 3 3 3 6 4 3 9 ...
# Show the first few rows.
head(dta)
## Dep IQ BP
## 1 N 103 4
## 2 N 124 12
## 3 N 124 9
## 4 N 104 3
## 5 D 96 3
## 6 N 92 3
# Show the class of the data object.
class(dta)
## [1] "data.frame"
# Show the number of rows and columns.
dim(dta)
## [1] 94 3
# Show the variable names.
names(dta)
## [1] "Dep" "IQ" "BP"
# Check whether the BP column is a vector.
is.vector(dta$BP)
## [1] TRUE
# Show the first observation (row 1, all columns).
dta[1, ]
## Dep IQ BP
## 1 N 103 4
#
dta[1:3, "IQ"] # show rows 1 to 3 of the IQ column
## [1] 103 124 124
# Sort the rows by BP in ascending order and show the last six rows,
# i.e. the six observations with the largest BP values.
tail(dta[order(dta$BP), ])
## Dep IQ BP
## 16 N 89 11
## 58 N 117 11
## 66 N 126 11
## 2 N 124 12
## 73 D 99 13
## 12 D 22 17
# Sort the rows by BP in descending order (note the minus sign) and show
# the last four rows, i.e. the four observations with the smallest BP values.
tail(dta[order(-dta$BP), ], 4)
## Dep IQ BP
## 77 N 124 1
## 80 N 121 1
## 24 N 106 0
## 75 N 122 0
# Histogram of IQ, showing where most of the scores fall.
hist(dta$IQ, xlab = "IQ", main = "")

# Box plots with Dep on the horizontal axis and BP on the vertical axis.
with(
  dta,
  boxplot(BP ~ Dep,
          xlab = "Depression",
          ylab = "Behavior problem score")
)

# Scatter plot of IQ (vertical axis) against BP (horizontal axis),
# with a reference grid added afterwards.
with(
  dta,
  plot(BP, IQ, pch = 20, col = 2,
       xlab = "Behavior problem score", ylab = "IQ")
)
grid()

# Scatter plot of BP against IQ: set up empty axes, draw each point as its
# Dep label, then add one fitted line per group
# (solid for "D", dashed for "N").
plot(dta$IQ, dta$BP, type = "n",
     xlab = "IQ", ylab = "Behavior problem score")
with(dta, text(IQ, BP, labels = Dep, cex = 0.5))
abline(lm(BP ~ IQ, data = subset(dta, Dep == "D")))
abline(lm(BP ~ IQ, data = subset(dta, Dep == "N")), lty = 2)

## end
Exercise 4
# Exercise 4: total the births in usBirths2015.txt by season.
# NOTE(review): absolute, machine-specific path; the script only runs
# where this exact file location exists.
dta2 <- read.table(
  "C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\usBirths2015.txt",
  header = TRUE
)
# The re-ordering below is purely positional. With any other row count it
# would silently create NA rows or drop rows, so stop early instead.
stopifnot(nrow(dta2) == 12L)
# Move rows 1-2 to the end so that consecutive groups of three rows line up
# with the season labels.
# NOTE(review): presumably the rows are the months January to December in
# calendar order, so the groups are Mar-May, Jun-Aug, Sep-Nov, Dec-Feb;
# confirm against the file.
dta <- dta2[c(3:12, 1:2), ]
dta$season <- rep(c("spring", "summer", "autumn", "winter"), each = 3)
# Sum the birth column within each season.
aggregate(birth ~ season, data = dta, sum)
## season birth
## 1 autumn 1005343
## 2 spring 977672
## 3 summer 1035747
## 4 winter 959735
Exercise 5
# Exercise 5: summarise the reading times in readingtimes.txt.
# NOTE(review): absolute, machine-specific path; the script only runs
# where this exact file location exists.
dta3 <- read.table(
  "C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\readingtimes.txt",
  header = TRUE
)
str(dta3)
## 'data.frame': 7 obs. of 14 variables:
## $ Snt : int 1 2 3 4 5 6 7
## $ Sp : int 1 2 3 4 5 6 7
## $ Wrds: int 13 16 9 9 10 18 6
## $ New : int 1 3 2 2 3 4 1
## $ S01 : num 3.43 6.48 1.71 3.68 4 ...
## $ S02 : num 2.79 5.41 2.34 3.71 2.9 ...
## $ S03 : num 4.16 4.49 3.02 2.87 2.99 ...
## $ S04 : num 3.07 5.06 2.46 2.73 2.67 ...
## $ S05 : num 3.62 9.29 6.04 4.21 3.88 ...
## $ S06 : num 3.16 5.64 2.46 6.24 3.22 ...
## $ S07 : num 3.23 8.36 4.92 3.72 3.14 ...
## $ S08 : num 7.16 4.31 3.37 6.33 6.14 ...
## $ S09 : num 1.54 2.95 1.38 1.15 2.76 ...
## $ S10 : num 4.06 6.65 2.18 3.66 3.33 ...
# Column means of S01..S10 (columns 5 to 14), computed once and reused below.
# NOTE(review): presumably one column per subject and one row per
# sentence; confirm against the data description.
mean_time <- colMeans(dta3[, 5:14])
mean_time
## S01 S02 S03 S04 S05 S06 S07 S08
## 4.130143 3.847000 3.774286 3.779286 5.620000 5.371286 5.228429 5.011429
## S09 S10
## 2.741000 4.493714
# Rank the columns by their mean (1 = smallest mean).
rank(mean_time)
## S01 S02 S03 S04 S05 S06 S07 S08 S09 S10
## 5 4 2 3 10 9 8 7 1 6
# Divide each column mean by the mean of Wrds.
# NOTE(review): this reads as average time per word if Wrds is the word
# count of each sentence; confirm.
time_per_word <- mean_time / mean(dta3$Wrds)
time_per_word
## S01 S02 S03 S04 S05 S06 S07 S08
## 0.3569259 0.3324568 0.3261728 0.3266049 0.4856790 0.4641852 0.4518395 0.4330864
## S09 S10
## 0.2368765 0.3883457
# Average of the ten per-column ratios.
mean(time_per_word)
## [1] 0.3802173