homework

true

Exercise 1

# Load MASS (the package the `anorexia` data set is taken from here) and
# inspect the structure of the data frame.
library(MASS)
str(anorexia)

## 'data.frame':    72 obs. of  3 variables:
##  $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Prewt : num  80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ...
##  $ Postwt: num  80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...

# Matrix-style indexing with a single column index drops the data-frame
# wrapper: X1 is a plain numeric vector (column 2 is Prewt).
X1<-anorexia[,2]
head(X1)

## [1] 80.7 89.4 91.8 74.0 78.1 88.3

class(X1)

## [1] "numeric"

# X1 above holds the bare Prewt values.
# Indexing with a single column name keeps the container: X2 is a
# one-column data frame.
X2<-anorexia["Prewt"]
head(X2)

##   Prewt
## 1  80.7
## 2  89.4
## 3  91.8
## 4  74.0
## 5  78.1
## 6  88.3

# Confirm the class of the object returned by anorexia["Prewt"].
class(X2)

## [1] "data.frame"

# X2 is a data frame, not a bare vector.

# Treat is a factor, so lm() fits it as a categorical predictor: the printed
# coefficients are an intercept plus one term for each non-reference level
# (TreatCont and TreatFT), with CBT as the reference level.
lm(Postwt ~ Treat, data=anorexia)

## 
## Call:
## lm(formula = Postwt ~ Treat, data = anorexia)
## 
## Coefficients:
## (Intercept)    TreatCont      TreatFT  
##      85.697       -4.589        4.798

# In the model above, Treat is a categorical predictor.
# Here as.numeric() replaces the factor by its integer level codes
# (CBT = 1, Cont = 2, FT = 3), so Treat is fitted as one numeric predictor.
lm(Postwt ~ as.numeric(Treat), data=anorexia)

## 
## Call:
## lm(formula = Postwt ~ as.numeric(Treat), data = anorexia)
## 
## Coefficients:
##       (Intercept)  as.numeric(Treat)  
##            82.036              1.711

# Treat has been converted into a continuous predictor.
# The slope could be read as the change in the post-test measurement (Postwt)
# as treatment "intensity" increases or decreases by one step,
# but that only makes sense if intensity is quantifiable and the levels are
# equally spaced.

Exercise 2

library(dplyr)

## 
## 载入程辑包：'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Simulate 10,000 throws of three fair six-sided dice and plot the empirical
# distribution of their total.
set.seed(1)  # fixed seed so the draws are reproducible
n_throws <- 10000
faces <- 1:6
# Equal face probabilities are passed explicitly.
# NOTE(review): dropping `prob` may make sample() use a different sampling
# routine and therefore change the draws obtained for this seed.
face_prob <- rep(1 / 6, length(faces))
x1 <- sample(faces, n_throws, replace = TRUE, prob = face_prob)
x2 <- sample(faces, n_throws, replace = TRUE, prob = face_prob)
x3 <- sample(faces, n_throws, replace = TRUE, prob = face_prob)
x <- x1 + x2 + x3
# One unit-wide bin centred on each possible total (3 to 18) so that every
# total gets its own bar. hist() is called directly with an explicit title,
# because piping into hist() labels the plot "Histogram of .".
hist(x,
     breaks = seq(2.5, 18.5, by = 1),
     main = "Sum of three dice (10,000 throws)",
     xlab = "Sum of three dice")

# Empirical Histogram

Exercise 3

#
# An R script for the IQ_Beh data set
#

# Read the data file: the first line holds the column names and the first
# column supplies the row names.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine; adjust it before running elsewhere.
dta <- read.table("C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\IQ_Beh.txt", header = TRUE, row.names = 1)

# Show the structure of the object (column names, types and first values)
str(dta)

## 'data.frame':    94 obs. of  3 variables:
##  $ Dep: chr  "N" "N" "N" "N" ...
##  $ IQ : int  103 124 124 104 96 92 124 99 92 116 ...
##  $ BP : int  4 12 9 3 3 3 6 4 3 9 ...

# Show the first six observations
head(dta, n = 6L)

##   Dep  IQ BP
## 1   N 103  4
## 2   N 124 12
## 3   N 124  9
## 4   N 104  3
## 5   D  96  3
## 6   N  92  3

# Show the class of the data set object
class(dta)

## [1] "data.frame"

# Show the dimensions (number of rows, number of columns)
dim(dta)

## [1] 94  3

# Show the variable (column) names
colnames(dta)

## [1] "Dep" "IQ"  "BP"

# Check whether the BP column is a vector
is.vector(dta[["BP"]])

## [1] TRUE

# Show the first observation (all columns)
head(dta, n = 1L)

##   Dep  IQ BP
## 1   N 103  4

# Show the IQ values of observations 1 to 3
dta$IQ[1:3]

## [1] 103 124 124

# Sort the rows by BP in ascending order and show the last six rows,
# i.e. the observations with the highest BP scores
tail(dta[order(dta[["BP"]]), ], n = 6L)

##    Dep  IQ BP
## 16   N  89 11
## 58   N 117 11
## 66   N 126 11
## 2    N 124 12
## 73   D  99 13
## 12   D  22 17

# Sort the rows by BP in descending order and show the last 4 rows,
# i.e. the four observations with the lowest BP scores
tail(dta[order(-dta$BP), ], 4)

##    Dep  IQ BP
## 77   N 124  1
## 80   N 121  1
## 24   N 106  0
## 75   N 122  0

# Histogram of IQ (no main title) to see where most values fall
hist(dta$IQ, main = "", xlab = "IQ")

# Box plot of BP (vertical axis) for each level of Dep (horizontal axis)
with(
  dta,
  boxplot(BP ~ Dep,
          ylab = "Behavior problem score",
          xlab = "Depression")
)

# Scatter plot with BP on the horizontal axis and IQ on the vertical axis,
# followed by a background grid
with(
  dta,
  plot(BP, IQ,
       xlab = "Behavior problem score", ylab = "IQ",
       pch = 20, col = 2)
)
grid()

# Plot BP against IQ with each observation drawn as its Dep label, then add
# one least-squares line per group: default line type for "D", lty = 2
# for "N".
# Set up the axes only; the points are drawn as text labels below.
plot(dta$IQ, dta$BP, type = "n",
     xlab = "IQ", ylab = "Behavior problem score")
with(dta, text(IQ, BP, labels = Dep, cex = 0.5))
abline(lm(BP ~ IQ, data = dta[dta$Dep == "D", ]))
abline(lm(BP ~ IQ, data = dta[dta$Dep == "N", ]), lty = 2)

## end

Exercise 4

# Read the 2015 US births table; the first line holds the column names.
dta2 <- read.table("C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\usBirths2015.txt", header = TRUE)
# The season labels below are only valid for exactly 12 rows.
# NOTE(review): rows are assumed to be months in calendar order
# (January first) - confirm against the file.
stopifnot(nrow(dta2) == 12L)
# Rotate the rows so the table starts at row 3 and ends with rows 1-2; each
# consecutive run of three rows then forms one season.
dta <- dta2[c(3:12, 1:2), ]
dta$season <- rep(c("spring", "summer", "autumn", "winter"), each = 3)
# Total births per season
aggregate(birth ~ season, data = dta, FUN = sum)

##   season   birth
## 1 autumn 1005343
## 2 spring  977672
## 3 summer 1035747
## 4 winter  959735

Exercise 5

# Read the reading-times table (first line holds the column names) and
# inspect its structure.
dta3 <- read.table("C:\\Users\\shabby\\Desktop\\资料管理\\DataManagement\\week3\\readingtimes.txt", header = TRUE)
str(dta3)

## 'data.frame':    7 obs. of  14 variables:
##  $ Snt : int  1 2 3 4 5 6 7
##  $ Sp  : int  1 2 3 4 5 6 7
##  $ Wrds: int  13 16 9 9 10 18 6
##  $ New : int  1 3 2 2 3 4 1
##  $ S01 : num  3.43 6.48 1.71 3.68 4 ...
##  $ S02 : num  2.79 5.41 2.34 3.71 2.9 ...
##  $ S03 : num  4.16 4.49 3.02 2.87 2.99 ...
##  $ S04 : num  3.07 5.06 2.46 2.73 2.67 ...
##  $ S05 : num  3.62 9.29 6.04 4.21 3.88 ...
##  $ S06 : num  3.16 5.64 2.46 6.24 3.22 ...
##  $ S07 : num  3.23 8.36 4.92 3.72 3.14 ...
##  $ S08 : num  7.16 4.31 3.37 6.33 6.14 ...
##  $ S09 : num  1.54 2.95 1.38 1.15 2.76 ...
##  $ S10 : num  4.06 6.65 2.18 3.66 3.33 ...

# Mean reading time of each subject (columns S01 to S10) over all rows.
# The subject columns are selected by name rather than by position.
subject_cols <- grep("^S[0-9]+$", names(dta3), value = TRUE)
subject_means <- colMeans(dta3[, subject_cols])
subject_means

##      S01      S02      S03      S04      S05      S06      S07      S08 
## 4.130143 3.847000 3.774286 3.779286 5.620000 5.371286 5.228429 5.011429 
##      S09      S10 
## 2.741000 4.493714

# Rank the subjects by mean reading time (rank 1 = smallest mean)
rank(subject_means)

## S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 
##   5   4   2   3  10   9   8   7   1   6

# Each subject's mean reading time divided by the mean of the Wrds column
# NOTE(review): presumably reading time per word - confirm what Wrds counts
time_per_word <- subject_means / mean(dta3$Wrds)
time_per_word

##       S01       S02       S03       S04       S05       S06       S07       S08 
## 0.3569259 0.3324568 0.3261728 0.3266049 0.4856790 0.4641852 0.4518395 0.4330864 
##       S09       S10 
## 0.2368765 0.3883457

# Average of that ratio over all subjects
mean(time_per_word)

## [1] 0.3802173

homework

LJ

2021/10/24

Exercise 1

Exercise 2

Exercise 3

Exercise 4

Exercise 5