# Point the session at the course folder and load the mammals data set.
# NOTE(review): the path below is machine-specific; adjust it before running elsewhere.
setwd("D:\\course\\111-1\\Survey Sampling\\Rnote\\Rpub") # set the working directory
getwd() # get the current working directory
## [1] "D:/course/111-1/Survey Sampling/Rnote/Rpub"
data <- read.csv("mammals.csv") # relative path, resolved against the working directory set above
head(data) # print the first 6 rows (the default for head())
## species brain_wt
## 1 African Elephant 5712.0
## 2 African giant pouched rat 6.6
## 3 Arctic Fox 44.5
## 4 Arctic ground squirrel 5.7
## 5 Asian elephant 4603.0
## 6 Baboon 179.5
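建立矩陣 \[ a= \begin{pmatrix} 1 & 2 & 3 & 4\\ 5 & 6 & 7 & 8\\ \end{pmatrix} \] \(matrix(data=NA, nrow=1, ncol=1, byrow=FALSE,...)\), check \(help(matrix)\) for more information。注意：\(matrix()\) 預設 \(byrow=FALSE\)，會依 column 順序填入資料，所以下方程式得到的矩陣第一列是 1 3 5 7、第二列是 2 4 6 8；若要得到上式的 \(a\)，需設定 \(byrow=TRUE\)。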
# Build a 2 x 4 integer matrix from 1:8. With byrow = FALSE the values are
# filled column by column, so the first row is 1 3 5 7.
a <- matrix(data = 1:8, nrow = 2, ncol = 4, byrow = FALSE)
print(a)
## [,1] [,2] [,3] [,4]
## [1,] 1 3 5 7
## [2,] 2 4 6 8
抽樣 \(sample(x, size, replace = FALSE, prob=NULL,...)\), check \(help(sample)\) for more information
# Demonstrate sample(): fix the seed, then draw with and without replacement.
set.seed(122) # fix the random seed so every run generates the same draws
# Spell out TRUE: T is an ordinary variable and can be reassigned.
sample(1:8, 10, replace = TRUE) # random sampling with replacement
## [1] 8 8 1 2 7 7 4 7 1 7
sample(1:4, 2, prob = c(.1, .1, .4, .4)) # weighted sampling with prob = c(.1, .1, .4, .4), without replacement
## [1] 1 4
\(dim(x)\): 矩陣 x 的 dimension
\(colMeans(x)\) / \(rowMeans(x)\): x為矩陣,算出矩陣每個 column /row 的平均
\(summary(x)\): 算出 x 中數值的敘述統計量
# Inspect the matrix `a` and summarise a simple integer sequence.
dim(a) # dimensions of a: number of rows, then number of columns
## [1] 2 4
colMeans(a) # mean of each column of a
## [1] 1.5 3.5 5.5 7.5
summary(1:100) # descriptive statistics (min, quartiles, median, mean, max) of 1:100
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 25.75 50.50 50.50 75.25 100.00
直方圖 Histogram: \(hist(x, breaks = "Sturges",...)\)
# Draw 500 values from 1..100 with replacement and plot their histogram.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
x <- sample(1:100, 500, replace = TRUE)
hist(x)
箱型圖 Box plot: \(boxplot(x, ...)\)
# Horizontal box plot of x; TRUE is spelled out because T can be reassigned.
boxplot(x, horizontal = TRUE)
散佈圖 Scatter plot: \(plot(x, y, xlim = NULL, ylim = NULL, main = NULL, xlab = NULL, ylab = NULL,...)\)
# Build y as x plus a fresh draw of 500 integers from 1..100 (with replacement),
# then plot y against x. TRUE is spelled out because T can be reassigned.
y <- x + sample(1:100, 500, replace = TRUE)
plot(x, y)
population = {0,1,2,…,9}; equal proportion; sampling with replacement.
# Simulate the sampling distribution of the mean for the population {0, ..., 9}:
# 50 samples of size 10, drawn with replacement and equal probability.
# TRUE / FALSE are spelled out because T and F are reassignable variables.

# x.old is not used below, but it is kept: dropping this call would shift the
# random-number stream for every draw that follows.
x.old <- sample(0:9, 10 * 50, replace = TRUE)

# Each column is one sample: 10 rows x 50 columns.
sample.all <- matrix(sample(0:9, 10 * 50, replace = TRUE), ncol = 50)
ybar <- colMeans(sample.all) # the 50 sample means (table 3.3)

# Histogram of all 500 individual draws. The bins have unit width and are
# centred on 0..9, so the density scale (freq = FALSE) equals relative frequency.
# NOTE(review): xlab reads "Means" although these are individual values -- confirm the intended label.
hist(sample.all, xlab = "Means", ylab = "Relative frequency",
     breaks = (0:10 - .5), main = "yi", freq = FALSE)

# Histogram of the 50 sample means.
# NOTE(review): freq is left at its default here, so the y axis shows counts
# even though ylab reads "Relative frequency" -- confirm which one is intended.
hist(ybar, xlab = "Means", ylab = "Relative frequency", main = "Figure 3.2: ybar")
# Load the mammals data from its absolute path, so this step does not rely on the working directory.
# NOTE(review): the path is machine-specific; adjust it before running elsewhere.
data <- read.csv("D:\\course\\111-1\\Survey Sampling\\Rnote\\Rpub\\mammals.csv")
dim(data) # number of rows, then number of columns
## [1] 68 2
head(data) # print the first 6 rows (the default for head())
## species brain_wt
## 1 African Elephant 5712.0
## 2 African giant pouched rat 6.6
## 3 Arctic Fox 44.5
## 4 Arctic ground squirrel 5.7
## 5 Asian elephant 4603.0
## 6 Baboon 179.5
# Figure 3.3: histogram of the second column of `data` (brain weight),
# using bins of width 500.
v.break <- seq(from = 0, to = 8000, by = 500) # bin edges 0, 500, ..., 8000
hist(
  data[[2]],
  breaks = v.break,
  xlab = "Brain weight",
  ylab = "Count",
  main = "Figure 3.3"
)
# Figure 3.4: sampling distribution of the mean of the second column of `data`
# (brain weight) for sample sizes n = 5 and n = 40.
# Each column of the matrices below is one sample drawn with replacement;
# 100 samples are drawn per sample size.
# TRUE is spelled out because T is a reassignable variable.
data.s.n5 <- matrix(sample(data[, 2], 5 * 100, replace = TRUE), ncol = 100)
data.s.n40 <- matrix(sample(data[, 2], 40 * 100, replace = TRUE), ncol = 100)
ybar.n5 <- colMeans(data.s.n5)
ybar.n40 <- colMeans(data.s.n40)
summary(ybar.n5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.41 80.40 158.16 450.65 512.19 2412.85
summary(ybar.n40)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 59.28 234.89 360.06 363.54 482.44 804.44
# Both panels share bin edges of width 100. The edges run to 3200 as before and
# are extended only when a sample mean is larger: hist() stops with an error if
# the breaks do not span the data, and the draws above are random.
ybar.break <- seq(
  0,
  max(3200, 100 * ceiling(max(ybar.n5, ybar.n40, na.rm = TRUE) / 100)),
  by = 100
)
hist(ybar.n5, breaks = ybar.break, xlab = "Sample means", ylab = "Count, n=5", main = "Figure 3.4")
hist(ybar.n40, breaks = ybar.break, xlab = "Sample means", ylab = "Count, n=40", main = "Figure 3.4")
# Figure 3.5: the second column of `data` (brain weight) on the natural-log scale.
ln.brain <- log(data[[2]]) # natural log of each value
summary(ln.brain)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.966 1.071 2.436 2.977 5.170 8.825
hist(
  ln.brain,
  breaks = seq(-4, 12, by = 1), # unit-width bins from -4 to 12
  xlab = "ln(Brain weight)",
  ylab = "Count",
  main = "Figure 3.5"
)