1. Read data from .csv

# Point the session at the folder that holds the course data files.
setwd("D:\\course\\111-1\\Survey Sampling\\Rnote\\Rpub") # set the working directory
getwd() # report the current working directory
## [1] "D:/course/111-1/Survey Sampling/Rnote/Rpub"
# Relative path: resolved against the working directory set above.
data <- read.csv("mammals.csv")

head(data) # print the first 6 rows (the default for head())
##                     species brain_wt
## 1          African Elephant   5712.0
## 2 African giant pouched rat      6.6
## 3                Arctic Fox     44.5
## 4    Arctic ground squirrel      5.7
## 5            Asian elephant   4603.0
## 6                    Baboon    179.5

2. Some R commands

建立矩陣 \[ a= \begin{pmatrix} 1 & 3 & 5 & 7\\ 2 & 4 & 6 & 8 \end{pmatrix} \] \(matrix(data=NA, nrow=1, ncol=1, byrow=FALSE,...)\)：預設 \(byrow=FALSE\)，資料會依欄 (column) 依序填入；若要依列 (row) 填入 (第一列為 1 2 3 4)，請設定 \(byrow=TRUE\)。Check \(help(matrix)\) for more information

# Build a 2 x 4 integer matrix from 1..8; matrix() fills column by column
# by default, which is why the first row reads 1 3 5 7.
a <- matrix(data = seq_len(8), nrow = 2, ncol = 4)
print(a)
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    5    7
## [2,]    2    4    6    8

抽樣 \(sample(x, size, replace = FALSE, prob=NULL,...)\), check \(help(sample)\) for more information

set.seed(122) # fix the random seed so every run generates the same draws

# Ten draws from 1..8 with replacement.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
sample(1:8, 10, replace = TRUE)
##  [1] 8 8 1 2 7 7 4 7 1 7
# Two draws from 1..4 without replacement, weighted by c(.1, .1, .4, .4).
sample(1:4, 2, prob = c(.1, .1, .4, .4))
## [1] 1 4

\(dim(x)\): 矩陣 x 的 dimension

\(colMeans(x)\) / \(rowMeans(x)\): x為矩陣,算出矩陣每個 column /row 的平均

\(summary(x)\): 算出 x 中數值的敘述統計量

# Shape of a, reported as (rows, columns).
dim(a)
## [1] 2 4
# Mean of each column of a.
colMeans(a)
## [1] 1.5 3.5 5.5 7.5
# Descriptive statistics (quartiles, mean, extremes) of the integers 1..100.
summary(seq_len(100))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   25.75   50.50   50.50   75.25  100.00

直方圖 Histogram: \(hist(x, breaks = "Sturges",...)\)

# 500 draws from 1..100 with replacement, shown as a histogram.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
x <- sample(1:100, 500, replace = TRUE)
hist(x)

箱型圖 Box plot: \(boxplot(x, ...)\)

# Horizontal box plot of x; TRUE is spelled out because T can be reassigned.
boxplot(x, horizontal = TRUE)

散佈圖 Scatter plot: \(plot(x, y, xlim = NULL, ylim = NULL, main = NULL, xlab = NULL, ylab = NULL,...)\)

# y is x plus 500 fresh draws from 1..100 (with replacement).
# NOTE(review): assumes x has length 500, as created earlier in this note.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
y <- x + sample(1:100, 500, replace = TRUE)
plot(x, y)

3. Chapter 3.4: Table 1 & Figure 3.2

population = {0,1,2,…,9}; equal proportion; sampling with replacement.

# Simulate 50 samples of size 10, drawn with replacement from the population 0..9.
# x.old is not used below; it is kept because removing the call would shift
# the random-number stream and change every later draw.
x.old <- sample(0:9, 10 * 50, replace = TRUE)
# Each column is one sample: 10 rows (observations) x 50 columns (samples).
sample.all <- matrix(sample(0:9, 10 * 50, replace = TRUE), ncol = 50)
ybar <- colMeans(sample.all) # the 50 sample means (table 3.3)
# Bins of width 1 centred on 0..9, so the plotted density equals the
# relative frequency of each value.
hist(sample.all, xlab = "Means", ylab = "Relative frequency",
     breaks = (0:10 - .5), main = "yi", freq = FALSE)

# hist() draws counts by default, which contradicts the "Relative frequency"
# axis label, so the counts are rescaled to proportions before plotting.
ybar.hist <- hist(ybar, plot = FALSE)
ybar.hist$counts <- ybar.hist$counts / sum(ybar.hist$counts)
plot(ybar.hist, xlab = "Means", ylab = "Relative frequency",
     main = "Figure 3.2: ybar")

4. Chapter 3.4: Figure 3.3-5

# Reload the mammals data through its absolute path, so this section does
# not rely on the current working directory.
data <- read.csv(file = "D:\\course\\111-1\\Survey Sampling\\Rnote\\Rpub\\mammals.csv")
dim(data)
## [1] 68  2
head(data)
##                     species brain_wt
## 1          African Elephant   5712.0
## 2 African giant pouched rat      6.6
## 3                Arctic Fox     44.5
## 4    Arctic ground squirrel      5.7
## 5            Asian elephant   4603.0
## 6                    Baboon    179.5
# Bin edges every 500 units from 0 to 8000.
v.break <- seq(from = 0, to = 8000, by = 500)
# Histogram of the second column (brain_wt in the listing above).
hist(data[[2]], breaks = v.break, xlab = "Brain weight", ylab = "Count", main = "Figure 3.3")

# Draw 100 samples of size 5 and 100 samples of size 40, with replacement,
# from the second column of data; each matrix column holds one sample.
data.s.n5 <- matrix(sample(data[, 2], 5 * 100, replace = TRUE), ncol = 100)
data.s.n40 <- matrix(sample(data[, 2], 40 * 100, replace = TRUE), ncol = 100)

# One sample mean per column.
ybar.n5 <- colMeans(data.s.n5)
ybar.n40 <- colMeans(data.s.n40)

summary(ybar.n5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.41   80.40  158.16  450.65  512.19 2412.85
summary(ybar.n40)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    59.28  234.89  360.06  363.54  482.44  804.44
# Both histograms share bins of width 100 starting at 0. The upper edge is at
# least 3200 (the original fixed limit) and grows when a sample mean exceeds
# it, because hist() stops with an error if the breaks do not span the data.
ybar.upper <- max(3200, ceiling(max(ybar.n5, ybar.n40, na.rm = TRUE) / 100) * 100)
ybar.breaks <- seq(0, ybar.upper, by = 100)
hist(ybar.n5, breaks = ybar.breaks, xlab = "Sample means", ylab = "Count, n=5", main = "Figure 3.4")

hist(ybar.n40, breaks = ybar.breaks, xlab = "Sample means", ylab = "Count, n=40", main = "Figure 3.4")

# Natural log of the second column of data (the brain weights).
ln.brain <- log(data[[2]])
summary(ln.brain)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.966   1.071   2.436   2.977   5.170   8.825
# Unit-width bins running from -4 to 12.
hist(
  ln.brain,
  breaks = seq(-4, 12, by = 1),
  xlab = "ln(Brain weight)",
  ylab = "Count",
  main = "Figure 3.5"
)