dataM_week2

Q1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/readingtimes.txt",header = T)
str(dta1)

## 'data.frame':    7 obs. of  14 variables:
##  $ Snt : int  1 2 3 4 5 6 7
##  $ Sp  : int  1 2 3 4 5 6 7
##  $ Wrds: int  13 16 9 9 10 18 6
##  $ New : int  1 3 2 2 3 4 1
##  $ S01 : num  3.43 6.48 1.71 3.68 4 ...
##  $ S02 : num  2.79 5.41 2.34 3.71 2.9 ...
##  $ S03 : num  4.16 4.49 3.02 2.87 2.99 ...
##  $ S04 : num  3.07 5.06 2.46 2.73 2.67 ...
##  $ S05 : num  3.62 9.29 6.04 4.21 3.88 ...
##  $ S06 : num  3.16 5.64 2.46 6.24 3.22 ...
##  $ S07 : num  3.23 8.36 4.92 3.72 3.14 ...
##  $ S08 : num  7.16 4.31 3.37 6.33 6.14 ...
##  $ S09 : num  1.54 2.95 1.38 1.15 2.76 ...
##  $ S10 : num  4.06 6.65 2.18 3.66 3.33 ...

#individual means
dta1.matrix <- as.matrix(dta1[,5:14])
t(matrix(1, 7, 1)) %*% dta1.matrix / 7

##           S01   S02      S03      S04  S05      S06      S07      S08
## [1,] 4.130143 3.847 3.774286 3.779286 5.62 5.371286 5.228429 5.011429
##        S09      S10
## [1,] 2.741 4.493714

Q2

dta2 <- survey
str(dta2)

## 'data.frame':    237 obs. of  12 variables:
##  $ Sex   : Factor w/ 2 levels "Female","Male": 1 2 2 2 2 1 2 1 2 2 ...
##  $ Wr.Hnd: num  18.5 19.5 18 18.8 20 18 17.7 17 20 18.5 ...
##  $ NW.Hnd: num  18 20.5 13.3 18.9 20 17.7 17.7 17.3 19.5 18.5 ...
##  $ W.Hnd : Factor w/ 2 levels "Left","Right": 2 1 2 2 2 2 2 2 2 2 ...
##  $ Fold  : Factor w/ 3 levels "L on R","Neither",..: 3 3 1 3 2 1 1 3 3 3 ...
##  $ Pulse : int  92 104 87 NA 35 64 83 74 72 90 ...
##  $ Clap  : Factor w/ 3 levels "Left","Neither",..: 1 1 2 2 3 3 3 3 3 3 ...
##  $ Exer  : Factor w/ 3 levels "Freq","None",..: 3 2 2 2 3 3 1 1 3 3 ...
##  $ Smoke : Factor w/ 4 levels "Heavy","Never",..: 2 4 3 2 2 2 2 2 2 2 ...
##  $ Height: num  173 178 NA 160 165 ...
##  $ M.I   : Factor w/ 2 levels "Imperial","Metric": 2 1 NA 2 2 1 1 2 2 2 ...
##  $ Age   : num  18.2 17.6 16.9 20.3 23.7 ...

#density plot(have problem on adding ablines)
densityplot(~Height,data=dta2,groups = Sex,
            plot.points = T, ref = TRUE, 
            auto.key = list(space = "right"))

Q3

#the distribution of products of two pairs of dice
hist(as.numeric(d2 <- outer(1:6, 1:6, "+")),
     prob = T, xlab = "Sum", main = "")

abline(v = 3.5*2 + c(-1, 1)*sqrt(2*3.5), lty = 2)

curve(dnorm(x, mean = 3.5*2, sd = sqrt(2*3.5)), add = T)

Q4

m <- 3600
n <- 16
dta4 <- rnorm(m*n)
dim(dta4) <- c(m, n)
hist(apply(dta4, 1, min), prob = T, xlab = "Average", main="",ylim = c(0,2),xlim = c(-4,4))
curve(dnorm(x, mean = mean(apply(dta4, 1, min)), sd = 1/sqrt(n)), add = T)

####the sampling distribution of the smallest value does not follow the normal curve

Q5

mean

## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x0000000013354670>
## <environment: namespace:base>

methods("mean")

## [1] mean.Date     mean.default  mean.difftime mean.POSIXct  mean.POSIXlt 
## see '?methods' for accessing help and source code

base:::mean.default

## function (x, trim = 0, na.rm = FALSE, ...) 
## {
##     if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
##         warning("argument is not numeric or logical: returning NA")
##         return(NA_real_)
##     }
##     if (na.rm) 
##         x <- x[!is.na(x)]
##     if (!is.numeric(trim) || length(trim) != 1L) 
##         stop("'trim' must be numeric of length one")
##     n <- length(x)
##     if (trim > 0 && n) {
##         if (is.complex(x)) 
##             stop("trimmed means are not defined for complex data")
##         if (anyNA(x)) 
##             return(NA_real_)
##         if (trim >= 0.5) 
##             return(stats::median(x, na.rm = FALSE))
##         lo <- floor(n * trim) + 1
##         hi <- n + 1 - lo
##         x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
##     }
##     .Internal(mean(x))
## }
## <bytecode: 0x0000000016a3ec50>
## <environment: namespace:base>

Q6

##First plot
coef(lm(weight ~ height, data = women))

## (Intercept)      height 
##   -87.51667     3.45000

plot(weight~height,data = women)
abline(lm(weight ~ height,data = women),col = "red")
grid()

##Second plot
women$htc <- with(women, height - mean(height))
coef(lm(weight ~ htc, data = women))

## (Intercept)         htc 
##    136.7333      3.4500

plot(weight~htc,data = women)
abline(lm(weight ~ htc,data = women),col = "red")
grid()

The intercept of first plot is the y axis value when height equals 0. The intercept of second plot is the value of mean weight. Both values of slope are the value of weight changing when height plus 1.

Q7

#use nonlinear regression model to fit 
wtloss.fm <- nls(Weight ~ b0 + b1*2^(-Days/th),
                 data = wtloss, start = list(b0=90, b1=95, th=120))
wtloss.fm

## Nonlinear regression model
##   model: Weight ~ b0 + b1 * 2^(-Days/th)
##    data: wtloss
##     b0     b1     th 
##  81.37 102.68 141.91 
##  residual sum-of-squares: 39.24
## 
## Number of iterations to convergence: 3 
## Achieved convergence tolerance: 4.324e-06

plot(wtloss)
with(wtloss, lines(Days, fitted(wtloss.fm)))
grid()

#use simple linear regression model to fit (red line)
wtloss.lm <- lm(Weight ~ Days, data = wtloss)
summary(wtloss.lm)

## 
## Call:
## lm(formula = Weight ~ Days, data = wtloss)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.522 -2.891 -1.331  3.096  7.460 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 176.890244   0.987644   179.1   <2e-16 ***
## Days         -0.290735   0.007125   -40.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.639 on 50 degrees of freedom
## Multiple R-squared:  0.9708, Adjusted R-squared:  0.9703 
## F-statistic:  1665 on 1 and 50 DF,  p-value: < 2.2e-16

with(wtloss, lines(Days, fitted(wtloss.lm),col = "red"))
grid()

dataM_week2

Wen_Sung Cheng

2017年3月13日

Q1

Q2

Q3

Q4

Q5

Q6

The intercept of first plot is the y axis value when height equals 0. The intercept of second plot is the value of mean weight. Both values of slope are the value of weight changing when height plus 1.

Q7