dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/readingtimes.txt",header = T)
str(dta1)
## 'data.frame': 7 obs. of 14 variables:
## $ Snt : int 1 2 3 4 5 6 7
## $ Sp : int 1 2 3 4 5 6 7
## $ Wrds: int 13 16 9 9 10 18 6
## $ New : int 1 3 2 2 3 4 1
## $ S01 : num 3.43 6.48 1.71 3.68 4 ...
## $ S02 : num 2.79 5.41 2.34 3.71 2.9 ...
## $ S03 : num 4.16 4.49 3.02 2.87 2.99 ...
## $ S04 : num 3.07 5.06 2.46 2.73 2.67 ...
## $ S05 : num 3.62 9.29 6.04 4.21 3.88 ...
## $ S06 : num 3.16 5.64 2.46 6.24 3.22 ...
## $ S07 : num 3.23 8.36 4.92 3.72 3.14 ...
## $ S08 : num 7.16 4.31 3.37 6.33 6.14 ...
## $ S09 : num 1.54 2.95 1.38 1.15 2.76 ...
## $ S10 : num 4.06 6.65 2.18 3.66 3.33 ...
#individual means
dta1.matrix <- as.matrix(dta1[,5:14])
t(matrix(1, 7, 1)) %*% dta1.matrix / 7
## S01 S02 S03 S04 S05 S06 S07 S08
## [1,] 4.130143 3.847 3.774286 3.779286 5.62 5.371286 5.228429 5.011429
## S09 S10
## [1,] 2.741 4.493714
dta2 <- survey
str(dta2)
## 'data.frame': 237 obs. of 12 variables:
## $ Sex : Factor w/ 2 levels "Female","Male": 1 2 2 2 2 1 2 1 2 2 ...
## $ Wr.Hnd: num 18.5 19.5 18 18.8 20 18 17.7 17 20 18.5 ...
## $ NW.Hnd: num 18 20.5 13.3 18.9 20 17.7 17.7 17.3 19.5 18.5 ...
## $ W.Hnd : Factor w/ 2 levels "Left","Right": 2 1 2 2 2 2 2 2 2 2 ...
## $ Fold : Factor w/ 3 levels "L on R","Neither",..: 3 3 1 3 2 1 1 3 3 3 ...
## $ Pulse : int 92 104 87 NA 35 64 83 74 72 90 ...
## $ Clap : Factor w/ 3 levels "Left","Neither",..: 1 1 2 2 3 3 3 3 3 3 ...
## $ Exer : Factor w/ 3 levels "Freq","None",..: 3 2 2 2 3 3 1 1 3 3 ...
## $ Smoke : Factor w/ 4 levels "Heavy","Never",..: 2 4 3 2 2 2 2 2 2 2 ...
## $ Height: num 173 178 NA 160 165 ...
## $ M.I : Factor w/ 2 levels "Imperial","Metric": 2 1 NA 2 2 1 1 2 2 2 ...
## $ Age : num 18.2 17.6 16.9 20.3 23.7 ...
#density plot(have problem on adding ablines)
densityplot(~Height,data=dta2,groups = Sex,
plot.points = T, ref = TRUE,
auto.key = list(space = "right"))
#the distribution of products of two pairs of dice
hist(as.numeric(d2 <- outer(1:6, 1:6, "+")),
prob = T, xlab = "Sum", main = "")
abline(v = 3.5*2 + c(-1, 1)*sqrt(2*3.5), lty = 2)
curve(dnorm(x, mean = 3.5*2, sd = sqrt(2*3.5)), add = T)
m <- 3600
n <- 16
dta4 <- rnorm(m*n)
dim(dta4) <- c(m, n)
hist(apply(dta4, 1, min), prob = T, xlab = "Average", main="",ylim = c(0,2),xlim = c(-4,4))
curve(dnorm(x, mean = mean(apply(dta4, 1, min)), sd = 1/sqrt(n)), add = T)
####the sampling distribution of the smallest value does not follow the normal curve
mean
## function (x, ...)
## UseMethod("mean")
## <bytecode: 0x0000000013354670>
## <environment: namespace:base>
methods("mean")
## [1] mean.Date mean.default mean.difftime mean.POSIXct mean.POSIXlt
## see '?methods' for accessing help and source code
base:::mean.default
## function (x, trim = 0, na.rm = FALSE, ...)
## {
## if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
## warning("argument is not numeric or logical: returning NA")
## return(NA_real_)
## }
## if (na.rm)
## x <- x[!is.na(x)]
## if (!is.numeric(trim) || length(trim) != 1L)
## stop("'trim' must be numeric of length one")
## n <- length(x)
## if (trim > 0 && n) {
## if (is.complex(x))
## stop("trimmed means are not defined for complex data")
## if (anyNA(x))
## return(NA_real_)
## if (trim >= 0.5)
## return(stats::median(x, na.rm = FALSE))
## lo <- floor(n * trim) + 1
## hi <- n + 1 - lo
## x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
## }
## .Internal(mean(x))
## }
## <bytecode: 0x0000000016a3ec50>
## <environment: namespace:base>
##First plot
coef(lm(weight ~ height, data = women))
## (Intercept) height
## -87.51667 3.45000
plot(weight~height,data = women)
abline(lm(weight ~ height,data = women),col = "red")
grid()
##Second plot
women$htc <- with(women, height - mean(height))
coef(lm(weight ~ htc, data = women))
## (Intercept) htc
## 136.7333 3.4500
plot(weight~htc,data = women)
abline(lm(weight ~ htc,data = women),col = "red")
grid()
#use nonlinear regression model to fit
wtloss.fm <- nls(Weight ~ b0 + b1*2^(-Days/th),
data = wtloss, start = list(b0=90, b1=95, th=120))
wtloss.fm
## Nonlinear regression model
## model: Weight ~ b0 + b1 * 2^(-Days/th)
## data: wtloss
## b0 b1 th
## 81.37 102.68 141.91
## residual sum-of-squares: 39.24
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 4.324e-06
plot(wtloss)
with(wtloss, lines(Days, fitted(wtloss.fm)))
grid()
#use simple linear regression model to fit (red line)
wtloss.lm <- lm(Weight ~ Days, data = wtloss)
summary(wtloss.lm)
##
## Call:
## lm(formula = Weight ~ Days, data = wtloss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.522 -2.891 -1.331 3.096 7.460
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 176.890244 0.987644 179.1 <2e-16 ***
## Days -0.290735 0.007125 -40.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.639 on 50 degrees of freedom
## Multiple R-squared: 0.9708, Adjusted R-squared: 0.9703
## F-statistic: 1665 on 1 and 50 DF, p-value: < 2.2e-16
with(wtloss, lines(Days, fitted(wtloss.lm),col = "red"))
grid()