Q1

exchangerate <- function(NTD, rate = 30.832) {
  USD <- NTD / rate
  cat(NTD, "NTD can buy", USD, "USD\n")
}
exchangerate(10000)
## 10000 NTD can buy 324.3383 USD
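
# cat() prints but returns nothing reusable; a small variant (exchangerate2
# is a hypothetical name, not part of the original answer) that also returns
# the converted amount invisibly so it can be stored:
exchangerate2 <- function(NTD, rate = 30.832) {
  USD <- NTD / rate
  cat(NTD, "NTD can buy", USD, "USD\n")
  invisible(USD)   # assumed addition: return the value for further use
}
usd <- exchangerate2(10000)   # prints the message; usd holds 324.3383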

Q2

fcircle <- function(x) sqrt(1 - x^2)   # quarter of the unit circle
curve(fcircle, 0, 1, ylim = c(0, 1), ylab = "f(x)")
N <- 10000; x <- runif(N, 0, 1); y <- runif(N, 0, 1)
points(x, y, col = ifelse(fcircle(x) > y, 2, 3), pch = ".")
segments(0, 0, 0, 1)
segments(0, 0, 1, 0)

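# Monte Carlo estimate: the fraction of uniform points falling under the
# curve, times the area of the unit square (1 x 1).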
sum(fcircle(runif(N, 0, 1)) > runif(N, 0, 1))/N*(1*1)
## [1] 0.7827
integrate(fcircle, 0, 1)
## 0.7853983 with absolute error < 0.00011
pi*(1^2)/4
## [1] 0.7853982
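
# A quick convergence check (not in the original exercise): the Monte Carlo
# error shrinks roughly like 1/sqrt(N); the seed is an assumption, added
# only for reproducibility.
set.seed(1)
sapply(c(1e2, 1e4, 1e6), function(N) mean(fcircle(runif(N)) > runif(N)) - pi/4)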

Q3

tcurve <- function(vec, from = -4, to = 4){
  stopifnot(is.numeric(vec))
  l <- length(vec)
  cl <- rainbow(l + 1)
  Labels <- c("normal", paste0("df = ", vec))
  x <- seq(from, to, 0.05)
  plot(x, dnorm(x), type = "l", col = cl[1], lty = 2, lwd = 1.5,
       ylab = "Density", main = "Probability density function")
  # overlay one t curve per df; indexing by position is robust to
  # duplicated df values, unlike which(vec == i)
  for (i in seq_along(vec)) {
    curve(dt(x, df = vec[i]), col = cl[i + 1], lwd = 1.5, add = TRUE)
  }
  legend("topright", inset = .05, Labels, col = cl, lwd = 2,
         lty = c(2, rep(1, l)))
}
tcurve(c(2,4,8,16,32))
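
# A numeric sketch of the same convergence (assumed, not part of the
# exercise): the t density at 0 approaches the normal density as df grows.
dt(0, df = c(2, 4, 8, 16, 32)) - dnorm(0)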

Q4

payment <- function(M, L, r){
  stopifnot(is.numeric(M), is.numeric(L), is.numeric(r))
  M <- M * 12                    # years to months
  m <- expand.grid(M, L, r)      # all month/loan/rate combinations
  for (i in 1:nrow(m)) {
    # annuity formula: pay = loan * rate / (1 - (1 + rate)^(-months)),
    # with the rate applied per month as given
    m[i, 4] <- m[i, 2] * (m[i, 3] / (1 - (1 + m[i, 3])^(-m[i, 1])))
  }
  colnames(m) <- c("month", "loan", "rate", "pay_per_month")
  print(head(m))
}
payment(c(10,15,20,25,30),c(5000000,10000000,15000000),c(0.02,0.05,0.07))
##   month  loan rate pay_per_month
## 1   120 5e+06 0.02      110240.5
## 2   180 5e+06 0.02      102913.7
## 3   240 5e+06 0.02      100870.4
## 4   300 5e+06 0.02      100263.7
## 5   360 5e+06 0.02      100080.2
## 6   120 1e+07 0.02      220481.0
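
# A loop-free sketch of the same annuity computation (payment_vec is a
# hypothetical name): expand.grid can carry column names directly, and the
# formula vectorizes over the whole table.
payment_vec <- function(M, L, r) {
  m <- expand.grid(month = M * 12, loan = L, rate = r)
  m$pay_per_month <- with(m, loan * rate / (1 - (1 + rate)^(-month)))
  m
}
head(payment_vec(c(10, 15, 20, 25, 30), c(5e6, 1e7, 1.5e7), c(0.02, 0.05, 0.07)))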

Q5

dta <- read.table("http://www.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt", header = TRUE)

# kde2d version
library(MASS)
wm2d <- with(dta, kde2d(write, math))
persp(wm2d, theta = 45, phi = 30, expand = 0.6, shade = 0.3,
      ylab = "Math", xlab = "Writing", zlab = "Density")

# hand-made version
bvnpdf <- function(x, y, r = 0.6) {
  # standard bivariate normal density with correlation r
  (1 / (2*pi*sqrt(1 - r^2))) * exp(-0.5 * (1/(1 - r^2)) * (x^2 - 2*r*x*y + y^2))
}

y <- x <- seq(-pi, pi, by=0.1); z <- outer(x, y, bvnpdf)
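
# The grid above is computed but never drawn; a minimal persp() call
# (assumed) to display it, parallel to the kde2d plot:
persp(x, y, z, theta = 45, phi = 30, expand = 0.6, shade = 0.3,
      xlab = "x", ylab = "y", zlab = "Density")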

bin2d <- function(mtx2c) {
  
  # breaks and mids for columns 1 and 2, computed without drawing (plot = FALSE)
  h1 <- hist(mtx2c[, 1], plot=FALSE)
  h2 <- hist(mtx2c[, 2], plot=FALSE)
  #cut the data into blocks and count the frequency of each block
  frq <- table(cut(mtx2c[, 1], h1$breaks), cut(mtx2c[, 2], h2$breaks))
  return(list(counts = frq, breaks1 = h1$breaks, breaks2 = h2$breaks,
              mids1 = h1$mids, mids2 = h2$mids))
}

#use bin2d function to cut data into blocks and count the frequency
wmb2 <- bin2d(dta[, c("write", "math")])
h2 <- outer(diff(wmb2$breaks1), diff(wmb2$breaks2))   # area of each block
zd <- wmb2$counts / (nrow(dta) * h2)                  # relative frequency per unit area
persp(wmb2$mids1, wmb2$mids2, z = zd, theta = 45, phi = 30, expand = 0.6,
      shade = 0.3, xlab = "Writing", ylab = "Math", zlab = "Density estimate")

Q6

dta6 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt", header = TRUE)
str(dta6)
## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ female : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
##  $ ses    : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 NA ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...
dta6 <- na.omit(dta6)

#a
m <- combn(names(dta6)[7:11], 2, simplify = FALSE)   # all pairs of score variables
names(m) <- sapply(m, function(i) paste0(i[1], " vs ", i[2]))
lapply(m, function(i) t.test(dta6[, i[1]], dta6[, i[2]]))
## $`read vs write`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = -0.57036, df = 385.68, p-value = 0.5688
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.531458  1.392996
## sample estimates:
## mean of x mean of y 
##  52.11282  52.68205 
## 
## 
## $`read vs math`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = -0.44893, df = 385.11, p-value = 0.6537
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.400139  1.507831
## sample estimates:
## mean of x mean of y 
##  52.11282  52.55897 
## 
## 
## $`read vs science`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.19213, df = 387.28, p-value = 0.8477
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.799263  2.189007
## sample estimates:
## mean of x mean of y 
##  52.11282  51.91795 
## 
## 
## $`read vs socst`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = -0.18323, df = 386.98, p-value = 0.8547
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.285933  1.896190
## sample estimates:
## mean of x mean of y 
##  52.11282  52.30769 
## 
## 
## $`write vs math`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.12898, df = 387.97, p-value = 0.8974
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.752980  1.999133
## sample estimates:
## mean of x mean of y 
##  52.68205  52.55897 
## 
## 
## $`write vs science`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.78332, df = 387.54, p-value = 0.4339
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.153777  2.681982
## sample estimates:
## mean of x mean of y 
##  52.68205  51.91795 
## 
## 
## $`write vs socst`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.36465, df = 381.73, p-value = 0.7156
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.644203  2.392921
## sample estimates:
## mean of x mean of y 
##  52.68205  52.30769 
## 
## 
## $`math vs science`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.66005, df = 387.27, p-value = 0.5096
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.268416  2.550467
## sample estimates:
## mean of x mean of y 
##  52.55897  51.91795 
## 
## 
## $`math vs socst`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = 0.24574, df = 380.83, p-value = 0.806
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.759275  2.261839
## sample estimates:
## mean of x mean of y 
##  52.55897  52.30769 
## 
## 
## $`science vs socst`
## 
##  Welch Two Sample t-test
## 
## data:  dta6[, i[1]] and dta6[, i[2]]
## t = -0.37388, df = 384.58, p-value = 0.7087
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.439316  1.659828
## sample estimates:
## mean of x mean of y 
##  51.91795  52.30769
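
# Since all five scores come from the same students, a paired test is
# arguably more appropriate; a one-line variant (an assumption about the
# design, not part of the original answer):
lapply(m, function(i) t.test(dta6[, i[1]], dta6[, i[2]], paired = TRUE))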
#b
pacman::p_load(tidyverse, tidyr, broom)
dta6L <- gather(dta6, type, score, 7:11)   # wide to long: one row per test score
dta6L$type <- as.factor(dta6L$type)
dta6LL <- dta6L %>% group_by(type) %>% do(tmp = data.frame(.))
# one ANOVA per subject; list elements follow the alphabetical factor
# levels: math, read, science, socst, write
lapply(dta6LL$tmp, function(i) aov(score ~ race, data = i))
## [[1]]
## Call:
##    aov(formula = score ~ race, data = i)
## 
## Terms:
##                      race Residuals
## Sum of Squares   1746.262 15317.810
## Deg. of Freedom         3       191
## 
## Residual standard error: 8.955331
## Estimated effects may be unbalanced
## 
## [[2]]
## Call:
##    aov(formula = score ~ race, data = i)
## 
## Terms:
##                      race Residuals
## Sum of Squares   1627.565 18671.952
## Deg. of Freedom         3       191
## 
## Residual standard error: 9.887311
## Estimated effects may be unbalanced
## 
## [[3]]
## Call:
##    aov(formula = score ~ race, data = i)
## 
## Terms:
##                      race Residuals
## Sum of Squares   3169.521 15447.166
## Deg. of Freedom         3       191
## 
## Residual standard error: 8.993065
## Estimated effects may be unbalanced
## 
## [[4]]
## Call:
##    aov(formula = score ~ race, data = i)
## 
## Terms:
##                      race Residuals
## Sum of Squares    800.668 21690.871
## Deg. of Freedom         3       191
## 
## Residual standard error: 10.65668
## Estimated effects may be unbalanced
## 
## [[5]]
## Call:
##    aov(formula = score ~ race, data = i)
## 
## Terms:
##                     race Residuals
## Sum of Squares   1758.13  15622.16
## Deg. of Freedom        3       191
## 
## Residual standard error: 9.04386
## Estimated effects may be unbalanced
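
# The printed aov objects omit F tests; since broom is already loaded, a
# tidier sketch (assumed, not part of the original answer) gives one
# labelled table of F tests per subject:
dta6L %>% group_by(type) %>% do(tidy(aov(score ~ race, data = .)))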
#c
dta6$ses <- factor(dta6$ses, levels = c("low", "middle", "high"))
rst <- tidy(lm(math ~ ses - 1, data = dta6), conf.int = TRUE)   # cell means, no intercept

ggplot(rst, aes(x = factor(term, levels = c("seslow", "sesmiddle", "seshigh")),
                y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
  coord_flip() +
  labs(x = "", y = "Estimate") +
  theme_bw()
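
# A sanity check (assumed): the same point estimates and intervals can be
# read off the model directly.
confint(lm(math ~ ses - 1, data = dta6))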

Q7

normal <- function(size, mu, sigma){
  # mean of a fresh independent sample of size i, for each i = 1..size
  x <- sapply(1:size, function(i) mean(rnorm(i, mu, sigma)))
  plot(x, type = "l", col = "green", xlab = "Sample size",
       ylab = "Running average", main = "Normal distribution")
  abline(h = mu, lty = 2)   # true mean
  grid()
}

normal(1000,100,15)
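
# An alternative sketch (normal_cum is a hypothetical name): a true
# cumulative mean of a single simulated sequence, rather than a fresh
# sample at every size as above.
normal_cum <- function(size, mu, sigma) {
  x <- rnorm(size, mu, sigma)
  plot(cumsum(x) / seq_along(x), type = "l", col = "blue",
       xlab = "Sample size", ylab = "Running average",
       main = "Normal distribution (cumulative mean)")
  abline(h = mu, lty = 2)
  grid()
}
normal_cum(1000, 100, 15)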