Assignment

Answer

############################################
# 1
############################################
library(MASS)
h <- function(m=100,n=50,dist='norm',a=0, b=1, alpha=0.05){
  Z <- c()
  count <- 0
  num <- 0
  lower = upper = c()
  pval=c()
  for (i in 1:m) {
    if(dist=='norm'){
      xs <- rnorm(n,a,b)
    }else if(dist=='beta'){
      xs <- rbeta(n,a,b)
    }else if(dist=='exp'){
      xs <- rexp(n,a)
    }else if(dist=='unif'){
      xs <- runif(n,a,b)
    }else if(dist=='bern'){
      xs <- rbinom(n,1,a)
    }else if(dist=='pois'){
      xs <- rpois(n,a)
    }else if(dist=='geom'){
      xs <- rgeom(n,a)
    }
    test <- shapiro.test(xs)
    pval[i] <- test$p.value
    count <- count + ifelse(pval[i] > 0,1,0)
    S2 <- var(xs)
    Sigma2 <- b
    Z[i] <- (n-1)*S2/Sigma2
    l <- (n-1)*S2/qchisq(alpha/2,df=n-1, lower.tail = F)
    lower[i] <- l
    u <- (n-1)*S2/qchisq(1-(alpha/2),df=n-1, lower.tail = F)
    upper[i] <- u
    num <- num + ifelse(S2 >= l & S2 <= u, 1, 0)
  }
  # a
  chi_df <- fitdistr(Z,"chi-squared",start=list(df=n-1),method="BFGS")
  chi_df <- round(chi_df[[1]][1])
  # b
  hist(Z, col='blue', breaks = 20, freq = F)
  curve(dchisq(x,df=chi_df),add=TRUE, col='red',lwd=3)
  # c
  mean_pval <- mean(pval)
  # d
  compare <- rchisq(n=m, df=chi_df)
  chitest <- chisq.test(Z,compare)
  pval_chi <- chitest$p.value
  # e
  # f
  lower = mean(lower)
  upper = mean(upper)
  
  return(list(deg=chi_df, num=num, upper=upper, lower=lower, p_value.for.chisq=pval_chi,p_value.for.normality.data=mean_pval, num.of.normal=count))
}

h(m=100,n=50,dist='norm',a=0, b=1, alpha=0.05)

## $deg
## df 
## 47 
## 
## $num
## [1] 100
## 
## $upper
## [1] 1.490521
## 
## $lower
## [1] 0.6697758
## 
## $p_value.for.chisq
## [1] 0.2390079
## 
## $p_value.for.normality.data
## [1] 0.5399027
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='beta',a=1, b=2, alpha=0.05)

## $deg
## df 
##  2 
## 
## $num
## [1] 100
## 
## $upper
## [1] 0.08753544
## 
## $lower
## [1] 0.03933464
## 
## $p_value.for.chisq
## [1] 0.2390079
## 
## $p_value.for.normality.data
## [1] 0.02433376
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='exp',a=1, alpha=0.05)

## $deg
## df 
## 45 
## 
## $num
## [1] 100
## 
## $upper
## [1] 1.48449
## 
## $lower
## [1] 0.6670658
## 
## $p_value.for.chisq
## [1] 0.2390079
## 
## $p_value.for.normality.data
## [1] 0.0003637165
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='unif',a=0, b=1, alpha=0.05)

## $deg
## df 
##  5 
## 
## $num
## [1] 100
## 
## $upper
## [1] 0.1308547
## 
## $lower
## [1] 0.05880045
## 
## $p_value.for.chisq
## [1] 0.2390079
## 
## $p_value.for.normality.data
## [1] 0.04386997
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='bern',a=0.6, alpha=0.05)

## $deg
## df 
## 13 
## 
## $num
## [1] 100
## 
## $upper
## [1] 0.3749907
## 
## $lower
## [1] 0.1685046
## 
## $p_value.for.chisq
## [1] 0.397808
## 
## $p_value.for.normality.data
## [1] 4.255479e-10
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='pois',a=2, alpha=0.05)

## $deg
## df 
## 99 
## 
## $num
## [1] 100
## 
## $upper
## [1] 3.206657
## 
## $lower
## [1] 1.440933
## 
## $p_value.for.chisq
## [1] 0.2504045
## 
## $p_value.for.normality.data
## [1] 0.002478923
## 
## $num.of.normal
## [1] 100

h(m=100,n=50,dist='geom',a=0.1, alpha=0.05)

## $deg
##   df 
## 4011 
## 
## $num
## [1] 100
## 
## $upper
## [1] 136.636
## 
## $lower
## [1] 61.3983
## 
## $p_value.for.chisq
## [1] 0.2390079
## 
## $p_value.for.normality.data
## [1] 0.0004611004
## 
## $num.of.normal
## [1] 100

############################################
# 2
############################################
data <- read.table('dataset/data.txt', header = T)
head(data)

##   age         job marital education default balance housing loan  contact day
## 1  30  unemployed married   primary      no    1787      no   no cellular  19
## 2  33    services married secondary      no    4789     yes  yes cellular  11
## 3  35  management  single  tertiary      no    1350     yes   no cellular  16
## 4  30  management married  tertiary      no    1476     yes  yes  unknown   3
## 5  59 blue-collar married secondary      no       0     yes   no  unknown   5
## 6  35  management  single  tertiary      no     747      no   no cellular  23
##   month duration campaign pdays previous poutcome  y
## 1   Oct       79        1    -1        0  unknown no
## 2   May      220        1   339        4  failure no
## 3   Apr      185        1   330        1  failure no
## 4   Jun      199        4    -1        0  unknown no
## 5   May      226        1    -1        0  unknown no
## 6   Feb      141        2   176        3  failure no

set.seed(2022)
sdata <- data[sample.int(nrow(data), size=150, replace = F),]
head(sdata)

##      age          job  marital education default balance housing loan   contact
## 4324  83      retired divorced   primary      no       0      no   no telephone
## 1459  36   management  married  tertiary      no    2987     yes   no  cellular
## 2871  59      retired  married   primary      no      46      no   no  cellular
## 3915  30   technician  married  tertiary      no     569      no   no  cellular
## 708   55      retired  married   primary      no    8894      no   no   unknown
## 2751  36 entrepreneur  married  tertiary      no       6      no   no telephone
##      day month duration campaign pdays previous poutcome   y
## 4324  31   May      664        1    77        3  success  no
## 1459  12   Aug      307        1    -1        0  unknown yes
## 2871  14   Aug       78        2    -1        0  unknown  no
## 3915  27   Aug      976       12    -1        0  unknown yes
## 708   11   Jun      262        1    -1        0  unknown  no
## 2751  30   Oct      246        1    -1        0  unknown  no

# a
# H0:pA=pB
n_married <- table(sdata$marital)[2]
n_single <- table(sdata$marital)[3]
test <- prop.test(x = c(n_married, n_single), n = c(100, 100),conf.level = 0.01)
test

## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(n_married, n_single) out of c(100, 100)
## X-squared = 39.592, df = 1, p-value = 3.13e-10
## alternative hypothesis: two.sided
## 1 percent confidence interval:
##  0.4192552 0.4407448
## sample estimates:
## prop 1 prop 2 
##   0.88   0.45

# b
# H0: p_yes = p_no
# H1: p_yes > p_no
new <- sdata[sdata$age < 40 & sdata$housing=='yes',]
n <- table(sdata$y)
n_yes <- n[1]
n_no <- n[2]
prop.test(x = c(n_yes, n_no), n = c(150, 150),conf.level = 0.05,
          alternative = 'greater')

## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(n_yes, n_no) out of c(150, 150)
## X-squared = 182.52, df = 1, p-value < 2.2e-16
## alternative hypothesis: greater
## 5 percent confidence interval:
##  0.8386297 1.0000000
## sample estimates:
##    prop 1    prop 2 
## 0.8933333 0.1066667

# c
# H0: sigma2_age = 110
# H1: sigma2_age > 110
library(EnvStats)
varTest(sdata$age, sigma.squared = 110, conf.level = 0.01)

## 
##  Chi-Squared Test on Variance
## 
## data:  sdata$age
## Chi-Squared = 162.66, df = 149, p-value = 0.4197
## alternative hypothesis: true variance is not equal to 110
## 1 percent confidence interval:
##  120.4518 120.8028
## sample estimates:
## variance 
## 120.0879

# d
sdata <- sdata[sdata$education != 'unknown',]
res.aov <- aov(duration ~ education, data = sdata)
summary(res.aov)

##              Df  Sum Sq Mean Sq F value Pr(>F)
## education     2  292048  146024   2.231  0.111
## Residuals   132 8637954   65439

# e
aov_one <- aov(age ~ housing, data = sdata)
summary(aov_one)

##              Df Sum Sq Mean Sq F value  Pr(>F)   
## housing       1    880   880.0    8.16 0.00497 **
## Residuals   133  14343   107.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

############################################
# 3
############################################
# a
new <- sdata[,c(1,6,10,12,13,15)]
cordf <- cor(new)
cordff <- cordf[6,]
cordff2 <- cordff[-6]
barplot(cordff2, ylab='previous')

# b
myvar <- cordff2[which.max(abs(cordff2))]
response <- sdata$previous
independent <- sdata[,names(myvar)]
reg <- lm(response ~ independent + I(independent^2) + I(independent^3))
# coefficients estimation 
reg$coefficients

##      (Intercept)      independent I(independent^2) I(independent^3) 
##      0.185958324      0.184578528      0.012966657     -0.001429125

# error variance
summary(reg)$sigma^2

## [1] 6.75808

# c
summary(reg)

## 
## Call:
## lm(formula = response ~ independent + I(independent^2) + I(independent^3))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7986 -0.8178 -0.3821 -0.3821 23.3768 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.185958   0.674861   0.276    0.783
## independent       0.184579   0.511131   0.361    0.719
## I(independent^2)  0.012967   0.087022   0.149    0.882
## I(independent^3) -0.001429   0.003595  -0.398    0.692
## 
## Residual standard error: 2.6 on 131 degrees of freedom
## Multiple R-squared:  0.02424,    Adjusted R-squared:  0.001897 
## F-statistic: 1.085 on 3 and 131 DF,  p-value: 0.3579

confint(reg, level = 0.90)

##                           5 %        95 %
## (Intercept)      -0.931995937 1.303912584
## independent      -0.662144843 1.031301900
## I(independent^2) -0.131191281 0.157124594
## I(independent^3) -0.007383678 0.004525427

# d
# H0: rho = 0 and H1: rho > 0
cor.test(sdata$previous, sdata$age, conf.level = 0.95, alternative = 'greater')

## 
##  Pearson's product-moment correlation
## 
## data:  sdata$previous and sdata$age
## t = -0.55389, df = 133, p-value = 0.7097
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
##  -0.1888807  1.0000000
## sample estimates:
##         cor 
## -0.04797321

############################################
# 4
############################################
team_work <- data.frame(names=c('Amin','Reza','Hassan','Farhad'),
                        cooperation=c(50,10,20,20))
team_work

##    names cooperation
## 1   Amin          50
## 2   Reza          10
## 3 Hassan          20
## 4 Farhad          20

Assignment

Afshin Motavali

April, 2022

Problem

Answer

follow me for more on