Answer
############################################
# 1
############################################
library(MASS)
h <- function(m=100,n=50,dist='norm',a=0, b=1, alpha=0.05){
Z <- c()
count <- 0
num <- 0
lower = upper = c()
pval=c()
for (i in 1:m) {
if(dist=='norm'){
xs <- rnorm(n,a,b)
}else if(dist=='beta'){
xs <- rbeta(n,a,b)
}else if(dist=='exp'){
xs <- rexp(n,a)
}else if(dist=='unif'){
xs <- runif(n,a,b)
}else if(dist=='bern'){
xs <- rbinom(n,1,a)
}else if(dist=='pois'){
xs <- rpois(n,a)
}else if(dist=='geom'){
xs <- rgeom(n,a)
}
test <- shapiro.test(xs)
pval[i] <- test$p.value
count <- count + ifelse(pval[i] > 0,1,0)
S2 <- var(xs)
Sigma2 <- b
Z[i] <- (n-1)*S2/Sigma2
l <- (n-1)*S2/qchisq(alpha/2,df=n-1, lower.tail = F)
lower[i] <- l
u <- (n-1)*S2/qchisq(1-(alpha/2),df=n-1, lower.tail = F)
upper[i] <- u
num <- num + ifelse(S2 >= l & S2 <= u, 1, 0)
}
# a
chi_df <- fitdistr(Z,"chi-squared",start=list(df=n-1),method="BFGS")
chi_df <- round(chi_df[[1]][1])
# b
hist(Z, col='blue', breaks = 20, freq = F)
curve(dchisq(x,df=chi_df),add=TRUE, col='red',lwd=3)
# c
mean_pval <- mean(pval)
# d
compare <- rchisq(n=m, df=chi_df)
chitest <- chisq.test(Z,compare)
pval_chi <- chitest$p.value
# e
# f
lower = mean(lower)
upper = mean(upper)
return(list(deg=chi_df, num=num, upper=upper, lower=lower, p_value.for.chisq=pval_chi,p_value.for.normality.data=mean_pval, num.of.normal=count))
}
h(m=100,n=50,dist='norm',a=0, b=1, alpha=0.05)

## $deg
## df
## 47
##
## $num
## [1] 100
##
## $upper
## [1] 1.490521
##
## $lower
## [1] 0.6697758
##
## $p_value.for.chisq
## [1] 0.2390079
##
## $p_value.for.normality.data
## [1] 0.5399027
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='beta',a=1, b=2, alpha=0.05)

## $deg
## df
## 2
##
## $num
## [1] 100
##
## $upper
## [1] 0.08753544
##
## $lower
## [1] 0.03933464
##
## $p_value.for.chisq
## [1] 0.2390079
##
## $p_value.for.normality.data
## [1] 0.02433376
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='exp',a=1, alpha=0.05)

## $deg
## df
## 45
##
## $num
## [1] 100
##
## $upper
## [1] 1.48449
##
## $lower
## [1] 0.6670658
##
## $p_value.for.chisq
## [1] 0.2390079
##
## $p_value.for.normality.data
## [1] 0.0003637165
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='unif',a=0, b=1, alpha=0.05)

## $deg
## df
## 5
##
## $num
## [1] 100
##
## $upper
## [1] 0.1308547
##
## $lower
## [1] 0.05880045
##
## $p_value.for.chisq
## [1] 0.2390079
##
## $p_value.for.normality.data
## [1] 0.04386997
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='bern',a=0.6, alpha=0.05)

## $deg
## df
## 13
##
## $num
## [1] 100
##
## $upper
## [1] 0.3749907
##
## $lower
## [1] 0.1685046
##
## $p_value.for.chisq
## [1] 0.397808
##
## $p_value.for.normality.data
## [1] 4.255479e-10
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='pois',a=2, alpha=0.05)

## $deg
## df
## 99
##
## $num
## [1] 100
##
## $upper
## [1] 3.206657
##
## $lower
## [1] 1.440933
##
## $p_value.for.chisq
## [1] 0.2504045
##
## $p_value.for.normality.data
## [1] 0.002478923
##
## $num.of.normal
## [1] 100
h(m=100,n=50,dist='geom',a=0.1, alpha=0.05)

## $deg
## df
## 4011
##
## $num
## [1] 100
##
## $upper
## [1] 136.636
##
## $lower
## [1] 61.3983
##
## $p_value.for.chisq
## [1] 0.2390079
##
## $p_value.for.normality.data
## [1] 0.0004611004
##
## $num.of.normal
## [1] 100
############################################
# 2
############################################
data <- read.table('dataset/data.txt', header = T)
head(data)
## age job marital education default balance housing loan contact day
## 1 30 unemployed married primary no 1787 no no cellular 19
## 2 33 services married secondary no 4789 yes yes cellular 11
## 3 35 management single tertiary no 1350 yes no cellular 16
## 4 30 management married tertiary no 1476 yes yes unknown 3
## 5 59 blue-collar married secondary no 0 yes no unknown 5
## 6 35 management single tertiary no 747 no no cellular 23
## month duration campaign pdays previous poutcome y
## 1 Oct 79 1 -1 0 unknown no
## 2 May 220 1 339 4 failure no
## 3 Apr 185 1 330 1 failure no
## 4 Jun 199 4 -1 0 unknown no
## 5 May 226 1 -1 0 unknown no
## 6 Feb 141 2 176 3 failure no
set.seed(2022)
sdata <- data[sample.int(nrow(data), size=150, replace = F),]
head(sdata)
## age job marital education default balance housing loan contact
## 4324 83 retired divorced primary no 0 no no telephone
## 1459 36 management married tertiary no 2987 yes no cellular
## 2871 59 retired married primary no 46 no no cellular
## 3915 30 technician married tertiary no 569 no no cellular
## 708 55 retired married primary no 8894 no no unknown
## 2751 36 entrepreneur married tertiary no 6 no no telephone
## day month duration campaign pdays previous poutcome y
## 4324 31 May 664 1 77 3 success no
## 1459 12 Aug 307 1 -1 0 unknown yes
## 2871 14 Aug 78 2 -1 0 unknown no
## 3915 27 Aug 976 12 -1 0 unknown yes
## 708 11 Jun 262 1 -1 0 unknown no
## 2751 30 Oct 246 1 -1 0 unknown no
# a
# H0:pA=pB
n_married <- table(sdata$marital)[2]
n_single <- table(sdata$marital)[3]
test <- prop.test(x = c(n_married, n_single), n = c(100, 100),conf.level = 0.01)
test
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(n_married, n_single) out of c(100, 100)
## X-squared = 39.592, df = 1, p-value = 3.13e-10
## alternative hypothesis: two.sided
## 1 percent confidence interval:
## 0.4192552 0.4407448
## sample estimates:
## prop 1 prop 2
## 0.88 0.45
# b
# H0: p_yes = p_no
# H1: p_yes > p_no
new <- sdata[sdata$age < 40 & sdata$housing=='yes',]
n <- table(sdata$y)
n_yes <- n[1]
n_no <- n[2]
prop.test(x = c(n_yes, n_no), n = c(150, 150),conf.level = 0.05,
alternative = 'greater')
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(n_yes, n_no) out of c(150, 150)
## X-squared = 182.52, df = 1, p-value < 2.2e-16
## alternative hypothesis: greater
## 5 percent confidence interval:
## 0.8386297 1.0000000
## sample estimates:
## prop 1 prop 2
## 0.8933333 0.1066667
# c
# H0: sigma2_age = 110
# H1: sigma2_age > 110
library(EnvStats)
varTest(sdata$age, sigma.squared = 110, conf.level = 0.01)
##
## Chi-Squared Test on Variance
##
## data: sdata$age
## Chi-Squared = 162.66, df = 149, p-value = 0.4197
## alternative hypothesis: true variance is not equal to 110
## 1 percent confidence interval:
## 120.4518 120.8028
## sample estimates:
## variance
## 120.0879
# d
sdata <- sdata[sdata$education != 'unknown',]
res.aov <- aov(duration ~ education, data = sdata)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## education 2 292048 146024 2.231 0.111
## Residuals 132 8637954 65439
# e
aov_one <- aov(age ~ housing, data = sdata)
summary(aov_one)
## Df Sum Sq Mean Sq F value Pr(>F)
## housing 1 880 880.0 8.16 0.00497 **
## Residuals 133 14343 107.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############################################
# 3
############################################
# a
new <- sdata[,c(1,6,10,12,13,15)]
cordf <- cor(new)
cordff <- cordf[6,]
cordff2 <- cordff[-6]
barplot(cordff2, ylab='previous')

# b
myvar <- cordff2[which.max(abs(cordff2))]
response <- sdata$previous
independent <- sdata[,names(myvar)]
reg <- lm(response ~ independent + I(independent^2) + I(independent^3))
# coefficients estimation
reg$coefficients
## (Intercept) independent I(independent^2) I(independent^3)
## 0.185958324 0.184578528 0.012966657 -0.001429125
# error variance
summary(reg)$sigma^2
## [1] 6.75808
# c
summary(reg)
##
## Call:
## lm(formula = response ~ independent + I(independent^2) + I(independent^3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7986 -0.8178 -0.3821 -0.3821 23.3768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.185958 0.674861 0.276 0.783
## independent 0.184579 0.511131 0.361 0.719
## I(independent^2) 0.012967 0.087022 0.149 0.882
## I(independent^3) -0.001429 0.003595 -0.398 0.692
##
## Residual standard error: 2.6 on 131 degrees of freedom
## Multiple R-squared: 0.02424, Adjusted R-squared: 0.001897
## F-statistic: 1.085 on 3 and 131 DF, p-value: 0.3579
confint(reg, level = 0.90)
## 5 % 95 %
## (Intercept) -0.931995937 1.303912584
## independent -0.662144843 1.031301900
## I(independent^2) -0.131191281 0.157124594
## I(independent^3) -0.007383678 0.004525427
# d
# H0: rho = 0 and H1: rho > 0
cor.test(sdata$previous, sdata$age, conf.level = 0.95, alternative = 'greater')
##
## Pearson's product-moment correlation
##
## data: sdata$previous and sdata$age
## t = -0.55389, df = 133, p-value = 0.7097
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
## -0.1888807 1.0000000
## sample estimates:
## cor
## -0.04797321
############################################
# 4
############################################
team_work <- data.frame(names=c('Amin','Reza','Hassan','Farhad'),
cooperation=c(50,10,20,20))
team_work
## names cooperation
## 1 Amin 50
## 2 Reza 10
## 3 Hassan 20
## 4 Farhad 20