Lab-3
1. For a binomial distribution with n = 7 and p = 0.2, find (a) P(X = 5), (b) P(X > 2), (c) P(X < 8), (d) P(X ≥ 4), and (e) plot the pmf and CDF.
# X ~ Binomial(size = n, prob = p)
n <- 7
p <- 0.2
# (a) P(X = 5): probability mass at a single point
dbinom(5, size = n, prob = p)
## [1] 0.0043008
# (b) P(X > 2): upper tail strictly above 2
pbinom(2, size = n, prob = p, lower.tail = FALSE)
## [1] 0.148032
# (c) P(X < 8): X cannot exceed n = 7, so this is the whole distribution
pbinom(7, size = n, prob = p)
## [1] 1
# (d) P(X ≥ 4), written as P(X > 3)
pbinom(3, size = n, prob = p, lower.tail = FALSE)
## [1] 0.033344
# (e) Plot pmf and CDF over every possible outcome 0, 1, ..., n
x <- 0:n
pmf_values <- dbinom(x, size = n, prob = p)
cdf_values <- pbinom(x, size = n, prob = p)
plot(x, pmf_values, type = "h", main = "Binomial pmf", xlab = "x", ylab = "P(X=x)")
plot(x, cdf_values, type = "s", main = "Binomial CDF", xlab = "x", ylab = "P(X≤x)")
# X ~ Poisson(lambda)
lambda <- 3
# (a) P(x ≤ 2): cumulative probability up to and including 2
ppois(2, lambda = lambda)
## [1] 0.4231901
# (b) P(x ≥ 5), written as the upper tail P(x > 4)
ppois(4, lambda = lambda, lower.tail = FALSE)
## [1] 0.1847368
# (c) P(x = 8): probability mass at a single point
dpois(8, lambda = lambda)
## [1] 0.008101512
# (d) Plot pmf and CDF for the counts 0 through 10
x <- 0:10
pmf_values <- dpois(x, lambda = lambda)
cdf_values <- ppois(x, lambda = lambda)
plot(x, pmf_values, type = "h", main = "Poisson pmf", xlab = "x", ylab = "P(X=x)")
plot(x, cdf_values, type = "s", main = "Poisson CDF", xlab = "x", ylab = "P(X≤x)")
# Number of graduates among n students, each graduating with probability p
n <- 5
p <- 0.4
# (a) none will graduate: P(X = 0)
dbinom(0, size = n, prob = p)
## [1] 0.07776
# (b) one will graduate: P(X = 1)
dbinom(1, size = n, prob = p)
## [1] 0.2592
# (c) at least one will graduate: P(X > 0), the complement of (a)
pbinom(0, size = n, prob = p, lower.tail = FALSE)
## [1] 0.92224
# Poisson approximation to the number of defective blades in a packet,
# scaled up to the expected number of packets in the whole consignment.
# NOTE(review): 0.001 / 5 evaluates to 0.0002; the problem statement is not
# shown here, so confirm this is the intended per-blade defect probability.
p <- 0.001 / 5 # probability of defective blade
n <- 10 # blades per packet
N <- 100000 # number of packets in consignment
lambda <- n * p # parameter for Poisson distribution (expected defects per packet)
# number of packets containing no defective blades
N * dpois(0, lambda = lambda)
## [1] 99800.2
# number of packets containing one defective blade
N * dpois(1, lambda = lambda)
## [1] 199.6004
# number of packets containing two defective blades
N * dpois(2, lambda = lambda)
## [1] 0.1996004
# Scores modelled as Normal(mean = mu, sd = sigma) for n students
n <- 1000 # number of students
mu <- 42 # mean score
sigma <- 24 # standard deviation
# (a) the number of students exceeding a score of 50,
n * pnorm(50, mean = mu, sd = sigma, lower.tail = FALSE)
## [1] 369.4413
# (b) the number of students lying between 30 and 54,
# diff() gives P(X <= 54) - P(X <= 30)
n * diff(pnorm(c(30, 54), mean = mu, sd = sigma))
## [1] 382.9249
# (c) the value of the score exceeded by the top 100 students.
# i.e. the score with an upper-tail probability of 100 / n
qnorm(100 / n, mean = mu, sd = sigma, lower.tail = FALSE)
## [1] 72.75724
Lab-4
1. A random sample of 10 boys had the following I.Q.'s: 70, 120, 110, 101, 88, 83, 95, 98, 107, 100. Do these data support the assumption of a population mean I.Q. of 100? Find a reasonable range in which most of the mean I.Q. values of samples of 10 boys lie.
# I.Q. scores of the sampled boys
iq <- c(70, 120, 110, 101, 88, 83, 95, 98, 107, 100)
# Two-sided one-sample t-test of H0: population mean = 100
t.test(iq, mu = 100)
##
## One Sample t-test
##
## data: iq
## t = -0.62034, df = 9, p-value = 0.5504
## alternative hypothesis: true mean is not equal to 100
## 95 percent confidence interval:
## 86.98934 107.41066
## sample estimates:
## mean of x
## 97.2
# To find a reasonable range for the mean IQ values of samples of 10 boys:
# sample mean -/+ (t critical value) * (standard error of the mean)
n_boys <- length(iq)
std_error <- sd(iq) / sqrt(n_boys)
t_crit <- qt(0.975, df = n_boys - 1)
mean(iq) + c(-1, 1) * t_crit * std_error
## [1] 86.98934 107.41066
# Compare mean weekly food expenditure of two markets using only the
# summary statistics (sample size, mean, standard deviation) of each sample.
# summary statistics for sample 1 (market A)
n1 <- 400
xbar1 <- 250
s1 <- 40
# summary statistics for sample 2 (market B)
n2 <- 400
xbar2 <- 220
s2 <- 55
# Welch two-sample t-test computed by hand.
# stats::t.test() works on raw observations only: it has no `n` or `sd`
# arguments, so passing the two means as `x` runs a one-sample test on a
# "sample" of two numbers (df = 1) instead of comparing the markets.
var_mean1 <- s1^2 / n1 # squared standard error of mean 1
var_mean2 <- s2^2 / n2 # squared standard error of mean 2
se_diff <- sqrt(var_mean1 + var_mean2)
t_stat <- (xbar1 - xbar2) / se_diff
# Welch-Satterthwaite approximation to the degrees of freedom
welch_df <- (var_mean1 + var_mean2)^2 /
  (var_mean1^2 / (n1 - 1) + var_mean2^2 / (n2 - 1))
# keep the result in `t_test` with htest-style element names
t_test <- list(
  statistic = t_stat,
  parameter = welch_df,
  p.value = 2 * pt(-abs(t_stat), df = welch_df) # two-sided
)
cat(sprintf(
  "t = %.3f, df = %.1f, p-value = %s\n",
  t_test$statistic, t_test$parameter,
  format.pval(t_test$p.value, digits = 3, eps = 1e-4)
))
## t = 8.823, df = 728.8, p-value = <1e-04
# extract the p-value
p_value <- t_test$p.value
# check whether the null hypothesis is rejected
if (p_value < 0.01) {
  cat("The average weekly food expenditure of the two populations of shoppers are significantly different at the 1% level of significance.")
} else {
  cat("There is no significant difference in the average weekly food expenditure of the two populations of shoppers at the 1% level of significance.")
}
## The average weekly food expenditure of the two populations of shoppers are significantly different at the 1% level of significance.
# One-sample t statistic and two-sided p-value from summary statistics.
# The values are stored in t_value / p_value but not printed.
sample_mean <- 3.4
pop_mean <- 3.25
sd <- 2.61 # a variable named like stats::sd(); calls to sd() still find the function
n <- 900
std_error <- sd / sqrt(n)
t_value <- (sample_mean - pop_mean) / std_error
p_value <- 2 * pt(-abs(t_value), df = n - 1)
# Confidence intervals for the mean from the same summary statistics
xbar <- 3.4
s <- 2.61
n <- 900
# 95% and 98% confidence intervals, computed the same way for each level
for (level in c(95, 98)) {
  alpha <- (100 - level) / 100
  # upper-tail critical value of t with n - 1 degrees of freedom
  t_critical <- qt(alpha / 2, df = n - 1, lower.tail = FALSE)
  margin_of_error <- t_critical * (s / sqrt(n))
  lower_bound <- xbar - margin_of_error
  upper_bound <- xbar + margin_of_error
  cat(level, "% confidence interval: (", lower_bound, ", ", upper_bound, ")\n", sep="")
}
## 95% confidence interval: (3.229253, 3.570747)
## 98% confidence interval: (3.197246, 3.602754)
# Exact two-sided binomial test of H0: success probability = p0
x <- 50 # observed number of successes
n <- 400 # number of trials
p0 <- 0.2 # hypothesised probability of success
binom.test(x, n, p = p0)
##
## Exact binomial test
##
## data: x and n
## number of successes = 50, number of trials = 400, p-value = 9.994e-05
## alternative hypothesis: true probability of success is not equal to 0.2
## 95 percent confidence interval:
## 0.09421851 0.16145866
## sample estimates:
## probability of success
## 0.125
# Two-sample test for equality of proportions (continuity-corrected by default)
# sample 1: x1 successes out of n1
x1 <- 60
n1 <- 100
# sample 2: x2 successes out of n2
x2 <- 100
n2 <- 200
prop.test(x = c(x1, x2), n = c(n1, n2))
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(x1, x2) out of c(n1, n2)
## X-squared = 2.2919, df = 1, p-value = 0.1301
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.02591167 0.22591167
## sample estimates:
## prop 1 prop 2
## 0.6 0.5
Lab-5
# Contingency table: hypertension status (rows) by smoking habit (columns)
smoking <- rbind(
  c(21, 36, 30),
  c(48, 26, 19)
)
dimnames(smoking) <- list(
  c("Hypertension", "No Hypertension"),
  c("Non-smokers", "Moderate Smokers", "Heavy Smokers")
)
# Pearson chi-squared test of independence between rows and columns
chisq.test(smoking)
##
## Pearson's Chi-squared test
##
## data: smoking
## X-squared = 14.464, df = 2, p-value = 0.0007232
# Observations for the two independent groups
groupA <- c(8, 6, 5, 7, 6, 8, 7, 4, 5, 6)
# NOTE(review): 6.7 is the only non-integer value in either group; confirm
# against the source data that it is not a mistyped "6, 7".
groupB <- c(10, 6, 7, 8, 6, 9, 7, 6.7)
# Two-sided two-sample t-test; t.test() defaults to the Welch
# (unequal-variance) form, as the output header shows
t.test(x = groupA, y = groupB)
##
## Welch Two Sample t-test
##
## data: groupA and groupB
## t = -1.9251, df = 14.501, p-value = 0.07406
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.6645423 0.1395423
## sample estimates:
## mean of x mean of y
## 6.2000 7.4625
# Observations for the two samples
# NOTE(review): 23.22 is the only non-integer value in either sample; confirm
# against the source data that it is not a mistyped "23, 22".
sample1 <- c(20, 16, 26, 27, 23.22, 18, 24, 25, 19)
sample2 <- c(27, 33, 42, 35, 32, 34, 38, 28, 41, 43, 30, 37)
# F-test of H0: the two population variances are equal (ratio = 1)
var.test(x = sample1, y = sample2)
##
## F test to compare two variances
##
## data: sample1 and sample2
## F = 0.52759, num df = 8, denom df = 11, p-value = 0.3735
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1440008 2.2387943
## sample estimates:
## ratio of variances
## 0.5275929
Lab-6
# Observed values for each of the three systems (unequal group sizes)
system_a <- c(55, 60, 63, 56, 59, 55)
system_b <- c(57, 53, 64, 49, 62)
system_c <- c(66, 52, 61, 57)
# Stack the groups into long format: one row per observation, labelled by system
groups <- list(A = system_a, B = system_b, C = system_c)
data <- data.frame(
  value = unlist(groups, use.names = FALSE),
  system = factor(rep(names(groups), times = lengths(groups)))
)
# One-way ANOVA of value on system
result <- aov(value ~ system, data = data)
# Print summary of ANOVA test
summary(result)
## Df Sum Sq Mean Sq F value Pr(>F)
## system 2 8.93 4.467 0.172 0.844
## Residuals 12 312.00 26.000
# p-value of the system effect: first entry of the Pr(>F) column
system_p <- summary(result)[[1]][["Pr(>F)"]][1]
# Report the decision at the 5% level
if (system_p >= 0.05) {
  cat("The three systems are equally effective at a 5% level of significance.\n")
} else {
  cat("The three systems are not equally effective at a 5% level of significance.\n")
}
## The three systems are equally effective at a 5% level of significance.
Weight gain by ration (rows) and type of pig (columns):
Ration   I     II    III   IV
A        13.8  15.7  16.0  20.2
B         8.7  11.8   9.0  12.9
C        12.0  16.5  13.3  12.5
# Weight gain of pigs in a two-way layout: three rations (A, B, C) crossed
# with four types of pig (I-IV), one observation per ration/type cell.
# create the data frame
pigs <- data.frame(
  ration = rep(c("A", "B", "C"), each = 4),
  type = rep(c("I", "II", "III", "IV"), times = 3),
  weight_gain = c(13.8, 15.7, 16.0, 20.2, 8.7, 11.8, 9.0, 12.9, 12.0, 16.5, 13.3, 12.5)
)
# check the data
pigs
# Two-way ANOVA without interaction. With a single observation per cell,
# both factors must be fitted in one model: a separate one-way ANOVA per
# factor leaves the other factor's variation in the residual term, which
# distorts each F test.
anova_lm <- aov(weight_gain ~ ration + type, data = pigs)
# "ration" row: test whether the difference in the rations is significant
# "type" row: test whether the four types of pigs differ significantly in gaining weight
summary(anova_lm)
## Df Sum Sq Mean Sq F value Pr(>F)
## ration 2 67.87 33.94 9.855 0.0127 *
## type 3 26.35 8.78 2.551 0.1517
## Residuals 6 20.66 3.44
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3. Analyse the variance in the following Latin square:
20 B  17 C  25 D  34 A
23 A  21 D  15 C  24 B
24 D  26 A  21 B  19 C
26 C  23 B  27 A  22 D
# 4 x 4 Latin square, entered row by row: each response in `freq` pairs with
# the treatment letter at the same position in `seed`.
freq <- c(
  20, 17, 25, 34,
  23, 21, 15, 24,
  24, 26, 21, 19,
  26, 23, 27, 22
)
seed <- c(
  "B", "C", "D", "A",
  "A", "D", "C", "B",
  "D", "A", "B", "C",
  "C", "B", "A", "D"
)
# Four column labels; within every row of the square they run col1..col4
col <- paste0("col", 1:4)
# Row labels, each repeated for the four cells of that row
row <- rep(paste0("row", LETTERS[1:4]), each = 4)
# Repeat the column labels explicitly for each of the four rows instead of
# leaving data.frame() to recycle the length-4 vector
mydata <- data.frame(row = row, col = rep(col, times = 4), seed = seed, freq = freq)
mydata
# Additive model with row, column and treatment (seed) effects
latin_model <- lm(freq ~ row + col + seed, data = mydata)
myfit <- anova(latin_model)
print(myfit)
## Analysis of Variance Table
##
## Response: freq
## Df Sum Sq Mean Sq F value Pr(>F)
## row 3 34.187 11.396 0.7058 0.5826
## col 3 22.688 7.563 0.4684 0.7151
## seed 3 141.188 47.063 2.9148 0.1227
## Residuals 6 96.875 16.146