# 10 individuals are chosen at random from a normal population and their
# heights are found to be (in inches):
# height: 63 63 66 67 68 69 70 70 71 71
# In the light of these data, discuss the suggestion that the mean height
# in the population is 66 inches.
# Question 1: one-sample t-test of H0: mu = 66 against H1: mu != 66.
# Sample of 10 heights (inches) and the hypothesised population mean.
x <- c(63, 63, 66, 67, 68, 69, 70, 70, 71, 71)
mue <- 66

# Descriptive statistics.
sum(x)

## [1] 678

mean(x)

## [1] 67.8

n <- length(x)
n

## [1] 10

sum(x) / length(x)

## [1] 67.8

standard_deviation <- sd(x)
print(standard_deviation)

## [1] 3.011091

# Lower critical value for a two-sided test at alpha = 0.05 with n - 1 = 9
# degrees of freedom; H0 is rejected when |t| exceeds its absolute value.
qt(0.025, df = n - 1)

## [1] -2.262157

# Test statistic: (sample mean - hypothesised mean) / standard error.
t <- (mean(x) - mue) / (standard_deviation / sqrt(n))
print(t)

## [1] 1.890378

# Conclusion: |t| = 1.89 is smaller than the critical value 2.262, so we fail
# to reject H0; the data are consistent with a mean height of 66 inches.

# Cross-check with the built-in test. The hypothesised mean must be passed as
# `mu`; the earlier spelling `mue = 66` was not a t.test() argument, so the
# test silently ran against mu = 0 (t = 71.204, "not equal to 0").
height_test <- t.test(
  x,
  alternative = "two.sided",
  mu = mue,
  conf.level = 0.95
)
print(height_test)

# Expected result with mu = 66 (hand-derived; re-knit to refresh the output):
#   t = 1.8904, df = 9, p-value approximately 0.0913
#   alternative hypothesis: true mean is not equal to 66
#   95 percent confidence interval: 65.646 69.954
#   sample mean of x: 67.8

# Conclusion: p-value > 0.05 and 66 lies inside the confidence interval, so we
# fail to reject H0 (mu = 66), in agreement with the manual calculation.

# Assumptions:

# 1. Random sample (n less than 30)

# 2. Normal population (less than 30, greater than 30)
# H0: the distribution is normal

# Shapiro-Wilk test of the normality assumption for the sample in x.
# The result is stored so the p-value can be read from the test object
# instead of being retyped by hand.
normality_test <- shapiro.test(x)
normality_test

## 
##  Shapiro-Wilk normality test
## 
## data:  x
## W = 0.88278, p-value = 0.1404

p <- normality_test$p.value
alpha <- 0.05

# Visual checks of the distribution of x.
boxplot(x)

hist(x)

# Decision rule: reject H0 (normality) if p < alpha.
# TRUE means reject; with the output shown above (p = 0.1404) this is FALSE,
# so normality is not rejected.
p < alpha

# Given data: random sample of 16 values, mue = 41.5

# Step 1: H0: mue = 41.5, H1: mue is not equal to 41.5
# Step 2: alpha = 0.05
# Step 3: test statistic
# Question 2

# Simulated example: draw 16 values from a normal distribution and run a
# one-sample t-test of H0: mu = 41.5.
# The former `x <- c(16)` was removed: it stored the sample size as if it were
# data, and nothing in this section reads x.

# Fix the seed so the simulated sample is the same on every run.
set.seed(2023)

# Generate a random sample from a normal distribution
sample_data <- rnorm(16, mean = 41.5, sd = 135)

# Perform a one-sample t-test
t_test_result <- t.test(sample_data, mu = 41.5)

# Print the result
print(t_test_result)

# NOTE: the output below was captured from an earlier, unseeded run; re-knit
# to refresh the numbers for the seeded sample.

## 
##  One Sample t-test
## 
## data:  sample_data
## t = -0.51222, df = 15, p-value = 0.616
## alternative hypothesis: true mean is not equal to 41.5
## 95 percent confidence interval:
##  -81.93417 117.10249
## sample estimates:
## mean of x 
##  17.58416

# Question 2 (manual calculation): one-sample t-test of H0: mu = 41.5
# against H1: mu != 41.5 on 16 observations.
x <- c(63, 63, 66, 67, 68, 69, 70, 70, 71, 71, 11, 10, 5, 12, 15, 16)
mue <- 41.5

# Sample mean, computed two ways.
sum(x)

## [1] 747

mean(x)

## [1] 46.6875

# Store the sample size of THIS data set. Previously n was never reassigned
# here, so the statistic below silently used n = 10 from the earlier question.
n <- length(x)
n

## [1] 16

sum(x) / length(x)

## [1] 46.6875

standard_deviation <- sd(x)
print(standard_deviation)

## [1] 28.33777

# Step 4: critical region for a two-sided test at alpha = 0.05, df = 15.
qt(0.025, 15)

## [1] -2.13145

# NOTE(review): 0.25 looks like a typo for 0.025; kept as in the original.
qt(0.25, 15)

## [1] -0.6911969

# Test statistic: (sample mean - hypothesised mean) / standard error.
t <- (mean(x) - mue) / (standard_deviation / sqrt(n))
print(t)

# With n = 16 the statistic is approximately 0.7322; the earlier printed value
# 0.5788852 came from the stale n = 10.

# Cross-check with the built-in test. The hypothesised mean must be passed as
# `mu`; the earlier spelling `mue = 41.5` was not a t.test() argument, so the
# test silently ran against mu = 0 (t = 6.5901, "not equal to 0").
q2_test <- t.test(
  x,
  alternative = "two.sided",
  mu = mue,
  conf.level = 0.95
)
print(q2_test)

# Expected result with mu = 41.5 (hand-derived; re-knit to refresh the output):
#   t approximately 0.7322, df = 15, p-value approximately 0.475
#   alternative hypothesis: true mean is not equal to 41.5
#   95 percent confidence interval: 31.58737 61.78763
#   sample mean of x: 46.6875

boxplot(x)

hist(x)

# Step 5: conclusion. |t| = 0.73 is smaller than the critical value 2.131 and
# the p-value exceeds 0.05, so we fail to reject H0 (mu = 41.5).

# Assumptions for using the t distribution:
# (1) The sample of n observations is selected randomly.
# (2) The population from which the small sample is drawn is normal.
# Question 3
# For the z-test

## Z-test:
# If n > 30, use the z-test when the values of n, mue, sd and the mean of x
# are given; in this situation the z-test is used whether the variance is
# known or unknown:
# One-sample z-test from summary statistics.
# Each value is assigned and then echoed so it appears in the knitted output.

# Sample size.
n <- 36
n

## [1] 36

# Significance level.
alpha <- 0.05
alpha

## [1] 0.05

# Observed sample mean.
xbar <- 2
xbar

## [1] 2

# Mean under the null hypothesis.
mue <- 5
mue

## [1] 5

# Standard deviation.
s <- 2
s

## [1] 2

# z statistic: distance of the sample mean from mue in standard-error units.
z <- (xbar - mue) / (s / sqrt(n))
z

## [1] -9

# Critical region: lower-tail critical value of the standard normal at 0.05.
qnorm(0.05)

## [1] -1.644854

# Decision rule: reject H0 if z is below the critical value z_alpha.
# Conclusion: reject H0, because z = -9 is less than z_alpha = -1.644854.
# Distribution diagnostics: box plot, histogram and Shapiro-Wilk normality
# test of the vector x.
# NOTE(review): x is not assigned anywhere in the z-test section, which works
# from summary statistics only; these diagnostics appear to run on whatever x
# was left over from the earlier question -- confirm that this is intended.
boxplot(x)

hist(x)

# H0 of the Shapiro-Wilk test: the data are normally distributed. The p-value
# shown below is far below 0.05, so normality is rejected for this x.
shapiro.test(x)

## 
##  Shapiro-Wilk normality test
## 
## data:  x
## W = 0.71647, p-value = 0.0002578

# Assumptions for the one-sample z-test:
# Data distribution: the data should be approximately normally distributed.
#   However, if the sample size is large (typically n > 30), the Central Limit
#   Theorem suggests that the distribution of sample means will be
#   approximately normal even if the underlying data distribution is not.
# Known population standard deviation (σ): the population standard deviation
#   (σ) should be known. If it is unknown, a t-test might be more appropriate.
# Random sampling: the data should be obtained through a random sampling
#   process or, in the case of experimental data, through a random assignment
#   process.
# For two samples
# Example data for two samples

# Two example samples for the two-sample comparison.
sample1 <- seq(120, 160, by = 10)
sample2 <- c(110, 125, 135, 145, 155)

# Difference in means assumed under the null hypothesis.
hypothesized_diff <- 0

# Summary statistics, one sample at a time.
mean1 <- mean(sample1)
sd1 <- sd(sample1)
mean2 <- mean(sample2)
sd2 <- sd(sample2)

# Standard error of the difference between the two sample means:
# square root of the sum of each variance divided by its sample size.
standard_error_diff <- sqrt(
  (sd1^2 / length(sample1)) + (sd2^2 / length(sample2))
)

# Box plot and histogram of the first sample.
boxplot(sample1)

hist(sample1)

# Box plot and histogram of the second sample.
boxplot(sample2)

hist(sample2)

# ONE-SAMPLE Z-TEST ON SIMULATED DATA (population sd known)
# (This section was headed "2 SAMPLE POPULATION", but the code below tests a
# single simulated sample against a hypothesised mean.)

# Example data
population_mean <- 100
population_sd <- 15
sample_size <- 30

# Fix the seed so the simulated sample is the same on every run.
set.seed(2023)

# Generate a random sample from a normal distribution
sample_data <- rnorm(sample_size, mean = population_mean, sd = population_sd)

# Specify the hypothesized population mean
hypothesized_mean <- 100

# Calculate the standard error of the mean
standard_error <- population_sd / sqrt(sample_size)

# Calculate the z-score and report it together with the two-sided p-value
# (previously the z-score was computed but never displayed).
z_score <- (mean(sample_data) - hypothesized_mean) / standard_error
z_score
p_value_one_sample <- 2 * pnorm(-abs(z_score))
p_value_one_sample

# TWO-SAMPLE COMPARISON OF MEANS ("double mean")
# Example data for two samples
sample1 <- c(120, 130, 140, 150, 160)
sample2 <- c(110, 125, 135, 145, 155)

# Specify hypothesized difference in means
hypothesized_diff <- 0

# Calculate sample means and standard deviations
mean1 <- mean(sample1)
mean2 <- mean(sample2)
sd1 <- sd(sample1)
sd2 <- sd(sample2)

# Calculate the standard error of the difference
standard_error_diff <- sqrt(
  (sd1^2 / length(sample1)) + (sd2^2 / length(sample2))
)

# Test statistic for H0: mean1 - mean2 = hypothesized_diff, with its two-sided
# p-value (previously the calculation stopped at the standard error).
# NOTE(review): the standard deviations are estimated from samples of size 5,
# so t.test(sample1, sample2) may be the more appropriate test -- confirm.
z_two_sample <- (mean1 - mean2 - hypothesized_diff) / standard_error_diff
z_two_sample
p_value_two_sample <- 2 * pnorm(-abs(z_two_sample))
p_value_two_sample

# Assumptions for the two-sample z-test:
# Data distribution: similar to the one-sample z-test, the data should be
#   approximately normally distributed. If the sample sizes are large, the
#   Central Limit Theorem applies.
# Known population standard deviations (σ₁ and σ₂): the population standard
#   deviations for the two groups being compared should be known. If they are
#   unknown, a pooled t-test might be more appropriate.
# Random sampling: the data should be obtained through a random sampling
#   process or, in the case of experimental data, through a random assignment
#   process.
# Independent samples:

# Question 4: F distribution
# Two example samples whose variances are compared.
group1 <- c(120, 130, 140, 150, 160)
group2 <- c(110, 125, 135, 145, 155)

# F-test of H0: the ratio of the two population variances equals 1.
f_test_result <- var.test(x = group1, y = group2)

# Show the test summary.
print(f_test_result)

## 
##  F test to compare two variances
## 
## data:  group1 and group2
## F = 0.81967, num df = 4, denom df = 4, p-value = 0.8518
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.08534224 7.87256548
## sample estimates:
## ratio of variances 
##          0.8196721

# For sample mean population

# Example data for three groups

# One-way ANOVA of H0: the three group means are equal.
group1 <- c(120, 130, 140, 150, 160)
group2 <- c(110, 125, 135, 145, 155)
group3 <- c(100, 115, 125, 135, 145)

# Combine data into a data frame (one row per observation, with its group)
data_df <- data.frame(
  value = c(group1, group2, group3),
  group = rep(c("Group1", "Group2", "Group3"), each = 5)
)

# Perform one-way analysis of variance (ANOVA)
anova_result <- aov(value ~ group, data = data_df)

# Print the fitted model (sums of squares and degrees of freedom only)
print(anova_result)

## Call:
##    aov(formula = value ~ group, data = data_df)
## 
## Terms:
##                    group Residuals
## Sum of Squares   653.333  3440.000
## Deg. of Freedom        2        12
## 
## Residual standard error: 16.93123
## Estimated effects may be unbalanced

# print() on an aov object does not show the test itself; summary() adds the
# mean squares, the F statistic and its p-value.
anova_summary <- summary(anova_result)
print(anova_summary)

# From the sums of squares above: F = (653.333 / 2) / (3440 / 12), about 1.14
# on (2, 12) degrees of freedom, p-value about 0.35, so H0 (equal group means)
# is not rejected at alpha = 0.05.

# Perform an F-test for equality of variances
# NOTE(review): this compares only group1 and group2; the ANOVA assumption
# concerns all three groups -- confirm whether group3 should be checked too.
f_test_result <- var.test(group1, group2)

# Print the result
print(f_test_result)

## 
##  F test to compare two variances
## 
## data:  group1 and group2
## F = 0.81967, num df = 4, denom df = 4, p-value = 0.8518
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.08534224 7.87256548
## sample estimates:
## ratio of variances 
##          0.8196721

# Assumptions for ANOVA:
# Independence: the observations within each group should be independent of
#   each other. Independence is crucial for the validity of ANOVA.
# Normality: the populations from which the samples are drawn should be
#   approximately normally distributed. ANOVA is robust to deviations from
#   normality, especially with larger sample sizes.
# Homogeneity of variances (homoscedasticity): the variances within each group
#   should be approximately equal. Homogeneity of variances is an important
#   assumption for the validity of ANOVA. If this assumption is violated,
#   adjustments or alternative methods might be considered.
# Random sampling: the data should be obtained through a random sampling
#   process or, in the case of experimental data, through a random assignment
#   process.
# Question 5

#chi square

# Chi-square goodness-of-fit test.
# Observed counts in the four categories.
observed <- c(25, 15, 10, 20)

# Counts under the theoretical distribution; only their proportions are used.
expected <- c(20, 20, 20, 20)

# Compare the observed counts with the expected proportions.
chi_square_result <- chisq.test(x = observed, p = prop.table(expected))

# Show the test summary.
print(chi_square_result)

## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 7.1429, df = 3, p-value = 0.06748

# Chi-square test on a 2 x 3 contingency table.
# Counts per category for each of the two populations.
population1 <- c(30, 20, 10)
population2 <- c(10, 25, 15)

# Stack the two count vectors as the rows of the table.
contingency_table <- rbind(
  population1 = population1,
  population2 = population2
)

# Pearson chi-square test on the table.
chi_square_result <- chisq.test(x = contingency_table)

# Show the test summary.
print(chi_square_result)

## 
##  Pearson's Chi-squared test
## 
## data:  contingency_table
## X-squared = 10.735, df = 2, p-value = 0.004665

# Box plot and histogram of the first population's counts.
boxplot(population1)

hist(population1)

# Assumptions for the chi-square test:
# Categorical data: the data must consist of categorical variables. These
#   variables should be divided into categories or groups, and each
#   observation should fall into one and only one category.
# Independence: the observations in each category should be independent. This
#   means that the occurrence of an observation in one category should not
#   influence the occurrence of an observation in another category.
# Expected frequencies: the expected frequency for each category should be
#   greater than 5. If expected frequencies are too small, the chi-square test
#   may not provide reliable results.
# Random sampling: the data should be obtained through a random sampling
#   process or, in the case of experimental data, through a random assignment
#   process.

```

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

test of hypothesis

Muhammad Bilal DS-5th

2023-12-10

assumption:

1 random less

given data:random sample of 16 values,mue=41.5

for sample mean population

Example data for three groups