# Read the tablet quality-control measurements from CSV (first row = header).
dat1 <- read.csv(
  "C:/Users/Lenovo/Downloads/tablet quality control.csv",
  header = TRUE
)
head(dat1)

# Put the columns of dat1 on the search path so later code can refer to
# Company, Conformity and Concentration by bare name.
attach(dat1)

# Factor copies of the two categorical columns. These are new global
# variables that mask the attached columns; dat1 itself is left untouched,
# which is why str() and summary() below still report them as character.
Company <- as.factor(Company)
Conformity <- as.factor(Conformity)

# Structure of the data frame
str(dat1)
## 'data.frame':    100 obs. of  4 variables:
##  $ ID           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Company      : chr  "X" "X" "X" "X" ...
##  $ Conformity   : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Concentration: num  100.7 98.5 103.8 99.6 97.1 ...

# Per-column summary statistics
summary(dat1)
##        ID           Company           Conformity        Concentration   
##  Min.   :  1.00   Length:100         Length:100         Min.   : 90.40  
##  1st Qu.: 25.75   Class :character   Class :character   1st Qu.: 94.47  
##  Median : 50.50   Mode  :character   Mode  :character   Median : 97.50  
##  Mean   : 50.50                                         Mean   : 97.62  
##  3rd Qu.: 75.25                                         3rd Qu.:100.15  
##  Max.   :100.00                                         Max.   :106.90

#3.2 Probability of event

# Cross-tabulate company against conformity with table()
tab <- table(Company, Conformity)
tab
##        Conformity
## Company No Yes
##       X 26  24
##       Y 12  38
addmargins(tab)   # Add row totals and column totals
##        Conformity
## Company  No Yes Sum
##     X    26  24  50
##     Y    12  38  50
##     Sum  38  62 100

# Events, as implied by the cell counts used in the formulas below:
#   E1 = tablet comes from company X
#   E2 = tablet conforms (Conformity == "Yes")
# Every probability is derived from `tab` instead of being typed in by hand,
# so the results stay correct if the underlying data change.
n_total <- sum(tab)

#Probability of E1
PrE1 <- sum(tab["X", ]) / n_total
PrE1
## [1] 0.5
#Probability of E2
PrE2 <- sum(tab[, "Yes"]) / n_total
PrE2
## [1] 0.62
#Probability of E1 or E2 (union), by inclusion-exclusion:
#  P(E1 or E2) = P(E1) + P(E2) - P(E1 and E2)
PrE12 <- PrE1 + PrE2 - tab["X", "Yes"] / n_total
PrE12
## [1] 0.88
#Probability of the union of the complements of E1 and E2:
#  P(E1C or E2C) = P(E1C) + P(E2C) - P(E1C and E2C)
PrE1C <- 1 - PrE1
PrE2C <- 1 - PrE2
# Count of tablets that are neither from company X nor conforming
n_neither <- sum(tab[rownames(tab) != "X", colnames(tab) != "Yes"])
PrE1C2C <- PrE1C + PrE2C - n_neither / n_total
PrE1C2C
## [1] 0.76

#3.3 Binomial Distribution

# Model: number of conforming tablets in a sample of 10, where each tablet
# conforms with probability PrE2.

# P(exactly 8 conforming)
dbinom(x = 8, size = 10, prob = PrE2)
## [1] 0.1418774

# P(8 or fewer conforming), i.e. the cumulative probability up to 8
pbinom(q = 8, size = 10, prob = PrE2)
## [1] 0.9401661

# Probability mass function over all possible counts 0..10, drawn as spikes
x <- 0:10
plot(
  x = x,
  y = dbinom(x, size = 10, prob = PrE2),
  type = "h",
  lwd = 3,
  xlab = "Number of conforming tablets",
  ylab = "Binominal Distribution ofconforming tablets"
)

#3.4 Normal Distribution

# Three side-by-side views of the Concentration distribution. par() returns
# the previous settings, which are restored at the end so the 1x3 layout does
# not leak into plots drawn later in the session.
old_par <- par(mfrow = c(1, 3))

# Histogram
hist(
  Concentration,
  col = "blue",
  main = "Histogram of Tablet Concentration",
  xlab = "Concentration"
)

# Density curve
plot(
  density(Concentration),
  main = "Density Curve of Tablet Concentration",
  xlab = "Concentration",
  ylab = "Density",
  lwd = 2
)

# Q-Q plot against the normal distribution, with a reference line
qqnorm(Concentration, main = "Q-Q Plot of Tablet Concentration")
qqline(Concentration, col = "red", lwd = 2)

# Restore the graphics layout that was active before this section
par(old_par)

# Normal-model probabilities for Concentration, using the sample mean and
# sample standard deviation as the distribution parameters. Both are computed
# once here and reused in every call below.
conc_mean <- mean(Concentration)
conc_sd <- sd(Concentration)

#The probability that concentration is less than 98
pnorm(98, mean = conc_mean, sd = conc_sd)
## [1] 0.5383799
#The probability that concentration is greater than 102
pnorm(102, mean = conc_mean, sd = conc_sd, lower.tail = FALSE)
## [1] 0.1310622
#The probability that concentration is between 98 and 102:
#  P(98 < X < 102) = P(X < 102) - P(X < 98)
pnorm(102, mean = conc_mean, sd = conc_sd) -
  pnorm(98, mean = conc_mean, sd = conc_sd)
## [1] 0.3305579   (= 0.8689378 - 0.5383799)
#Find the concentration value that cuts off the upper 5% (the 95th percentile)
qnorm(0.95, mean = conc_mean, sd = conc_sd)
## [1] 104.0427

#3.5 Confidence Interval for the Mean

# 95% t-based confidence interval for the mean concentration of each company:
#   mean +/- t(0.975, df = n - 1) * sd / sqrt(n)

# Per-company sample mean
mean_conc <- tapply(Concentration, Company, mean)
mean_conc
##      X      Y 
## 97.186 98.062
# Per-company sample standard deviation
sd_conc <- tapply(Concentration, Company, sd)
sd_conc
##        X        Y 
## 4.463183 3.233491
# Per-company sample size
n_conc <- tapply(Concentration, Company, length)

# Standard error of the mean and the t critical value. Both bounds must use
# the same n - 1 degrees of freedom so the interval is symmetric about the
# mean.
se_conc <- sd_conc / sqrt(n_conc)
t_crit <- qt(0.975, df = n_conc - 1)

lower_CI <- mean_conc - t_crit * se_conc
lower_CI
##        X        Y 
## 95.91758 97.14305
upper_CI <- mean_conc + t_crit * se_conc
upper_CI
##        X        Y 
## 98.45442 98.98095

#Combine into one table
# SE holds the standard error (sd / sqrt(n)), not the standard deviation.
# Column names are kept exactly as before so any code that reads ci_table
# keeps working.
ci_table <- data.frame(
  Company = names(mean_conc),
  n = as.vector(n_conc),
  Mean = as.vector(mean_conc),
  SD = as.vector(sd_conc),
  SE = as.vector(se_conc),
  Lower_95Cl = as.vector(lower_CI),
  Upper_95Cl = as.vector(upper_CI)
)
ci_table