library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(infer)
library(knitr)
library(shiny)
# Load the year-long cold-storage temperature readings.
# stringsAsFactors = TRUE is set explicitly because the default changed to
# FALSE in R 4.0; with it, Season and Month are factors on every R version,
# so summary() reports per-level counts for them.
storcold <- read.csv("Cold_Storage_Temp_Data.csv", stringsAsFactors = TRUE)
summary(storcold)
## Season Month Date Temperature
## Rainy :122 Aug : 31 Min. : 1.00 Min. :1.700
## Summer:120 Dec : 31 1st Qu.: 8.00 1st Qu.:2.500
## Winter:123 Jan : 31 Median :16.00 Median :2.900
## Jul : 31 Mean :15.72 Mean :2.963
## Mar : 31 3rd Qu.:23.00 3rd Qu.:3.300
## May : 31 Max. :31.00 Max. :5.000
## (Other):179
# Mean temperature for each season
Season_mean <- storcold %>%
  group_by(Season) %>%
  summarise(Mean.Temperature = mean(Temperature))
print(Season_mean)
## # A tibble: 3 x 2
## Season Mean.Temperature
## <fct> <dbl>
## 1 Rainy 3.04
## 2 Summer 3.15
## 3 Winter 2.70
# Average of all temperature readings in the yearly data
Mean_year_temperature <- mean(storcold[["Temperature"]])
print(Mean_year_temperature)
## [1] 2.96274
The mean temperature for the full year is 2.96 deg C.
# Sample standard deviation of all temperature readings in the yearly data
SD_YearTemp <- sd(storcold[["Temperature"]])
print(SD_YearTemp)
## [1] 0.508589
The standard deviation for the full year is 0.509 deg C.
# P(T < 2) under a normal model with the yearly mean and standard deviation
Lessthan2C <- pnorm(q = 2, mean = Mean_year_temperature, sd = SD_YearTemp)
print(Lessthan2C)
## [1] 0.02918146
Assuming the temperature is normally distributed, the probability of it going below 2 deg C is 0.02918146.
# P(T > 4): complement of the lower-tail normal probability at 4
Above4C <- 1 - pnorm(q = 4, mean = Mean_year_temperature, sd = SD_YearTemp)
print(Above4C)
## [1] 0.02070077
Under the same normal model, the probability of the temperature going above 4 deg C is 0.02070077.
Solution: Let R denote the acceptable temperature range, 2 deg C <= T <= 4 deg C.
The penalty cases are defined as follows. Case 1 - if the probability of the temperature falling outside R, written P(T outside R), satisfies 2.5% <= P(T outside R) <= 5% (i.e. lower bound 0.025, upper bound 0.05), the penalty is 10% of the Annual Maintenance Cost.
Case 2 - if P(T outside R) > 5%, the penalty is 25% of the Annual Maintenance Cost.
The probability of observing a temperature above 4 deg C is P(T > 4) = 0.02070077.
The probability of observing a temperature below 2 deg C is P(T < 2) = 0.02918146.
The temperature is outside R when either of these mutually exclusive events occurs, so P(T outside R) = P(T < 2) + P(T > 4) = 0.02918146 + 0.02070077 = 0.04988223, i.e. about 4.99%, which is above the lower bound of 2.5% and just below the upper bound of 5%.
Hence Case 1 applies and the AMC company will be fined 10% of the Annual Maintenance Cost.
# Read the sample readings from Cold_Storage_Mar2018.csv
Mar_ColdST <- readr::read_csv(file = "Cold_Storage_Mar2018.csv")
## Parsed with column specification:
## cols(
## Season = col_character(),
## Month = col_character(),
## Date = col_double(),
## Temperature = col_double()
## )
# Per-column summary of the sample data
summary(object = Mar_ColdST)
## Season Month Date Temperature
## Length:35 Length:35 Min. : 1.0 Min. :3.800
## Class :character Class :character 1st Qu.: 9.5 1st Qu.:3.900
## Mode :character Mode :character Median :14.0 Median :3.900
## Mean :14.4 Mean :3.974
## 3rd Qu.:19.5 3rd Qu.:4.100
## Max. :28.0 Max. :4.600
# Density of the sampled temperatures, with a red reference line at 3.9
ggplot(data = Mar_ColdST, mapping = aes(x = Temperature)) +
  geom_density() +
  geom_vline(xintercept = 3.9, color = "red")
The same hypotheses are used for both the z-test and the t-test:
H0: mu <= 3.9 versus Ha: mu > 3.9
Null hypothesis H0: the average temperature is less than or equal to the upper limit of 3.9 deg C.
Alternative hypothesis Ha: the average temperature has risen above 3.9 deg C.
As the customer, I hold a different opinion from the plant Supervisor. The food items smelled sour, so the temperature must have breached the threshold limit of the refrigeration plant.
The Supervisor wants to maintain the status quo, namely that the temperature has not crossed the 3.9 deg C limit he ordered. From the customer's standpoint, I therefore want to find evidence against that status quo.
This is a right-tailed test, because mu > 3.9 is what we are trying to show.
# Mean of the sampled temperatures
sample_mean <- mean(Mar_ColdST[["Temperature"]])
print(sample_mean)
## [1] 3.974286
# Standard deviation of the sampled temperatures
sample_sd <- sd(Mar_ColdST[["Temperature"]])
print(sample_sd)
## [1] 0.159674
# Standard error of the sample mean: s / sqrt(n).
# n is taken from the data rather than hard-coded, so the value stays correct
# if the sample file changes size.
std_error <- sample_sd / sqrt(length(Mar_ColdST$Temperature))
print(std_error)
## [1] 0.02698984
# Test statistic: distance of the sample mean from the hypothesised mean,
# measured in standard errors
mu <- 3.9
z_test <- (sample_mean - mu) / std_error
print(z_test)
## [1] 2.752359
# Right-tailed critical value at alpha = 0.10, computed from the standard
# normal quantile function instead of a rounded table value (about 1.2816).
z_critical <- qnorm(0.10, lower.tail = FALSE)
# TRUE means the statistic falls in the rejection region.
z_test >= z_critical
## [1] TRUE
Since the z-statistic of 2.75 is greater than the z-critical value of 1.28 at significance level alpha = 0.10, we reject the null hypothesis.
We conclude in favour of the alternative hypothesis: the average temperature has indeed risen above 3.9 deg C.
# One-sample, right-tailed t-test against mu = 3.9 with a 90% confidence level
t_test <- t.test(
  x = Mar_ColdST$Temperature,
  alternative = "greater",
  mu = 3.9,
  conf.level = 0.90
)
print(t_test)
##
## One Sample t-test
##
## data: Mar_ColdST$Temperature
## t = 2.7524, df = 34, p-value = 0.004711
## alternative hypothesis: true mean is greater than 3.9
## 90 percent confidence interval:
## 3.939011 Inf
## sample estimates:
## mean of x
## 3.974286
The p-value for the t-test is 0.004711 with 34 degrees of freedom.
Since the p-value is less than alpha = 0.10, we reject the null hypothesis. The one-sided 90% confidence interval also indicates that the population average temperature lies between 3.94 deg C and infinity, i.e. entirely above 3.9 deg C.
We therefore conclude in favour of the alternative hypothesis: the average temperature has risen above the upper limit of 3.9 deg C.
Both tests (z as well as t) indicate that the average temperature was above 3.9 deg C, contrary to the claim of the Supervisor, who had been vigilant about not breaching 3.9 deg C.
A sample of 35 days is large enough to conduct both the z-test and the t-test, and at this sample size the t-statistic and the z-statistic are practically identical. Note that here they coincide exactly because the z-statistic was computed with the sample standard deviation; the two tests then differ only in the reference distribution (standard normal versus t with 34 degrees of freedom).
t-statistic = z-statistic = 2.75 (both lead to rejecting the null hypothesis)