# NOTE(review): this is a machine-specific absolute path; the script only runs
# as-is on a machine where this folder exists.
setwd("C:/Users/Nandan Hegde/OneDrive/Documents/MSU_Grad_Studies/STT810_class_assignments/ICA6")
# Read the pizza data set from the working directory set above.
df_pizza <- read.csv(file = "pizza.csv")
# a. Covariance and correlation between moisture (mois) and calories (cal),
#    obtained directly from cov() and cor().
covariance <- cov(df_pizza[["mois"]], df_pizza[["cal"]])
print(paste("Covariance between mois and cal: ", covariance))
## [1] "Covariance between mois and cal: -4.5279181270903"
correlation <- cor(df_pizza[["mois"]], df_pizza[["cal"]])
print(paste("Correlation between mois and cal: ", correlation))
## [1] "Correlation between mois and cal: -0.764440543783477"
# b. Check the covariance through the identity Cov(X, Y) = E(XY) - E(X)E(Y).
#    The expectations are plain means (denominator n), whereas cov() divides by
#    n - 1, so the value printed here is close to, but not equal to, cov().
EX <- mean(df_pizza[["mois"]])
EY <- mean(df_pizza[["cal"]])
EXY <- mean(df_pizza[["mois"]] * df_pizza[["cal"]])
verify_covariance <- EXY - EX * EY
cat("Calculation of covariance:", verify_covariance, "\n")
## Calculation of covariance: -4.512825
# c. Verify the correlation value by dividing the covariance by the product of
#    the standard deviations.
#    sd() uses the n - 1 (sample) denominator, while verify_covariance was built
#    from plain means (denominator n). Dividing one by the other directly gives
#    cor() scaled by (n - 1) / n, so the covariance is first rescaled by
#    n / (n - 1) to put numerator and denominator on the same convention.
n_obs <- nrow(df_pizza)
std_mois <- sd(df_pizza$mois)
std_cal <- sd(df_pizza$cal)
verify_correlation <- (verify_covariance * n_obs / (n_obs - 1)) /
  (std_mois * std_cal)
cat("Manual calculation of correlation:", verify_correlation, "\n")
## Manual calculation of correlation: -0.7644405
# d. Examine the entire correlation matrix. Which variable has the strongest
#    correlation with cal?
# vapply() guarantees exactly one logical per column; sapply()'s return type
# depends on its input.
numeric_cols <- vapply(df_pizza, is.numeric, logical(1))
# drop = FALSE keeps a data frame even if only a single column is numeric.
correlation_matrix <- cor(df_pizza[, numeric_cols, drop = FALSE])
cat("Correlation matrix:\n")
## Correlation matrix:
print(correlation_matrix)
## id mois prot fat ash sodium
## id 1.00000000 0.03259505 -0.07648546 -0.00486006 -0.03407156 -0.01515608
## mois 0.03259505 1.00000000 0.36024768 -0.17131821 0.26555552 -0.10227890
## prot -0.07648546 0.36024768 1.00000000 0.49800167 0.82384370 0.42912952
## fat -0.00486006 -0.17131821 0.49800167 1.00000000 0.79163396 0.93332522
## ash -0.03407156 0.26555552 0.82384370 0.79163396 1.00000000 0.80812215
## sodium -0.01515608 -0.10227890 0.42912952 0.93332522 0.80812215 1.00000000
## carb 0.01496614 -0.59180165 -0.85354226 -0.64023817 -0.89898837 -0.62017634
## cal -0.02120862 -0.76444054 0.07025810 0.76456710 0.32646845 0.67195750
## carb cal
## id 0.01496614 -0.02120862
## mois -0.59180165 -0.76444054
## prot -0.85354226 0.07025810
## fat -0.64023817 0.76456710
## ash -0.89898837 0.32646845
## sodium -0.62017634 0.67195750
## carb 1.00000000 -0.02348458
## cal -0.02348458 1.00000000
# Keep only the "cal" row and leave out cal's correlation with itself.
# A logical mask is used instead of -which(...), which would select nothing
# if the name were ever absent.
# From this point on correlation_matrix is a named numeric vector, not a matrix.
correlation_matrix <- correlation_matrix[
  "cal",
  colnames(correlation_matrix) != "cal"
]
# Identify the strongest positive correlation
strongest_positive_correlation <- max(correlation_matrix)
strongest_positive_variable <- names(which.max(correlation_matrix))
# Identify the strongest negative correlation
strongest_negative_correlation <- min(correlation_matrix)
strongest_negative_variable <- names(which.min(correlation_matrix))
cat(
  "Variable with the strongest positive correlation with calories:",
  strongest_positive_variable,
  "with correlation:",
  strongest_positive_correlation,
  "\n"
)
## Variable with the strongest positive correlation with calories: fat with correlation: 0.7645671
cat(
  "Variable with the strongest negative correlation with calories:",
  strongest_negative_variable,
  "with correlation:",
  strongest_negative_correlation,
  "\n"
)
## Variable with the strongest negative correlation with calories: mois with correlation: -0.7644405
# Simulation settings: both samples use mean 8 and standard deviation 2.
mean_value <- 8
std_value <- 2
n_simulations <- 1e6
# Seed the generator so the draws and the recorded outputs are reproducible.
# x is drawn before y; swapping the two calls would change both vectors.
set.seed(123)
x <- rnorm(n = n_simulations, mean = mean_value, sd = std_value)
y <- rnorm(n = n_simulations, mean = mean_value, sd = std_value)
# a. What is the covariance between x and y? Why?
#    x and y come from two separate rnorm() calls; the recorded sample
#    covariance is close to zero.
covariance_xy <- cov(x, y)
cat("Covariance between x and y: ", covariance_xy, "\n")
## Covariance between x and y: -0.003257275
# b. Define z = x + y and verify Var(z) = Var(x) + Var(y) = 2 * Var(x)
z <- x + y
variance_z <- var(z)
variance_y <- var(y)
variance_x <- var(x)
cat("Variance of x: ", variance_x, "\n")
## Variance of x: 3.999417
cat("Variance of y: ", variance_y, "\n")
## Variance of y: 3.998049
cat("Variance of z: ", variance_z, "\n")
## Variance of z: 7.990951
cat("2 * Variance of x: ", 2 * variance_x, "\n")
## 2 * Variance of x: 7.998833
# c. What is the covariance and correlation between z and x?
correlation_zx <- cor(z, x)
covariance_zx <- cov(z, x)
cat("Covariance between z and x: ", covariance_zx, "\n")
## Covariance between z and x: 3.996159
cat("Correlation between z and x: ", correlation_zx, "\n")
## Correlation between z and x: 0.7068793
# d. Define w = z + x and verify Var(w) = Var(z) + Var(x) + 2 * Cov(z, x)
w <- z + x
# Right-hand side of the identity, assembled from the pieces computed above.
calculated_variance_w <- variance_z + variance_x + 2 * covariance_zx
variance_w <- var(w)
cat("Variance of w: ", variance_w, "\n")
## Variance of w: 19.98269
cat("Calculated variance of w (using the formula): ", calculated_variance_w, "\n")
## Calculated variance of w (using the formula): 19.98269