# Import the project CSV into the data frame `data` used by the rest of the script.
data <- read.csv(file = "C:/Users/Abdul Qudoos/Downloads/R project.csv")
# Structure of the data
# Compact overview of every column: its type and first few values.
utils::str(object = data)
## 'data.frame': 28 obs. of 7 variables:
## $ Observation: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Brand : chr "Canon" "Canon" "Canon" "Canon" ...
## $ Price_. : int 264 160 240 160 144 160 160 104 104 88 ...
## $ Megapixels : int 10 12 12 10 12 12 14 10 12 16 ...
## $ Weight_oz : int 7 5 7 6 5 7 5 7 5 5 ...
## $ Score : int 70 70 69 66 66 65 64 64 63 59 ...
## $ Brand.1 : int 1 1 1 1 1 1 1 1 1 1 ...
# Summary statistics for each variable
# Per-column summaries (quartiles and mean for numeric columns, length/class for text).
print(summary(object = data))
## Observation Brand Price_. Megapixels
## Min. : 1.00 Length:28 Min. : 64.0 Min. :10.00
## 1st Qu.: 7.75 Class :character 1st Qu.: 88.0 1st Qu.:12.00
## Median :14.50 Mode :character Median :128.0 Median :12.00
## Mean :14.50 Mean :140.3 Mean :12.86
## 3rd Qu.:21.25 3rd Qu.:160.0 3rd Qu.:14.00
## Max. :28.00 Max. :320.0 Max. :16.00
## Weight_oz Score Brand.1
## Min. :4.000 Min. :46.00 Min. :0.0000
## 1st Qu.:5.000 1st Qu.:56.00 1st Qu.:0.0000
## Median :6.000 Median :60.50 Median :0.0000
## Mean :5.821 Mean :60.36 Mean :0.4643
## 3rd Qu.:7.000 3rd Qu.:65.25 3rd Qu.:1.0000
## Max. :7.000 Max. :70.00 Max. :1.0000
# Checking the random sampling
# Fix the RNG seed so the two samples below are reproducible.
set.seed(123)
# Draw 10 prices per brand without replacement. The price column is named
# `Price_.` in the loaded data; it is addressed by its exact name with `[[`
# instead of relying on `$` partial matching of `data$Price`.
sample_nikon <- sample(data[["Price_."]][data$Brand == "Nikon"], 10)
sample_canon <- sample(data[["Price_."]][data$Brand == "Canon"], 10)
# Plot the distribution of each sample. A histogram shows the shape and spread
# of the sampled prices; it does not by itself demonstrate randomness.
hist(sample_nikon, main = "Histogram of Nikon Prices (Sample)")
hist(sample_canon, main = "Histogram of Canon Prices (Sample)")
# Separate the prices by brand. The column is addressed by its exact name
# (`Price_.`) so the lookup does not depend on `$` partial matching.
nikon_prices <- data[["Price_."]][data$Brand == "Nikon"]
canon_prices <- data[["Price_."]][data$Brand == "Canon"]
# Independent two-sample t-test. With its defaults, t.test() does not assume
# equal variances (Welch test) and tests a two-sided alternative.
t_test_result <- t.test(nikon_prices, canon_prices)
# Display the result
t_test_result
##
## Welch Two Sample t-test
##
## data: nikon_prices and canon_prices
## t = 0.08931, df = 25.993, p-value = 0.9295
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -49.67709 54.18991
## sample estimates:
## mean of x mean of y
## 141.3333 139.0769
# Bin Weight_oz into three ordered bands: (3, 5] -> Light, (5, 6] -> Medium,
# (6, 7] -> Heavy. Values outside (3, 7] would become NA.
data[["Weight_Category"]] <- cut(
  x = data[["Weight_oz"]],
  breaks = c(3, 5, 6, 7),
  labels = c("Light", "Medium", "Heavy")
)
# Count how many cameras fall into each band
table(data[["Weight_Category"]])
##
## Light Medium Heavy
## 12 7 9
# Load required packages
library(stats)
library(multcomp)
## Warning: package 'multcomp' was built under R version 4.3.2
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.3.2
## Loading required package: survival
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 4.3.2
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
# Ensure data types.
# The loaded price column is `Price_.`; this adds a numeric copy named `Price`.
# The source column is named explicitly because `data$Price` on the right-hand
# side only resolved to `Price_.` through `$` partial matching.
data$Price <- as.numeric(data[["Price_."]])
data$Weight_oz <- as.numeric(data$Weight_oz)
# Check data structure
str(data)
## 'data.frame': 28 obs. of 9 variables:
## $ Observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Brand : chr "Canon" "Canon" "Canon" "Canon" ...
## $ Price_. : int 264 160 240 160 144 160 160 104 104 88 ...
## $ Megapixels : int 10 12 12 10 12 12 14 10 12 16 ...
## $ Weight_oz : num 7 5 7 6 5 7 5 7 5 5 ...
## $ Score : int 70 70 69 66 66 65 64 64 63 59 ...
## $ Brand.1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Weight_Category: Factor w/ 3 levels "Light","Medium",..: 3 1 3 2 1 3 1 3 1 1 ...
## $ Price : num 264 160 240 160 144 160 160 104 104 88 ...
# Bin Weight_oz into the grouping factor used by the ANOVA, using the
# intervals (3, 5], (5, 6] and (6, 7].
data[["Weight_Cat"]] <- cut(
  x = data[["Weight_oz"]],
  breaks = c(3, 5, 6, 7),
  labels = c("Light Weight", "Medium Weight", "Heavy Weight")
)
# Show the updated structure, including the new factor column
str(data)
## 'data.frame': 28 obs. of 10 variables:
## $ Observation : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Brand : chr "Canon" "Canon" "Canon" "Canon" ...
## $ Price_. : int 264 160 240 160 144 160 160 104 104 88 ...
## $ Megapixels : int 10 12 12 10 12 12 14 10 12 16 ...
## $ Weight_oz : num 7 5 7 6 5 7 5 7 5 5 ...
## $ Score : int 70 70 69 66 66 65 64 64 63 59 ...
## $ Brand.1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Weight_Category: Factor w/ 3 levels "Light","Medium",..: 3 1 3 2 1 3 1 3 1 1 ...
## $ Price : num 264 160 240 160 144 160 160 104 104 88 ...
## $ Weight_Cat : Factor w/ 3 levels "Light Weight",..: 3 1 3 2 1 3 1 3 1 1 ...
# Abort when the data frame has no rows.
if (nrow(data) < 1L) {
  stop("Data frame is empty. Please ensure data is loaded correctly.")
}
# Make sure the grouping column is a factor; convert it only when it is not.
if (is.factor(data$Weight_Cat) == FALSE) {
  data$Weight_Cat <- as.factor(data$Weight_Cat)
}
# One-way ANOVA of Price across the three weight bands.
anova_result <- aov(
  formula = Price ~ as.factor(Weight_Cat),
  data = data
)
# Print the ANOVA table (Df, sums of squares, F value, p-value)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Weight_Cat) 2 13124 6562 1.557 0.23
## Residuals 25 105338 4214
# Install required packages (if not already installed)
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(MASS) # For the truehist function
# Visual and formal normality check for one column of a data set.
#
# Standardizes the column, draws a histogram (MASS::truehist) and a normal
# Q-Q plot with a reference line, then runs and prints a Shapiro-Wilk test.
#
# Args:
#   variable: the column to check, given either as a position or as a
#             column name.
#   data:     the data set holding the column (indexed as data[, variable]).
#
# Returns:
#   The Shapiro-Wilk test result (class "htest"), invisibly.
check_normality <- function(variable, data) {
  # Label used in plot titles and the printed header. A column name is used
  # directly; names(data)[variable] is only meaningful for a position and
  # would give NA for a name.
  if (is.character(variable)) {
    current_variable <- variable
  } else {
    current_variable <- names(data)[variable]
  }

  # Compute standardized scores (center and scale the column).
  # NOTE: this local name shows up in the printed test output and in the
  # default axis labels, so it is kept as-is.
  standardized_values <- scale(data[, variable])

  # Histogram of the standardized values
  truehist(
    standardized_values,
    main = paste0("Histogram of ", current_variable),
    col = "skyblue"
  )

  # Q-Q plot against the normal distribution, with reference line in red
  qqnorm(standardized_values, main = paste0("Q-Q Plot of ", current_variable))
  qqline(standardized_values, col = 2)

  # Perform and print the Shapiro-Wilk test
  shapiro_test <- shapiro.test(standardized_values)
  cat(paste0("Shapiro-Wilk test for normality of ", current_variable, ":\n"))
  print(shapiro_test)

  invisible(shapiro_test)
}
# Check normality for all numeric variables.
# seq_along() is used instead of 1:ncol(data) so that a data frame with no
# columns yields zero iterations rather than the sequence c(1, 0).
for (i in seq_along(data)) {
  # `[[` extracts the column itself as a vector
  if (is.numeric(data[[i]])) {
    check_normality(i, data)
  }
}
## Shapiro-Wilk test for normality of Observation:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.95784, p-value = 0.3095
## Shapiro-Wilk test for normality of Price_.:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.89739, p-value = 0.009945
## Shapiro-Wilk test for normality of Megapixels:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.87756, p-value = 0.003549
## Shapiro-Wilk test for normality of Weight_oz:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.84974, p-value = 0.0009235
## Shapiro-Wilk test for normality of Score:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.95719, p-value = 0.2985
## Shapiro-Wilk test for normality of Brand.1:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.63672, p-value = 4.084e-07
## Shapiro-Wilk test for normality of Price:
##
## Shapiro-Wilk normality test
##
## data: standardized_values
## W = 0.89739, p-value = 0.009945
# Install required package (if not already installed)
if (!requireNamespace("GGally", quietly = TRUE)) install.packages("GGally")
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
# Columns used for the correlation analysis
variables <- c("Megapixels", "Weight_oz", "Score")

# Stop early if any requested column is absent from the data frame
missing_vars <- setdiff(variables, names(data))
if (length(missing_vars) != 0) {
  missing_list <- paste(missing_vars, collapse = ", ")
  stop(paste("The following variables are missing from the dataset:", missing_list))
}

# Stop early if any of the selected columns contains an NA
missing_values <- any(is.na(data[, variables]))
if (isTRUE(missing_values)) {
  stop("There are missing values in the selected variables.")
}
# 1. Scatter-plot matrix with Spearman's correlation.
#    - upper triangle: Spearman correlation coefficients. The "cor" panel
#      computes Pearson unless `method` is passed, so it is wrapped with
#      method = "spearman" to agree with the correlation matrix.
#    - lower triangle: pairwise scatter plots.
#    - diagonal: histogram of each variable. `diag` is a top-level argument
#      of ggpairs(), not an entry of the `lower` list.
ggpairs(
  data,
  columns = variables,
  upper = list(continuous = wrap("cor", method = "spearman")),
  lower = list(continuous = "points"),
  diag = list(continuous = "barDiag")
) +
  theme_bw()
# 2. Rank-based (Spearman) correlation between every pair of selected variables
correlation_matrix <- cor(
  x = data[, variables],
  method = "spearman"
)
# Show the resulting symmetric matrix
print(correlation_matrix)
## Megapixels Weight_oz Score
## Megapixels 1.00000000 -0.2092513 -0.04233974
## Weight_oz -0.20925132 1.0000000 0.21404556
## Score -0.04233974 0.2140456 1.00000000