# Organizing Qualitative Data.
# Create data: observed count for each gender category
gender_counts <- c(Male = 16, Female = 9)
total <- sum(gender_counts)

# Calculate percentages and cumulative percentages.
# The running total is accumulated from the exact shares and rounded only
# afterwards, so per-category rounding error cannot pile up in the last row.
exact_percentages <- gender_counts / total * 100
percentages <- round(exact_percentages, 1)
cumulative_percentages <- round(cumsum(exact_percentages), 1)

# Create frequency table.
# The named count vector supplies the row names of the data frame.
frequency_table <- data.frame(
  Gender = names(gender_counts),
  Frequency = gender_counts,
  Percentage = percentages,
  Cumulative_Percentage = cumulative_percentages
)

# Print frequency table
print(frequency_table)
## Gender Frequency Percentage Cumulative_Percentage
## Male Male 16 64 64
## Female Female 9 36 100
# Bar chart: one bar per gender, bar height = frequency
barplot(
  gender_counts,
  main = "Gender Distribution",
  xlab = "Gender",
  ylab = "Frequency",
  col = c("blue", "pink"),
  # extend the y-axis 5 units above the tallest bar
  ylim = c(0, max(gender_counts) + 5)
)

# Pie chart with percentages.
# Each slice label is the category name with its percentage on a second line.
slice_labels <- paste0(names(gender_counts), "\n", percentages, "%")
pie(
  gender_counts,
  labels = slice_labels,
  main = "Gender Distribution",
  col = c("blue", "pink")
)

# Organizing Quantitative Variables.
# Sample data: Scores of 25 students
scores <- c(
  45, 50, 55, 60, 60, 62, 65, 67, 70, 72,
  75, 75, 77, 78, 80, 82, 85, 88, 90, 92,
  95, 95, 98, 100, 100
)

# Define the number of classes
num_classes <- 5

# Create breaks for histogram (equal width bins)
breaks <- seq(min(scores), max(scores), length.out = num_classes + 1)

# Create a frequency table with defined bins.
# Intervals are closed on the right; include.lowest keeps the minimum score
# inside the first interval.
freq_table <- cut(scores, breaks, right = TRUE, include.lowest = TRUE)
table_freq <- table(freq_table)

# Calculate percentages
percentages <- (table_freq / length(scores)) * 100

# Calculate cumulative percentages
cumulative_percentages <- cumsum(percentages)

# Create a data frame for better visualization.
# Table objects are flattened with as.vector() first: handing a table to
# data.frame() expands it into two columns (label + Freq) instead of one.
result <- data.frame(
  Class_Interval = names(table_freq),
  Frequency = as.vector(table_freq),
  Percentage = round(as.vector(percentages), 2),
  Cumulative_Percentage = round(as.vector(cumulative_percentages), 2)
)

# Print the result
print(result)
## Class_Interval Frequency Percentage Cumulative_Percentage
## 1 [45,56] 3 12 12
## 2 (56,67] 5 20 32
## 3 (67,78] 6 24 56
## 4 (78,89] 4 16 72
## 5 (89,100] 7 28 100
# Create a histogram using the same break points as the frequency table
hist(
  scores,
  breaks = breaks,
  col = "lightblue",
  main = "Histogram of Student Scores",
  xlab = "Scores",
  ylab = "Frequency",
  border = "black",
  right = TRUE
)

# Descriptive Statistics.
# Sample data: Scores of 10 students
scores <- c(45, 50, 55, 60, 65, 70, 75, 80, 85, 90)

# Compute descriptive statistics
mean_value <- mean(scores)     # Mean
median_value <- median(scores) # Median

# Mode: the value with the highest frequency, returned as a number.
# which.max() keeps the first value on ties; every score in this sample
# occurs exactly once, so the value reported here is simply the smallest one.
mode_value <- as.numeric(names(which.max(table(scores))))

sd_value <- sd(scores)               # Standard Deviation
var_value <- var(scores)             # Variance
min_value <- min(scores)             # Minimum
max_value <- max(scores)             # Maximum
range_value <- max_value - min_value # Range
quantiles <- quantile(scores)        # Quartiles

# Moment-based shape measures: the n-denominator central moment divided by a
# power of the sample standard deviation; kurtosis is reported as excess (- 3).
n_scores <- length(scores)
deviations <- scores - mean_value
skewness_value <- sum(deviations^3) / (n_scores * sd_value^3)     # Skewness
kurtosis_value <- sum(deviations^4) / (n_scores * sd_value^4) - 3 # Kurtosis
# Display results: one labelled line per statistic computed above.
# Lines starting with "##" are the output recorded when the script was run.
cat("Mean:", mean_value, "\n")
## Mean: 67.5
cat("Median:", median_value, "\n")
## Median: 67.5
cat("Mode:", mode_value, "\n")
## Mode: 45
cat("Standard Deviation:", sd_value, "\n")
## Standard Deviation: 15.13825
cat("Variance:", var_value, "\n")
## Variance: 229.1667
cat("Minimum:", min_value, "\n")
## Minimum: 45
cat("Maximum:", max_value, "\n")
## Maximum: 90
cat("Range:", range_value, "\n")
## Range: 45
cat("Quartiles:\n")
## Quartiles:
print(quantiles)
## 0% 25% 50% 75% 100%
## 45.00 56.25 67.50 78.75 90.00
cat("Skewness:", skewness_value, "\n")
## Skewness: 0
cat("Kurtosis:", kurtosis_value, "\n")
## Kurtosis: -1.561636
# Pearson’s Correlation Coefficient.
# Create vectors for Age and Weight (paired observations, same order)
age <- c(7, 6, 8, 5, 6, 9)
weight <- c(12, 8, 12, 10, 11, 13)

# Compute Pearson’s correlation coefficient with significance test
correlation_test <- cor.test(age, weight, method = "pearson")

# Print the results
print(correlation_test)
##
## Pearson's product-moment correlation
##
## data: age and weight
## t = 2.3355, df = 4, p-value = 0.07977
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1355843 0.9719695
## sample estimates:
## cor
## 0.7595545
# R Code for Spearman’s Correlation
# Create vectors for Education Level (Ordinal Data) and Income
education <- c(
  "Preparatory", "Primary", "University", "Secondary",
  "Secondary", "Illiterate", "University"
)
income <- c(25, 10, 8, 10, 15, 50, 60)

# Convert education levels into ordinal numeric values.
# The levels are listed from lowest to highest, so the integer codes follow
# the ordering of the education categories.
education_levels <- factor(
  education,
  levels = c("Illiterate", "Primary", "Preparatory", "Secondary", "University"),
  ordered = TRUE
)
education_numeric <- as.numeric(education_levels)

# Compute Spearman’s correlation coefficient
spearman_correlation <- cor(education_numeric, income, method = "spearman")

# Run the matching significance test. Both variables contain ties, so an exact
# p-value is not available; exact = FALSE requests the approximation directly
# instead of triggering a warning.
spearman_test <- cor.test(
  education_numeric, income,
  method = "spearman", exact = FALSE
)

# Print results
cat("Spearman's Correlation Coefficient:", spearman_correlation, "\n")
## Spearman's Correlation Coefficient: -0.1743193
print(spearman_test)
##
## Spearman's rank correlation rho
##
## data: education_numeric and income
## S = 65.762, p-value = 0.7085
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1743193
# Simple Linear Regression Model.
# Create vectors for Age (x) and Weight (y)
age <- c(7, 6, 8, 5, 6, 9)         # Independent Variable (x)
weight <- c(12, 8, 12, 10, 11, 13) # Dependent Variable (y)

# Fit the Simple Linear Regression Model
model <- lm(weight ~ age)

# Print Model Summary.
# print() is explicit so the summary also appears when the script is run
# through source(), where top-level values are not auto-printed.
print(summary(model))
##
## Call:
## lm(formula = weight ~ age)
##
## Residuals:
## 1 2 3 4 5 6
## 8.462e-01 -2.231e+00 -7.692e-02 6.923e-01 7.692e-01 -5.551e-17
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6923 2.7525 1.705 0.1634
## age 0.9231 0.3952 2.335 0.0798 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.301 on 4 degrees of freedom
## Multiple R-squared: 0.5769, Adjusted R-squared: 0.4712
## F-statistic: 5.455 on 1 and 4 DF, p-value: 0.07977
# Plot the data and regression line
plot(
  age, weight,
  main = "Simple Linear Regression: Age vs. Weight",
  xlab = "Age",
  ylab = "Weight",
  pch = 16,
  col = "blue"
)
abline(model, col = "red", lwd = 2) # Add regression line

# R Code for One-Sample t-Test
# Sample data: Heights of 10 students
height <- c(172, 168, 175, 170, 169, 173, 171, 174, 167, 176)

# Perform the one-sample t-test against a hypothesised mean of 170
# (two-sided, 95% confidence interval), as shown in the recorded output.
t_test_result <- t.test(height, mu = 170)

# Print the result
print(t_test_result)
##
## One Sample t-test
##
## data: height
## t = 1.5667, df = 9, p-value = 0.1516
## alternative hypothesis: true mean is not equal to 170
## 95 percent confidence interval:
## 169.3341 173.6659
## sample estimates:
## mean of x
## 171.5
# R Code for Independent Sample t-Test
# Sample data: Test scores for two groups
workshop <- c(85, 87, 88, 86, 84)    # Group 1: Attended Workshop
no_workshop <- c(80, 82, 81, 83, 79) # Group 2: Did not attend Workshop

# Perform the independent two-sample t-test.
# var.equal = TRUE selects the pooled-variance (Student) test, which is what
# the recorded output reports ("Two Sample t-test", df = 8).
t_test_result <- t.test(workshop, no_workshop, var.equal = TRUE)

# Print the result
print(t_test_result)
##
## Two Sample t-test
##
## data: workshop and no_workshop
## t = 5, df = 8, p-value = 0.001053
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.693996 7.306004
## sample estimates:
## mean of x mean of y
## 86 81
# R Code for Paired Sample t-Test
# Sample data: Body fat percentage before and after the workout program
# (position i in both vectors belongs to the same participant)
before <- c(25, 30, 28, 27, 26) # Before workout
after <- c(23, 28, 26, 25, 20)  # After workout

# Perform the paired t-test on the within-participant differences
t_test_result <- t.test(before, after, paired = TRUE)

# Print the result
print(t_test_result)
##
## Paired t-test
##
## data: before and after
## t = 3.5, df = 4, p-value = 0.0249
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.5788439 5.0211561
## sample estimates:
## mean difference
## 2.8
# R Code for One-Way ANOVA
# Sample data: Plant Growth (cm) under different Fertilizers
fertilizer <- factor(rep(c("A", "B", "C"), each = 5)) # Grouping factor
growth <- c(
  10, 11, 9, 10, 12,  # Fertilizer A
  12, 13, 11, 12, 13, # Fertilizer B
  15, 14, 16, 15, 14  # Fertilizer C
)

# Fit the one-way ANOVA model: growth explained by fertilizer group
anova_result <- aov(growth ~ fertilizer)

# Print ANOVA summary.
# print() is explicit so the table also appears when the script is run
# through source(), where top-level values are not auto-printed.
print(summary(anova_result))
## Df Sum Sq Mean Sq F value Pr(>F)
## fertilizer 2 48.93 24.47 27.18 3.49e-05 ***
## Residuals 12 10.80 0.90
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pearson’s Chi Square.
# Create the observed data matrix (filled row by row)
data <- matrix(c(90, 60, 30, 120), nrow = 2, byrow = TRUE)

# Add row and column names for clarity
dimnames(data) <- list(
  "Smoking Status" = c("Smoker", "Non-Smoker"),
  "Lung Cancer" = c("Yes", "No")
)

# Run the chi-squared test of independence.
# For a 2 x 2 table chisq.test() applies Yates' continuity correction by
# default, matching the recorded output.
test <- chisq.test(data)

# Print the results of the test
print(test)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data
## X-squared = 48.347, df = 1, p-value = 3.57e-12