#===========================================
# R COMPLETE STATISTICAL FUNCTIONS PRACTICE
# Author: Hirwa
#===========================================

# Install package if not installed.
# requireNamespace() only checks that dplyr is available (it does not attach
# it), so the single library() call below is what puts dplyr on the search
# path, and it fails loudly if the installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

#===========================================
# 1. IMPORT DATASET
#===========================================
# Load the built-in mtcars data and keep a working copy named df.
# Note: from here on the name df refers to this data frame and masks the
# stats::df() function.
data(mtcars) # Built-in dataset
df <- mtcars

# View first rows
head(df)

# Structure of dataset
str(df)

#===========================================
# 2. BASIC STATISTICAL FUNCTIONS
#===========================================
# Each call below summarises the mpg column of df; the last one summarises
# every column of the data frame.

# Mean
mean(df$mpg)

# Median
median(df$mpg)

# Standard Deviation
sd(df$mpg)

# Variance
var(df$mpg)

# Minimum
min(df$mpg)

# Maximum
max(df$mpg)

# Range (returns the minimum and maximum as a length-2 vector)
range(df$mpg)

# Sum
sum(df$mpg)

# Quantiles
quantile(df$mpg)

# Summary
summary(df)

#===========================================
# 3. APPLY FUNCTION
#===========================================
# apply() works over the margins of a matrix: MARGIN = 2 iterates over columns,
# MARGIN = 1 over rows. A data frame passed to apply() is coerced to a matrix
# first.

# Mean of each column
apply(df, 2, mean)

# Sum of each column
apply(df, 2, sum)

# Row sums
apply(df, 1, sum)

#===========================================
# 4. Lapply()
#===========================================
# lapply() applies a function to each column of df and always returns a list.

# Square each column
lapply(df, function(x) x^2)

# Mean of all columns
lapply(df, mean)

#===========================================
# 5. Sapply()
#===========================================
# sapply() is lapply() followed by simplification of the result (here to a
# named vector, since each column yields a single value).

# Mean of all columns
sapply(df, mean)

# Max value in each column
sapply(df, max)

#===========================================
# 6. Vapply()
# Safer version of sapply
#===========================================
# FUN.VALUE declares the expected shape of each result (one numeric value per
# column); vapply() raises an error if any result does not match it.

# Mean of every column
vapply(X = df, FUN = mean, FUN.VALUE = numeric(1))

# Largest value in every column
vapply(X = df, FUN = max, FUN.VALUE = numeric(1))

#===========================================
# 7. Tapply()
# Grouping calculations
#===========================================
# tapply(values, groups, fun) applies fun to the values within each group.

# Mean mpg by cylinders
tapply(df$mpg, df$cyl, mean)

# Sum horsepower by cylinders
tapply(df$hp, df$cyl, sum)

#===========================================
# 8. Mapply()
# Multiple vectors
#===========================================
# mapply() walks several vectors in parallel, calling the function once per
# position with the matching elements of each vector.
x <- c(1, 2, 3, 4)
y <- c(10, 20, 30, 40)

# Add vectors
mapply(function(a, b) a + b, x, y)

# Multiply vectors
mapply(function(a, b) a * b, x, y)

#===========================================
# 9. SORTING
#===========================================
# sort() returns a sorted copy of the vector; df itself is not modified.

# Sort mpg ascending
sort(df$mpg)

# Sort descending
sort(df$mpg, decreasing = TRUE)

#===========================================
# 10. CORRELATION
#===========================================
# cor() with two vectors returns a single coefficient; with a data frame it
# returns the matrix of pairwise correlations between columns.

# mpg vs horsepower
cor(df$mpg, df$hp)

# weight vs mpg
cor(df$wt, df$mpg)

# Correlation matrix
cor(df)

#===========================================
# 11. GROUP_BY() + PIPE %>%
#===========================================

# Group by cylinders: average mpg, average horsepower and number of cars
df %>%
  group_by(cyl) %>%
  summarise(
    avg_mpg = mean(mpg),
    avg_hp = mean(hp),
    total_cars = n()
  )

# Group by gear: average weight and maximum horsepower
df %>%
  group_by(gear) %>%
  summarise(
    avg_weight = mean(wt),
    max_hp = max(hp)
  )

#===========================================
# 12. FILTER + PIPE
#===========================================
# filter() keeps only the rows for which the condition is TRUE.

# Cars with mpg > 20
df %>% filter(mpg > 20)

# Cars with horsepower > 100
df %>% filter(hp > 100)

#===========================================
# 13. ARRANGE
#===========================================
# arrange() reorders rows; wrap a column in desc() for descending order.

# Sort by mpg
df %>% arrange(mpg)

# Descending horsepower
df %>% arrange(desc(hp))

#===========================================
# 14. SELECT
#===========================================
# Keep only the mpg, hp and wt columns (data frame passed directly as the
# first argument instead of being piped in).
select(df, mpg, hp, wt)

#===========================================
# 15. MUTATE
#===========================================
# Add a derived column, efficiency, computed as mpg divided by wt. The result
# is printed only; df itself is left unchanged.
mutate(df, efficiency = mpg / wt)

#===========================================
# 16. ERROR HANDLING
#===========================================

# Example error: log() of a character string signals an error, which the
# handler catches, reports and prints instead of stopping the script.
tryCatch(
  {
    log("abc")
  },
  error = function(e) {
    print("Error Found")
    print(e)
  }
)

# Traceback example: f1() calls f2(), and f2() always signals an error.
f1 <- function() f2()
f2 <- function() stop("Something went wrong")

# try() lets the script continue past the error raised inside f1().
try(f1())

# NOTE: traceback() reports the call stack of the last *uncaught* error.
# Errors caught by try()/tryCatch() do not record one, so to see the
# f1() -> f2() stack, run f1() unwrapped in an interactive session and then
# call traceback().
traceback()

#===========================================
# 17. ADVANCED SUMMARY
#===========================================

# Count missing values (total number of NA cells in the whole data frame)
sum(is.na(df))

# Unique cylinder values
unique(df$cyl)

# Table frequency (number of cars per cylinder count)
table(df$cyl)

# Dimensions (rows, columns)
dim(df)

# Number of rows
nrow(df)

# Number of columns
ncol(df)

#===========================================
# 18. HISTOGRAM
#===========================================
# Draw the distribution of mpg on the active graphics device.
hist(
  df$mpg,
  main = "Histogram of MPG",
  xlab = "Miles Per Gallon"
)

#===========================================
# 19. BOXPLOT
#===========================================
# Draw a box-and-whisker plot of mpg on the active graphics device.
boxplot(df$mpg, main = "MPG Boxplot")

#===========================================
# 20. FINAL REPORT
#===========================================
# Print a completion message to the console (cat() adds no trailing newline).
cat("Analysis Completed Successfully!")