#===========================================
# R COMPLETE STATISTICAL FUNCTIONS PRACTICE
# Author: Hirwa
#===========================================
# Install dplyr only when it is missing, then attach it.
# requireNamespace() checks availability without attaching the package, so the
# explicit library() call below is the single place where dplyr is loaded and
# it fails loudly if the installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
#===========================================
# 1. IMPORT DATASET
#===========================================
# Load the built-in mtcars dataset and work on a copy named `df`.
# Without this assignment `df` would resolve to the stats::df() function.
data(mtcars)  # Built-in dataset
df <- mtcars

# Quick look at the first rows and at the column types.
head(df)
str(df)
#===========================================
# 2. BASIC STATISTICAL FUNCTIONS
#===========================================
# Descriptive statistics for the mpg column, extracted by name with [[ ]].
mean(df[["mpg"]])
median(df[["mpg"]])
sd(df[["mpg"]])
var(df[["mpg"]])
min(df[["mpg"]])
max(df[["mpg"]])
range(df[["mpg"]])     # smallest and largest value together
sum(df[["mpg"]])
quantile(df[["mpg"]])  # default probabilities: 0, 25, 50, 75, 100 percent

# Column-by-column summary of the whole data frame.
summary(df)
#===========================================
# 3. APPLY FUNCTION
#===========================================
# MARGIN = 2 applies the function to every column, MARGIN = 1 to every row.
apply(X = df, MARGIN = 2, FUN = mean)  # column means
apply(X = df, MARGIN = 2, FUN = sum)   # column totals
apply(X = df, MARGIN = 1, FUN = sum)   # row totals (one value per car)
#===========================================
# 4. Lapply()
#===========================================
# lapply() visits each column of the data frame and always returns a list.
lapply(X = df, FUN = function(column) column^2)  # square every value
lapply(X = df, FUN = mean)                       # one mean per column
#===========================================
# 5. Sapply()
#===========================================
# sapply() works like lapply() but simplifies the result when it can
# (here: to a named vector with one element per column).
sapply(X = df, FUN = mean)
sapply(X = df, FUN = max)
#===========================================
# 6. Vapply()
# Safer version of sapply
#===========================================
# FUN.VALUE declares the expected shape of each result (a single number);
# vapply() raises an error if any column returns something else.
vapply(X = df, FUN = mean, FUN.VALUE = numeric(1))
vapply(X = df, FUN = max, FUN.VALUE = numeric(1))
#===========================================
# 7. Tapply()
# Grouping calculations
#===========================================
# tapply(values, groups, FUN) applies FUN to the values within each group.
tapply(df$mpg, df$cyl, mean)  # mean mpg per cylinder count
tapply(df$hp, df$cyl, sum)    # total horsepower per cylinder count
#===========================================
# 8. Mapply()
# Multiple vectors
#===========================================
# Two equal-length input vectors; mapply() walks them in parallel.
x <- c(1, 2, 3, 4)
y <- c(10, 20, 30, 40)

# The function receives the i-th element of x as `a` and of y as `b`.
mapply(function(a, b) a + b, x, y)  # element-wise sums
mapply(function(a, b) a * b, x, y)  # element-wise products
#===========================================
# 9. SORTING
#===========================================
# sort() returns the values in ascending order unless decreasing = TRUE.
sort(df[["mpg"]])
sort(df[["mpg"]], decreasing = TRUE)
#===========================================
# 10. CORRELATION
#===========================================
# Correlation between two individual columns.
cor(df$mpg, df$hp)
cor(df$wt, df$mpg)

# Full correlation matrix across every pair of columns.
cor(df)
#===========================================
# 11. GROUP_BY() + PIPE %>%
#===========================================
# One summary row per cylinder count.
df %>%
  group_by(cyl) %>%
  summarise(
    avg_mpg = mean(mpg),
    avg_hp = mean(hp),
    total_cars = n()  # number of rows in the group
  )

# One summary row per gear count.
df %>%
  group_by(gear) %>%
  summarise(
    avg_weight = mean(wt),
    max_hp = max(hp)
  )
#===========================================
# 12. FILTER + PIPE
#===========================================
# Keep only the rows that satisfy the condition.
df %>%
  filter(mpg > 20)

df %>%
  filter(hp > 100)
#===========================================
# 13. ARRANGE
#===========================================
# Order rows by mpg, lowest first.
df %>%
  arrange(mpg)

# Order rows by hp, highest first.
df %>%
  arrange(desc(hp))
#===========================================
# 14. SELECT
#===========================================
# Keep only the listed columns, in the listed order.
df %>%
  select(mpg, hp, wt)
#===========================================
# 15. MUTATE
#===========================================
# Add a derived column: mpg divided by wt.
df %>%
  mutate(efficiency = mpg / wt)
#===========================================
# 16. ERROR HANDLING
#===========================================
# tryCatch() evaluates the expression; if it signals an error, the condition
# object is passed to the `error` handler instead of stopping the script.
tryCatch(
  {
    log("abc")  # log() of a character string signals an error
  },
  error = function(e) {
    print("Error Found")
    print(e)
  }
)

# f1() simply calls f2(), and f2() always signals an error, so calling f1()
# produces an error that originates one level down the call stack.
f1 <- function() f2()
f2 <- function() stop("Something went wrong")

# try() reports the error and lets the script continue.
try(f1())

# NOTE: traceback() reports the call stack of the last *uncaught* error.
# The error above is caught by try(), so this call may report that no
# traceback is available; call f1() without try() to see f1 -> f2 -> stop.
traceback()
#===========================================
# 17. ADVANCED SUMMARY
#===========================================
# Total number of missing values across the whole data frame.
sum(is.na(df))

# Distinct cylinder counts and how many rows carry each one.
unique(df[["cyl"]])
table(df[["cyl"]])

# Shape of the data frame: both dimensions, then rows and columns separately.
dim(df)
nrow(df)
ncol(df)
#===========================================
# 18. HISTOGRAM
#===========================================
# Distribution of mpg drawn on the active graphics device.
hist(
  df$mpg,
  main = "Histogram of MPG",
  xlab = "Miles Per Gallon"
)
#===========================================
# 19. BOXPLOT
#===========================================
# Box-and-whisker plot of mpg drawn on the active graphics device.
boxplot(df$mpg, main = "MPG Boxplot")
#===========================================
# 20. FINAL REPORT
#===========================================
# Print a completion message to the console.
cat("Analysis Completed Successfully!")