For Loop || Simple to Advanced

For Loop

Simple For Loop Examples

Ex 1

# Walk through the integers 1 to 5 and print each one on its own line.
for (i in seq_len(5)) {
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5

Ex 2

# Character vector to loop over.
names <- c("arun", "amita", "revant", "hershie")

# A for loop can visit the elements directly; no index variable is needed.
for (name in names) {
  print(name)
}
[1] "arun"
[1] "amita"
[1] "revant"
[1] "hershie"

Ex 3

# Print the square of every integer from 1 to 6.
for (i in seq_len(6)) {
  squared <- i^2
  print(squared)
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
[1] 36

Ex 4

# Start from an empty vector and append one square per iteration.
squares <- c()

for (i in seq_len(5)) {
  # append() with no position adds the new value at the end.
  squares <- append(squares, i^2)
}

print(squares)
[1]  1  4  9 16 25

Ex 5

# Label each integer from 1 to 10 as even or odd.
for (i in seq_len(10)) {
  # i %% 2 is the remainder after dividing by 2; a non-zero remainder means odd.
  # paste() already separates its arguments with a space, so the leading space
  # inside the literal produces two spaces in the printed text.
  if (i %% 2 != 0) {
    print(paste(i, " is odd number"))
  } else {
    print(paste(i, " is even number"))
  }
}
[1] "1  is odd number"
[1] "2  is even number"
[1] "3  is odd number"
[1] "4  is even number"
[1] "5  is odd number"
[1] "6  is even number"
[1] "7  is odd number"
[1] "8  is even number"
[1] "9  is odd number"
[1] "10  is even number"

Ex 6

# Multiplication table for 1..5: the outer loop fixes the left factor and the
# inner loop runs through every right factor.
for (i in seq_len(5)) {
  for (j in seq_len(5)) {
    product <- i * j
    print(paste(i, "x", j, "=", product))
  }
}
[1] "1 x 1 = 1"
[1] "1 x 2 = 2"
[1] "1 x 3 = 3"
[1] "1 x 4 = 4"
[1] "1 x 5 = 5"
[1] "2 x 1 = 2"
[1] "2 x 2 = 4"
[1] "2 x 3 = 6"
[1] "2 x 4 = 8"
[1] "2 x 5 = 10"
[1] "3 x 1 = 3"
[1] "3 x 2 = 6"
[1] "3 x 3 = 9"
[1] "3 x 4 = 12"
[1] "3 x 5 = 15"
[1] "4 x 1 = 4"
[1] "4 x 2 = 8"
[1] "4 x 3 = 12"
[1] "4 x 4 = 16"
[1] "4 x 5 = 20"
[1] "5 x 1 = 5"
[1] "5 x 2 = 10"
[1] "5 x 3 = 15"
[1] "5 x 4 = 20"
[1] "5 x 5 = 25"

Ex 7

# A named list of three integer vectors.
my_list <- list(a = 1:3, b = 4:6, c = 7:9)

# Looping over a list hands each element (here: a vector) to the body in turn.
for (item in my_list) {
  total <- sum(item)
  print(total)
}
[1] 6
[1] 15
[1] 24

Ex 8

# Print 1..10, skipping 3 and stopping completely once 7 is reached.
for (i in seq_len(10)) {
  # break leaves the loop; nothing from 7 onwards is printed.
  if (i == 7) {
    break
  }
  # next jumps straight to the following iteration, so 3 is never printed.
  if (i == 3) {
    next
  }
  print(i)
}
[1] 1
[1] 2
[1] 4
[1] 5
[1] 6

Ex 9

n <- 10

# Allocate the full result vector up front (numeric(n) is n zeros) and fill it
# by position instead of growing it inside the loop.
result <- numeric(n)

for (i in seq_len(n)) {
  result[i] <- i^2
}

print(result)
 [1]   1   4   9  16  25  36  49  64  81 100

Ex 10

# Evaluate the polynomial x^2 + 2x + 1 at x.
my_function <- function(x) {
  x^2 + 2 * x + 1
}

values <- 1:5

# One slot per input value, filled by position.
results <- numeric(length(values))

for (i in seq_along(values)) {
  results[i] <- my_function(values[i])
}

print(results)
[1]  4  9 16 25 36
# Same computation without a loop: arithmetic operators work element-wise on
# the whole vector at once.
values <- seq_len(5)

results <- values^2 + 2 * values + 1

print(results)
[1]  4  9 16 25 36

Complex For Loop Examples

1. Simulating a Random Walk (with Conditional Logic)

Simulate a random walk in 1D where you either step forward or backward randomly.

set.seed(123)  # Fix the random number stream so the walk is reproducible

steps <- 100
position <- numeric(steps)  # One slot per step
position[1] <- 0            # The walk starts at the origin

# Every later position is the previous one plus a random step of -1 or +1.
for (i in 2:steps) {
  step <- sample(c(-1, 1), size = 1)
  position[i] <- position[i - 1] + step
}

# Draw the walk as a line: step number on x, position on y.
plot(
  position,
  type = "l",
  col = "blue",
  main = "Random Walk",
  xlab = "Step",
  ylab = "Position"
)

2. Matrix Manipulation with Nested Loops

Fill a matrix with the sum of its row and column indices.

n <- 5
m <- matrix(0, nrow = n, ncol = n)  # n x n matrix of zeros to fill in

# Visit every cell: the outer loop walks the rows, the inner loop the columns.
for (i in seq_len(nrow(m))) {
  for (j in seq_len(ncol(m))) {
    m[i, j] <- i + j  # Cell value = row index + column index
  }
}

print(m)
     [,1] [,2] [,3] [,4] [,5]
[1,]    2    3    4    5    6
[2,]    3    4    5    6    7
[3,]    4    5    6    7    8
[4,]    5    6    7    8    9
[5,]    6    7    8    9   10

3. Finding Prime Numbers (Sieve of Eratosthenes)

Find all prime numbers up to a given number using nested loops.

n <- 50
is_prime <- rep(TRUE, n)  # is_prime[k] answers "is k still a prime candidate?"
is_prime[1] <- FALSE      # 1 is not a prime number

# Only candidates up to sqrt(n) need to be sieved. seq_len(...)[-1] gives
# 2, 3, ..., floor(sqrt(n)) and is empty when n < 4, whereas 2:sqrt(n) would
# count downwards for small n and make the inner seq() fail.
for (i in seq_len(floor(sqrt(n)))[-1]) {
  if (is_prime[i]) {
    # Cross out the multiples of i, starting at i^2: smaller multiples were
    # already removed by smaller primes.
    for (j in seq(i^2, n, by = i)) {
      is_prime[j] <- FALSE
    }
  }
}

# The indices still marked TRUE are the primes up to n.
primes <- which(is_prime)
print(primes)
 [1]  2  3  5  7 11 13 17 19 23 29 31 37 41 43 47

4. Simulating Multiple Dice Rolls

Simulate rolling multiple dice 10,000 times and calculate the probability of rolling a sum of 7.

set.seed(123)  # Reproducible rolls
rolls <- 10000  # Number of simulated throws
dice <- 2       # Dice per throw
sum_sevens <- 0 # Throws whose faces add up to 7

for (i in seq_len(rolls)) {
  # One throw: draw `dice` faces from 1..6 with replacement.
  outcome <- sample(1:6, dice, replace = TRUE)
  # The comparison is TRUE/FALSE, which counts as 1/0 when added.
  sum_sevens <- sum_sevens + (sum(outcome) == 7)
}

# Share of throws that summed to 7
probability <- sum_sevens / rolls
print(paste("Probability of rolling a sum of 7:", probability))
[1] "Probability of rolling a sum of 7: 0.1595"

5. Dynamic Data Frame Creation Using a Loop

Create a dynamic data frame that grows row by row using a loop.

# Start from an empty data frame that already has the target columns and types
df <- data.frame(ID = integer(), Name = character(), Score = numeric(), stringsAsFactors = FALSE)

names <- c("Alice", "Bob", "Charlie", "David", "Eve")
scores <- c(85, 92, 78, 90, 88)

# seq_along() yields no iterations for an empty vector, whereas
# 1:length(names) would run over 1 and 0 and append a bogus row.
for (i in seq_along(names)) {
  # Build a one-row data frame and append it to the result
  new_row <- data.frame(ID = i, Name = names[i], Score = scores[i], stringsAsFactors = FALSE)
  df <- rbind(df, new_row)
}

print(df)
  ID    Name Score
1  1   Alice    85
2  2     Bob    92
3  3 Charlie    78
4  4   David    90
5  5     Eve    88

Bonus: Monte Carlo Simulation

Use a loop to estimate the value of π using the Monte Carlo method.

set.seed(123)  # Reproducible draws
n <- 100000  # Number of random points
inside_circle <- 0  # Points that land inside the unit circle

for (i in seq_len(n)) {
  # Draw one point uniformly from the square [-1, 1] x [-1, 1].
  x <- runif(1, -1, 1)
  y <- runif(1, -1, 1)

  # The point is inside the unit circle when its squared distance from the
  # origin is at most 1; TRUE/FALSE counts as 1/0 when added.
  inside_circle <- inside_circle + (x^2 + y^2 <= 1)
}

# The circle covers pi/4 of the square, so the hit ratio times 4 estimates π
pi_estimate <- (inside_circle / n) * 4
print(paste("Estimated value of π:", pi_estimate))
[1] "Estimated value of π: 3.14044"

Part 1: 5 Examples of for Loops for Creating Multiple Hypothesis Tests

Here are examples where you create multiple hypothesis tests using for loops. These can help automate statistical testing across multiple datasets or variables.


1. T-Test Across Multiple Groups

Perform a t-test for multiple groups of a numeric variable.

set.seed(123)
# Three groups of 20 observations each, drawn around means 10, 12 and 15.
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 20),
  value = c(rnorm(20, mean = 10), rnorm(20, mean = 12), rnorm(20, mean = 15))
)

unique_groups <- unique(data$group)
results <- list()

# Iterate over the group labels themselves rather than over 1:length(...),
# which would run over 1 and 0 if there were no groups.
for (g in as.character(unique_groups)) {
  group_data <- data$value[data$group == g]
  test <- t.test(group_data, mu = 10)  # One-sample test: is the group mean 10?
  results[[g]] <- test                 # Store the test under the group's name
}

print(results)

2. Chi-Square Test for Multiple Contingency Tables

Perform a chi-square test on multiple 2x2 tables.

set.seed(123)
# Three 2x2 contingency tables; matrix() fills them column by column.
tables <- list(
  matrix(c(10, 20, 30, 40), nrow = 2),
  matrix(c(15, 25, 35, 45), nrow = 2),
  matrix(c(5, 10, 15, 20), nrow = 2)
)

results <- list()

# seq_along() is safe for an empty list, unlike 1:length(tables).
for (i in seq_along(tables)) {
  test <- chisq.test(tables[[i]])
  results[[paste("Table", i)]] <- test  # Stored as "Table 1", "Table 2", ...
}

print(results)

3. ANOVA Across Multiple Variables

Perform ANOVA on multiple numeric variables grouped by a factor.

set.seed(123)
# Two numeric variables observed in three groups of 10 rows each.
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 10),
  var1 = rnorm(30, mean = 5),
  var2 = rnorm(30, mean = 10)
)

results <- list()

for (var in c("var1", "var2")) {
  # Build the model formula "<var> ~ group" from the variable's name.
  formula <- as.formula(paste(var, "~ group"))
  test <- aov(formula, data = data)
  # Keep the ANOVA table for this variable under its name.
  results[[var]] <- summary(test)
}

print(results)

4. Wilcoxon Test for Multiple Subsets

Perform a Wilcoxon test on subsets of data based on a grouping factor.

set.seed(123)
# Three groups of 15 values each, drawn around means 5, 6 and 7.
data <- data.frame(
  group = rep(c("X", "Y", "Z"), each = 15),
  value = c(rnorm(15, 5), rnorm(15, 6), rnorm(15, 7))
)

results <- list()

for (g in unique(data$group)) {
  # Values that belong to the current group only.
  in_group <- data$group == g
  subset_data <- data$value[in_group]
  # One-sample Wilcoxon signed-rank test against a location of 5.
  test <- wilcox.test(subset_data, mu = 5)
  results[[g]] <- test
}

print(results)

5. Correlation Tests for Multiple Pairs of Variables

Test for correlation between multiple pairs of variables.

set.seed(123)
data <- data.frame(
  var1 = rnorm(50),
  var2 = rnorm(50, 5),
  var3 = rnorm(50, 10)
)

variables <- colnames(data)
results <- list()

# Every unordered pair of columns, in the order (1,2), (1,3), (2,3), ...
# combn() needs at least two items, so fall back to "no pairs" otherwise; the
# index ranges 1:(length - 1) and (i + 1):length would count backwards there.
pairs <- if (length(variables) >= 2) {
  combn(variables, 2, simplify = FALSE)
} else {
  list()
}

for (pair in pairs) {
  test <- cor.test(data[[pair[1]]], data[[pair[2]]])
  # Label the test with the actual column names instead of the indexing
  # expression, so the printed "data:" line says which pair was tested.
  test$data.name <- paste(pair[1], "and", pair[2])
  results[[paste(pair[1], "vs", pair[2])]] <- test
}

print(results)

Part 2: 5 Examples of Creating Multiple Plots and Combining Them with patchwork

1. Simple Histogram Plots

Create histograms for multiple variables and combine them.

library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.2
library(patchwork)
Warning: package 'patchwork' was built under R version 4.4.2
set.seed(123)
data <- data.frame(
  var1 = rnorm(100),
  var2 = rnorm(100, 5),
  var3 = rnorm(100, 10)
)

variables <- colnames(data)
plots <- list()

for (var in variables) {
  # aes_string() is deprecated. .data[[...]] selects a column by its name, and
  # !!var inserts the current name immediately, so each stored plot keeps its
  # own column even though `var` changes on the next iteration.
  p <- ggplot(data, aes(x = .data[[!!var]])) +
    geom_histogram(binwidth = 1, fill = "blue", alpha = 0.7) +
    ggtitle(paste("Histogram of", var))
  plots[[var]] <- p  # Store the plot under the variable's name
}
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
# Stack the stored plots in a single column with patchwork and draw them
combined_plot <- patchwork::wrap_plots(plots, ncol = 1)
print(combined_plot)

2. Boxplots by Group

Create boxplots for multiple numeric variables grouped by a factor.

set.seed(123)
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 50),
  var1 = rnorm(150),
  var2 = rnorm(150, 5)
)

variables <- c("var1", "var2")
plots <- list()

for (var in variables) {
  # aes() only records expressions; they are evaluated when the plot is drawn.
  # With get(var) every stored plot would look up `var` after the loop has
  # ended and draw the last variable. !!var inserts the current column name
  # right away, so each plot keeps its own variable (and a proper axis label).
  p <- ggplot(data, aes(x = group, y = .data[[!!var]], fill = group)) +
    geom_boxplot() +
    ggtitle(paste("Boxplot of", var, "by Group")) +
    theme_minimal()
  plots[[var]] <- p
}

# Combine using patchwork: both boxplots side by side
combined_plot <- wrap_plots(plots, ncol = 2)
print(combined_plot)

3. Scatter Plots with Regression Lines

Generate scatter plots for pairs of variables with regression lines.

set.seed(123)  # Seed the generator like the other examples so the data are reproducible
data <- data.frame(
  x = rnorm(100),
  y1 = rnorm(100, 5),
  y2 = rnorm(100, 10)
)

y_vars <- c("y1", "y2")
plots <- list()

for (y in y_vars) {
  # aes() only records expressions; they are evaluated when the plot is drawn.
  # With get(y) every stored plot would look up `y` after the loop has ended
  # and draw the last response. !!y inserts the current column name right away.
  p <- ggplot(data, aes(x = x, y = .data[[!!y]])) +
    geom_point(color = "blue") +
    geom_smooth(method = "lm", color = "red") +  # Fitted straight line
    ggtitle(paste("Scatter Plot: x vs", y))
  plots[[y]] <- p
}

# Combine using patchwork: all scatter plots in one row
combined_plot <- wrap_plots(plots, nrow = 1)
print(combined_plot)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

4. Density Plots for Multiple Variables

Create density plots for multiple variables and arrange them.

set.seed(123)  # Seed the generator like the other examples so the data are reproducible
data <- data.frame(
  var1 = rnorm(500, 0, 1),
  var2 = rnorm(500, 5, 1),
  var3 = rnorm(500, 10, 1)
)

variables <- colnames(data)
plots <- list()

for (var in variables) {
  # aes_string() is deprecated. .data[[...]] selects a column by its name, and
  # !!var inserts the current name immediately, so each stored plot keeps its
  # own column even though `var` changes on the next iteration.
  p <- ggplot(data, aes(x = .data[[!!var]])) +
    geom_density(fill = "blue", alpha = 0.5) +
    ggtitle(paste("Density of", var))
  plots[[var]] <- p
}

# Combine using patchwork: arrange the plots in two columns
combined_plot <- wrap_plots(plots, ncol = 2)
print(combined_plot)

5. Stacked Line Plots for Time-Series Data

Generate one line plot per time series and stack the plots in a single column with patchwork.

set.seed(123)
# Three random walks: cumulative sums of normal draws with different drifts.
data <- data.frame(
  time = 1:100,
  series1 = cumsum(rnorm(100)),
  series2 = cumsum(rnorm(100, 0.5)),
  series3 = cumsum(rnorm(100, -0.5))
)

variables <- c("series1", "series2", "series3")
plots <- list()

for (var in variables) {
  # aes() only records expressions; they are evaluated when the plot is drawn.
  # With get(var) every stored plot would look up `var` after the loop has
  # ended and draw the last series. !!var inserts the current column name
  # right away, so each plot keeps its own series.
  p <- ggplot(data, aes(x = time, y = .data[[!!var]])) +
    geom_line(color = "blue") +
    ggtitle(paste("Time Series:", var)) +
    theme_minimal()
  plots[[var]] <- p
}

# Combine using patchwork: one panel per series, stacked vertically
combined_plot <- wrap_plots(plots, ncol = 1)
print(combined_plot)

Multiple Models with For Loop

1. Linear Regression for Multiple Subsets

Train multiple linear regression models on subsets of data and store the coefficients.

set.seed(123)
library(dplyr)
Warning: package 'dplyr' was built under R version 4.4.2

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Create a sample dataset: three groups of 50 rows each
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 50),
  x = rnorm(150),
  y = rnorm(150, 5)
)

# Prepare storage for model parameters
results <- data.frame(group = character(), intercept = numeric(), slope = numeric(), stringsAsFactors = FALSE)

# Loop through each group and fit a linear model
for (g in unique(data$group)) {
  subset_data <- data %>% filter(group == g)
  model <- lm(y ~ x, data = subset_data)
  # coef() returns a named vector. Extracting with [[ ]] drops the names;
  # otherwise "(Intercept)" leaks into the row names of the result
  # ("(Intercept)", "(Intercept)1", ...).
  results <- rbind(results, data.frame(
    group = g,
    intercept = coef(model)[[1]],
    slope = coef(model)[[2]],
    stringsAsFactors = FALSE
  ))
}

print(results)
             group intercept      slope
(Intercept)      A  5.043041 -0.1230692
(Intercept)1     B  5.011152 -0.1343668
(Intercept)2     C  5.204264 -0.1779689

2. Logistic Regression Across Multiple Target Variables

Train logistic regression models on multiple binary targets and store performance metrics.

set.seed(123)
library(caret)
Warning: package 'caret' was built under R version 4.4.2
Loading required package: lattice
# Create a dataset with binary targets.
# The targets are stored as two-level factors: with a numeric 0/1 outcome caret
# fits a regression, reports no Accuracy column, and max() of the missing
# column returns -Inf.
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  target1 = factor(sample(0:1, 100, replace = TRUE), levels = 0:1, labels = c("no", "yes")),
  target2 = factor(sample(0:1, 100, replace = TRUE), levels = 0:1, labels = c("no", "yes"))
)

targets <- c("target1", "target2")
results <- data.frame(target = character(), accuracy = numeric(), stringsAsFactors = FALSE)

# The resampling setup (5-fold cross-validation) is the same for every model
cv_control <- trainControl(method = "cv", number = 5)

# Loop through each target and fit a logistic regression model
for (target in targets) {
  formula <- as.formula(paste(target, "~ x1 + x2"))
  model <- train(formula, data = data, method = "glm", family = "binomial", trControl = cv_control)
  accuracy <- max(model$results$Accuracy)  # Cross-validated accuracy
  results <- rbind(results, data.frame(target = target, accuracy = accuracy, stringsAsFactors = FALSE))
}
Warning in train.default(x, y, weights = w, ...): You are trying to do
regression and your outcome only has two possible values Are you trying to do
classification? If so, use a 2 level factor as your outcome column.
Warning in max(model$results$Accuracy): no non-missing arguments to max;
returning -Inf
Warning in train.default(x, y, weights = w, ...): You are trying to do
regression and your outcome only has two possible values Are you trying to do
classification? If so, use a 2 level factor as your outcome column.
Warning in max(model$results$Accuracy): no non-missing arguments to max;
returning -Inf
print(results)
   target accuracy
1 target1     -Inf
2 target2     -Inf

3. Random Forest Models with Varying Parameters

Train multiple random forest models with different mtry values and store their OOB error rates.

set.seed(123)
library(randomForest)
Warning: package 'randomForest' was built under R version 4.4.2
randomForest 4.7-1.2
Type rfNews() to see new features/changes/bug fixes.

Attaching package: 'randomForest'
The following object is masked from 'package:dplyr':

    combine
The following object is masked from 'package:ggplot2':

    margin
# Create a sample dataset with a two-level factor outcome
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = factor(sample(0:1, 100, replace = TRUE))
)

# Hyperparameter values to try
mtry_values <- c(1, 2)
results <- data.frame(mtry = integer(), OOBError = numeric(), stringsAsFactors = FALSE)

# Loop through different mtry values and fit a random forest model
for (m in mtry_values) {
  model <- randomForest(y ~ x1 + x2, data = data, mtry = m)
  # The last row of err.rate holds the error after all trees were grown; the
  # "OOB" column is the out-of-bag error. unname() stops the column label from
  # leaking into the row names of the result ("OOB", "OOB1", ...).
  oob_error <- unname(model$err.rate[nrow(model$err.rate), "OOB"])
  results <- rbind(results, data.frame(mtry = m, OOBError = oob_error))
}

print(results)
     mtry OOBError
OOB     1     0.52
OOB1    2     0.56

4. Cross-Validation with Different Models

Train different machine learning models (e.g., SVM, kNN, Decision Trees) and store their performance.

set.seed(123)
library(kernlab)

Attaching package: 'kernlab'
The following object is masked from 'package:ggplot2':

    alpha
library(caret)

# Two numeric predictors and a two-level factor outcome
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = factor(sample(0:1, 100, replace = TRUE))
)

# caret method names of the models to compare
models <- c("svmRadial", "knn", "rpart")
results <- data.frame(model = character(), accuracy = numeric(), stringsAsFactors = FALSE)

# Every model is evaluated with the same setup: 5-fold cross-validation
cv_control <- trainControl(method = "cv", number = 5)

for (m in models) {
  model <- train(y ~ x1 + x2, data = data, method = m, trControl = cv_control)
  # Best cross-validated accuracy across the tuning values caret tried
  accuracy <- max(model$results$Accuracy)
  new_row <- data.frame(model = m, accuracy = accuracy)
  results <- rbind(results, new_row)
}

print(results)
      model accuracy
1 svmRadial     0.59
2       knn     0.56
3     rpart     0.56

5. Hyperparameter Tuning for Gradient Boosting

Train gradient boosting models with different learning rates and store RMSE values.

set.seed(123)
library(gbm)
Warning: package 'gbm' was built under R version 4.4.2
Loaded gbm 2.2.2
This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
# Two numeric predictors and a numeric response drawn around 10
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = rnorm(100, 10)
)

# Shrinkage (learning rate) values to compare
learning_rates <- c(0.01, 0.05, 0.1)
results <- data.frame(learning_rate = numeric(), RMSE = numeric(), stringsAsFactors = FALSE)

for (lr in learning_rates) {
  # Same model each time; only the shrinkage changes
  model <- gbm(
    y ~ x1 + x2,
    data = data,
    distribution = "gaussian",
    n.trees = 100,
    interaction.depth = 3,
    shrinkage = lr,
    cv.folds = 5
  )
  # Square root of the smallest cross-validation error over the 100 trees
  rmse <- sqrt(min(model$cv.error))
  new_row <- data.frame(learning_rate = lr, RMSE = rmse)
  results <- rbind(results, new_row)
}

print(results)
  learning_rate      RMSE
1          0.01 0.9489399
2          0.05 0.9448511
3          0.10 0.9568672