For Loop || Simple to Advanced

For Loop

Simple For Loop Examples

Ex 1

# Ex 1: walk the integers 1 through 5 and print each one.
for (i in seq_len(5)) {
  print(i)
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5

Ex 2

# Ex 2: iterate directly over the elements of a character vector.
names <- c("arun", "amita", "revant", "hershie")

for (name in names) {
  print(name)
}

[1] "arun"
[1] "amita"
[1] "revant"
[1] "hershie"

Ex 3

# Ex 3: compute the square of each integer from 1 to 6 and print it.
for (i in seq_len(6)) {
  squared <- i^2
  print(squared)
}

[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
[1] 36

Ex 4

# Ex 4: collect the squares of 1..5 in a vector.
# The vector is allocated once and filled by index; appending with c() on
# every pass would copy the whole vector each iteration.
squares <- numeric(5)

for (i in seq_along(squares)) {
  squares[i] <- i^2
}

print(squares)

[1]  1  4  9 16 25

Ex 5

# Ex 5: label each integer from 1 to 10 as even or odd.
# paste() adds its own separator, so the leading space in each suffix gives
# two spaces between the number and the text in the printed line.
for (i in seq_len(10)) {
  suffix <- if (i %% 2 == 0) " is even number" else " is odd number"
  print(paste(i, suffix))
}

[1] "1  is odd number"
[1] "2  is even number"
[1] "3  is odd number"
[1] "4  is even number"
[1] "5  is odd number"
[1] "6  is even number"
[1] "7  is odd number"
[1] "8  is even number"
[1] "9  is odd number"
[1] "10  is even number"

Ex 6

# Ex 6: print the 5 x 5 multiplication table, one product per line.
# The outer loop fixes the left factor; the inner loop runs the right factor.
for (i in seq_len(5)) {
  for (j in seq_len(5)) {
    product <- i * j
    print(paste(i, "x", j, "=", product))
  }
}

[1] "1 x 1 = 1"
[1] "1 x 2 = 2"
[1] "1 x 3 = 3"
[1] "1 x 4 = 4"
[1] "1 x 5 = 5"
[1] "2 x 1 = 2"
[1] "2 x 2 = 4"
[1] "2 x 3 = 6"
[1] "2 x 4 = 8"
[1] "2 x 5 = 10"
[1] "3 x 1 = 3"
[1] "3 x 2 = 6"
[1] "3 x 3 = 9"
[1] "3 x 4 = 12"
[1] "3 x 5 = 15"
[1] "4 x 1 = 4"
[1] "4 x 2 = 8"
[1] "4 x 3 = 12"
[1] "4 x 4 = 16"
[1] "4 x 5 = 20"
[1] "5 x 1 = 5"
[1] "5 x 2 = 10"
[1] "5 x 3 = 15"
[1] "5 x 4 = 20"
[1] "5 x 5 = 25"

Ex 7

# Ex 7: loop over the elements of a named list and print each element's sum.
my_list <- list(a = 1:3, b = 4:6, c = 7:9)

for (item in my_list) {
  total <- sum(item)
  print(total)
}

[1] 6
[1] 15
[1] 24

Ex 8

# Ex 8: `next` abandons the current iteration; `break` leaves the loop.
for (i in seq_len(10)) {
  if (i == 7) {
    break  # stop the loop entirely once 7 is reached
  }
  if (i == 3) {
    next  # skip printing 3 and continue with 4
  }
  print(i)
}

[1] 1
[1] 2
[1] 4
[1] 5
[1] 6

Ex 9

# Ex 9: preallocate the result vector, then fill it by index.
n <- 10
result <- numeric(n)  # n zeros, overwritten one slot per iteration

for (i in seq_len(n)) {
  result[i] <- i^2
}

print(result)

 [1]   1   4   9  16  25  36  49  64  81 100

Ex 10

# Ex 10: apply a function to each value and store the results.

# Evaluate the polynomial x^2 + 2x + 1 at x.
my_function <- function(x) {
  x^2 + 2 * x + 1
}

values <- 1:5

# Allocate the output once and fill it by position; appending with c() on
# every pass would copy the whole vector each iteration.
results <- numeric(length(values))

for (i in seq_along(values)) {
  results[i] <- my_function(values[i])
}

print(results)

[1]  4  9 16 25 36

# Vectorised alternative to the loop: the arithmetic is applied to the
# whole vector at once.
values <- seq_len(5)
results <- 1 + 2 * values + values^2
print(results)

[1]  4  9 16 25 36

Complex For Loop Examples

1. Simulating a Random Walk (with Conditional Logic)

Simulate a random walk in 1D where you either step forward or backward randomly.

set.seed(123)  # fixed seed so the walk is reproducible
steps <- 100
position <- numeric(steps)  # one slot per step
position[1] <- 0  # the walk starts at the origin

for (i in seq(2, steps)) {
  # One draw per step: -1 moves backward, +1 moves forward.
  step <- sample(c(-1, 1), size = 1)
  position[i] <- position[i - 1] + step
}

# Draw the path as a line chart.
plot(
  position,
  type = "l", col = "blue",
  main = "Random Walk", xlab = "Step", ylab = "Position"
)

2. Matrix Manipulation with Nested Loops

Fill a matrix with the sum of its row and column indices.

n <- 5
m <- matrix(0, nrow = n, ncol = n)  # n x n matrix of zeros

# Visit every cell and store (row index + column index) in it.
for (i in seq_len(n)) {
  for (j in seq_len(n)) {
    m[i, j] <- i + j
  }
}

print(m)

     [,1] [,2] [,3] [,4] [,5]
[1,]    2    3    4    5    6
[2,]    3    4    5    6    7
[3,]    4    5    6    7    8
[4,]    5    6    7    8    9
[5,]    6    7    8    9   10

3. Finding Prime Numbers (Sieve of Eratosthenes)

Find all prime numbers up to a given number using nested loops.

# Sieve of Eratosthenes: flag every index up to n as prime, then cross out
# the multiples of each prime found.
n <- 50
is_prime <- rep(TRUE, n)
is_prime[1] <- FALSE  # 1 is not a prime number

# Candidates run from 2 to floor(sqrt(n)). seq_len(...)[-1] is empty when
# n < 4, whereas 2:sqrt(n) would count downward and then fail inside the
# loop.
for (i in seq_len(floor(sqrt(n)))[-1]) {
  if (is_prime[i]) {
    # Start at i^2: smaller multiples of i were already crossed out by a
    # smaller prime factor.
    for (j in seq(i^2, n, by = i)) {
      is_prime[j] <- FALSE
    }
  }
}

primes <- which(is_prime)
print(primes)

 [1]  2  3  5  7 11 13 17 19 23 29 31 37 41 43 47

4. Simulating Multiple Dice Rolls

Simulate rolling two dice 10,000 times and estimate the probability that their sum is 7.

set.seed(123)
rolls <- 10000  # number of simulated throws
dice <- 2       # dice per throw
sum_sevens <- 0

for (i in seq_len(rolls)) {
  # One throw: `dice` independent faces drawn from 1..6.
  outcome <- sample(1:6, dice, replace = TRUE)
  is_seven <- sum(outcome) == 7
  sum_sevens <- sum_sevens + is_seven  # TRUE counts as 1, FALSE as 0
}

# Share of throws that summed to 7.
probability <- sum_sevens / rolls
print(paste("Probability of rolling a sum of 7:", probability))

[1] "Probability of rolling a sum of 7: 0.1595"

5. Dynamic Data Frame Creation Using a Loop

Create a dynamic data frame that grows row by row using a loop.

# Initialize an empty data frame with the three typed columns
df <- data.frame(ID = integer(), Name = character(), Score = numeric(), stringsAsFactors = FALSE)

names <- c("Alice", "Bob", "Charlie", "David", "Eve")
scores <- c(85, 92, 78, 90, 88)

# seq_along() yields an empty sequence for empty input, whereas
# 1:length(names) would run the body for i = 1 and i = 0.
for (i in seq_along(names)) {
  # Build a one-row data frame and append it to the running result
  new_row <- data.frame(ID = i, Name = names[i], Score = scores[i])
  df <- rbind(df, new_row)
}

print(df)

  ID    Name Score
1  1   Alice    85
2  2     Bob    92
3  3 Charlie    78
4  4   David    90
5  5     Eve    88

Bonus: Monte Carlo Simulation

Use a loop to estimate the value of π using the Monte Carlo method.

set.seed(123)
n <- 100000  # Number of random points
inside_circle <- 0

for (i in seq_len(n)) {
  # Draw x first, then y, each uniform on [-1, 1].
  x <- runif(1, min = -1, max = 1)
  y <- runif(1, min = -1, max = 1)

  # Count the point when it lies on or inside the unit circle.
  on_or_inside <- x^2 + y^2 <= 1
  if (on_or_inside) {
    inside_circle <- inside_circle + 1
  }
}

# The hit fraction approximates (circle area) / (square area) = π / 4.
pi_estimate <- (inside_circle / n) * 4
print(paste("Estimated value of π:", pi_estimate))

[1] "Estimated value of π: 3.14044"

Part 1: 5 Examples of `for` Loops for Creating Multiple Hypothesis Tests

Here are examples where you create multiple hypothesis tests using for loops. These can help automate statistical testing across multiple datasets or variables.

1. T-Test Across Multiple Groups

Perform a t-test for multiple groups of a numeric variable.

set.seed(123)
# Three groups of 20 normal draws with means 10, 12 and 15.
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 20),
  value = c(rnorm(20, mean = 10), rnorm(20, mean = 12), rnorm(20, mean = 15))
)

unique_groups <- unique(data$group)
results <- list()

# Iterate over the group labels themselves rather than 1:length(...), which
# would count 1, 0 if there were no groups.
for (g in unique_groups) {
  group_data <- data$value[data$group == g]
  test <- t.test(group_data, mu = 10)  # Test if the mean is 10
  results[[g]] <- test  # stored under the group label
}

print(results)

2. Chi-Square Test for Multiple Contingency Tables

Perform a chi-square test on multiple 2x2 tables.

set.seed(123)
# Three 2x2 contingency tables, filled column by column.
tables <- list(
  matrix(c(10, 20, 30, 40), nrow = 2),
  matrix(c(15, 25, 35, 45), nrow = 2),
  matrix(c(5, 10, 15, 20), nrow = 2)
)

results <- list()

# seq_along() is empty for an empty list, whereas 1:length(tables) would
# count 1, 0.
for (i in seq_along(tables)) {
  test <- chisq.test(tables[[i]])
  results[[paste("Table", i)]] <- test  # keys are "Table 1", "Table 2", ...
}

print(results)

3. ANOVA Across Multiple Variables

Perform ANOVA on multiple numeric variables grouped by a factor.

set.seed(123)
# Two numeric responses measured in three groups of ten rows each.
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 10),
  var1 = rnorm(30, mean = 5),
  var2 = rnorm(30, mean = 10)
)

results <- list()

for (var in c("var1", "var2")) {
  # Build "<response> ~ group" as text, then convert it to a formula.
  formula <- as.formula(sprintf("%s ~ group", var))
  test <- aov(formula, data = data)
  # Keep only the ANOVA summary, keyed by the response name.
  results[[var]] <- summary(test)
}

print(results)

4. Wilcoxon Test for Multiple Subsets

Perform a Wilcoxon test on subsets of data based on a grouping factor.

set.seed(123)
# Three groups of 15 normal draws centred on 5, 6 and 7.
data <- data.frame(
  group = rep(c("X", "Y", "Z"), each = 15),
  value = c(rnorm(15, 5), rnorm(15, 6), rnorm(15, 7))
)

results <- list()
group_labels <- unique(data$group)

for (g in group_labels) {
  # Values belonging to the current group only.
  subset_data <- data[data$group == g, "value"]
  # One-sample Wilcoxon test against a location of 5.
  test <- wilcox.test(subset_data, mu = 5)
  results[[g]] <- test
}

print(results)

5. Correlation Tests for Multiple Pairs of Variables

Test for correlation between multiple pairs of variables.

set.seed(123)
data <- data.frame(
  var1 = rnorm(50),
  var2 = rnorm(50, 5),
  var3 = rnorm(50, 10)
)

variables <- colnames(data)
results <- list()

# Visit every unordered pair (i < j) exactly once. seq_len() makes the
# outer loop empty when there are fewer than two variables, whereas
# 1:(length(variables) - 1) would count downward.
for (i in seq_len(length(variables) - 1)) {
  for (j in (i + 1):length(variables)) {
    test <- cor.test(data[[variables[i]]], data[[variables[j]]])
    # Key each result as "<first> vs <second>".
    results[[paste(variables[i], "vs", variables[j])]] <- test
  }
}

print(results)

Part 2: 5 Examples of Creating Multiple Plots and Combining Them with `patchwork`

1. Simple Histogram Plots

Create histograms for multiple variables and combine them.

library(ggplot2)

Warning: package 'ggplot2' was built under R version 4.4.2

library(patchwork)

Warning: package 'patchwork' was built under R version 4.4.2

set.seed(123)
data <- data.frame(
  var1 = rnorm(100),
  var2 = rnorm(100, 5),
  var3 = rnorm(100, 10)
)

variables <- colnames(data)
plots <- list()

for (var in variables) {
  # aes_string() is deprecated. !!sym(var) injects the column name into
  # aes() when the plot is created, so each stored plot keeps its own
  # column even though `var` changes on later iterations.
  p <- ggplot(data, aes(x = !!sym(var))) +
    geom_histogram(binwidth = 1, fill = "blue", alpha = 0.7) +
    ggtitle(paste("Histogram of", var))
  plots[[var]] <- p
}

# Combine using patchwork: one histogram per row
combined_plot <- wrap_plots(plots, ncol = 1)
print(combined_plot)

2. Boxplots by Group

Create boxplots for multiple numeric variables grouped by a factor.

set.seed(123)
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 50),
  var1 = rnorm(150),
  var2 = rnorm(150, 5)
)

variables <- c("var1", "var2")
plots <- list()

for (var in variables) {
  # aes() captures its arguments unevaluated, so y = get(var) would be
  # resolved only when the plot is drawn, after the loop, when `var` holds
  # its last value. !!sym(var) fixes the column at creation time and also
  # gives the axis the column's name as its label.
  p <- ggplot(data, aes(x = group, y = !!sym(var), fill = group)) +
    geom_boxplot() +
    ggtitle(paste("Boxplot of", var, "by Group")) +
    theme_minimal()
  plots[[var]] <- p
}

# Combine using patchwork: the two boxplots side by side
combined_plot <- wrap_plots(plots, ncol = 2)
print(combined_plot)

3. Scatter Plots with Regression Lines

Generate scatter plots for pairs of variables with regression lines.

data <- data.frame(
  x = rnorm(100),
  y1 = rnorm(100, 5),
  y2 = rnorm(100, 10)
)

y_vars <- c("y1", "y2")
plots <- list()

for (y in y_vars) {
  # aes() captures its arguments unevaluated, so y = get(y) would be
  # resolved only when the plot is drawn, after the loop, when the loop
  # variable holds its last value. !!sym(y) fixes the column at creation
  # time and labels the axis with the column name.
  p <- ggplot(data, aes(x = x, y = !!sym(y))) +
    geom_point(color = "blue") +
    geom_smooth(method = "lm", color = "red") +  # linear fit overlay
    ggtitle(paste("Scatter Plot: x vs", y))
  plots[[y]] <- p
}

# Combine using patchwork: both scatter plots in one row
combined_plot <- wrap_plots(plots, nrow = 1)
print(combined_plot)

`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

4. Density Plots for Multiple Variables

Create density plots for multiple variables and arrange them.

data <- data.frame(
  var1 = rnorm(500, 0, 1),
  var2 = rnorm(500, 5, 1),
  var3 = rnorm(500, 10, 1)
)

variables <- colnames(data)
plots <- list()

for (var in variables) {
  # aes_string() is deprecated. !!sym(var) injects the column name into
  # aes() when the plot is created, so each stored plot keeps its own
  # column even though `var` changes on later iterations.
  p <- ggplot(data, aes(x = !!sym(var))) +
    geom_density(fill = "blue", alpha = 0.5) +
    ggtitle(paste("Density of", var))
  plots[[var]] <- p
}

# Combine using patchwork: a grid two plots wide
combined_plot <- wrap_plots(plots, ncol = 2)
print(combined_plot)

5. Faceted Line Plots for Time-Series Data

Generate one line plot per time series and stack them vertically (separate plots combined with patchwork rather than ggplot2 facets).

set.seed(123)
# Three random-walk style series: cumulative sums of normal increments.
data <- data.frame(
  time = 1:100,
  series1 = cumsum(rnorm(100)),
  series2 = cumsum(rnorm(100, 0.5)),
  series3 = cumsum(rnorm(100, -0.5))
)

variables <- c("series1", "series2", "series3")
plots <- list()

for (var in variables) {
  # aes() captures its arguments unevaluated, so y = get(var) would be
  # resolved only when the plot is drawn, after the loop, when `var` holds
  # its last value. !!sym(var) fixes the column at creation time and labels
  # the axis with the column name.
  p <- ggplot(data, aes(x = time, y = !!sym(var))) +
    geom_line(color = "blue") +
    ggtitle(paste("Time Series:", var)) +
    theme_minimal()
  plots[[var]] <- p
}

# Combine using patchwork: one series per row
combined_plot <- wrap_plots(plots, ncol = 1)
print(combined_plot)

Multiple Models with For Loop

1. Linear Regression for Multiple Subsets

Train multiple linear regression models on subsets of data and store the coefficients.

set.seed(123)
library(dplyr)

Warning: package 'dplyr' was built under R version 4.4.2


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# Sample dataset: three groups of 50 rows, with x and y drawn independently
data <- data.frame(
  group = rep(c("A", "B", "C"), each = 50),
  x = rnorm(150),
  y = rnorm(150, 5)
)

# Empty table that receives one row of coefficients per group
results <- data.frame(group = character(), intercept = numeric(), slope = numeric(), stringsAsFactors = FALSE)

# Fit y ~ x separately within each group
for (g in unique(data$group)) {
  subset_data <- filter(data, group == g)
  model <- lm(y ~ x, data = subset_data)
  coefs <- coef(model)
  # coefs is a named vector, so the name on coefs[1] ("(Intercept)") ends
  # up as the row name of the new row.
  group_row <- data.frame(group = g, intercept = coefs[1], slope = coefs[2])
  results <- rbind(results, group_row)
}

print(results)

             group intercept      slope
(Intercept)      A  5.043041 -0.1230692
(Intercept)1     B  5.011152 -0.1343668
(Intercept)2     C  5.204264 -0.1779689

2. Logistic Regression Across Multiple Target Variables

Train logistic regression models on multiple binary targets and store performance metrics.

set.seed(123)
library(caret)

Warning: package 'caret' was built under R version 4.4.2

Loading required package: lattice

# Create a dataset with binary targets.
# The targets are factors so that caret treats each fit as classification
# and reports Accuracy. With numeric 0/1 targets caret runs regression
# instead, model$results has no Accuracy column, and max() returns -Inf.
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  target1 = factor(sample(0:1, 100, replace = TRUE)),
  target2 = factor(sample(0:1, 100, replace = TRUE))
)

targets <- c("target1", "target2")
results <- data.frame(target = character(), accuracy = numeric(), stringsAsFactors = FALSE)

# Loop through each target and fit a logistic regression model
for (target in targets) {
  # Build "<target> ~ x1 + x2" for the current outcome column.
  formula <- as.formula(paste(target, "~ x1 + x2"))
  model <- train(formula, data = data, method = "glm", family = "binomial", trControl = trainControl(method = "cv", number = 5))
  accuracy <- max(model$results$Accuracy)  # Cross-validated accuracy
  results <- rbind(results, data.frame(target = target, accuracy = accuracy))
}

# NOTE(review): the targets are drawn independently of x1 and x2, so
# accuracies near 0.5 are expected; re-run to obtain the actual values.
print(results)

3. Random Forest Models with Varying Parameters

Train multiple random forest models with different mtry values and store their OOB error rates.

set.seed(123)
library(randomForest)

Warning: package 'randomForest' was built under R version 4.4.2

randomForest 4.7-1.2

Type rfNews() to see new features/changes/bug fixes.


Attaching package: 'randomForest'

The following object is masked from 'package:dplyr':

    combine

The following object is masked from 'package:ggplot2':

    margin

# Sample dataset: two numeric predictors and a random two-level factor outcome
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = factor(sample(0:1, 100, replace = TRUE))
)

# mtry settings to compare
mtry_values <- c(1, 2)
results <- data.frame(mtry = integer(), OOBError = numeric(), stringsAsFactors = FALSE)

# Fit one forest per mtry value and record its final OOB error
for (m in mtry_values) {
  model <- randomForest(y ~ x1 + x2, data = data, mtry = m)
  oob_curve <- model$err.rate
  # Last row, first column; the value keeps its column name, which becomes
  # the row name of the appended row.
  final_oob <- oob_curve[nrow(oob_curve), 1]
  results <- rbind(results, data.frame(mtry = m, OOBError = final_oob))
}

print(results)

     mtry OOBError
OOB     1     0.52
OOB1    2     0.56

4. Cross-Validation with Different Models

Train different machine learning models (e.g., SVM, kNN, Decision Trees) and store their performance.

set.seed(123)
library(kernlab)


Attaching package: 'kernlab'

The following object is masked from 'package:ggplot2':

    alpha

library(caret)

# Dataset: two numeric predictors and a random two-level factor outcome
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = factor(sample(0:1, 100, replace = TRUE))
)

# caret method names of the models to compare
models <- c("svmRadial", "knn", "rpart")
results <- data.frame(model = character(), accuracy = numeric(), stringsAsFactors = FALSE)

# The same 5-fold cross-validation settings are used for every model
cv_control <- trainControl(method = "cv", number = 5)

# Fit each model in turn and keep the highest accuracy it reports
for (m in models) {
  model <- train(y ~ x1 + x2, data = data, method = m, trControl = cv_control)
  accuracy <- max(model$results$Accuracy)
  model_row <- data.frame(model = m, accuracy = accuracy)
  results <- rbind(results, model_row)
}

print(results)

      model accuracy
1 svmRadial     0.59
2       knn     0.56
3     rpart     0.56

5. Hyperparameter Tuning for Gradient Boosting

Train gradient boosting models with different learning rates and store RMSE values.

set.seed(123)
library(gbm)

Warning: package 'gbm' was built under R version 4.4.2

Loaded gbm 2.2.2

This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3

# Dataset: two numeric predictors and a numeric response centred on 10
data <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y = rnorm(100, 10)
)

# Shrinkage (learning rate) values to compare
learning_rates <- c(0.01, 0.05, 0.1)
results <- data.frame(learning_rate = numeric(), RMSE = numeric(), stringsAsFactors = FALSE)

# Fit one GBM per learning rate, each with 5-fold cross-validation
for (lr in learning_rates) {
  model <- gbm(
    y ~ x1 + x2,
    data = data,
    distribution = "gaussian",
    n.trees = 100,
    interaction.depth = 3,
    shrinkage = lr,
    cv.folds = 5
  )
  # Square root of the smallest cross-validated error across tree counts
  best_cv_error <- min(model$cv.error)
  rmse <- sqrt(best_cv_error)
  results <- rbind(results, data.frame(learning_rate = lr, RMSE = rmse))
}

print(results)

  learning_rate      RMSE
1          0.01 0.9489399
2          0.05 0.9448511
3          0.10 0.9568672