for(i in 1:5){
print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
for(i in 1:5){
print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
=c("arun","amita","revant","hershie")
names
for(name in names){
print(name)
}
[1] "arun"
[1] "amita"
[1] "revant"
[1] "hershie"
for(i in 1:6){
= i^2
squared
print(squared)
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
[1] 36
=c()
squares
for(i in 1:5){
=c(squares, i^2)
squares
}
print(squares)
[1] 1 4 9 16 25
for(i in 1:10){
if(i %% 2==0){
print(paste(i," is even number"))
else {
}
print(paste(i, " is odd number"))
} }
[1] "1 is odd number"
[1] "2 is even number"
[1] "3 is odd number"
[1] "4 is even number"
[1] "5 is odd number"
[1] "6 is even number"
[1] "7 is odd number"
[1] "8 is even number"
[1] "9 is odd number"
[1] "10 is even number"
for(i in 1:5){
for(j in 1:5){
print(paste(i,"x",j,"=",i*j))
}
}
[1] "1 x 1 = 1"
[1] "1 x 2 = 2"
[1] "1 x 3 = 3"
[1] "1 x 4 = 4"
[1] "1 x 5 = 5"
[1] "2 x 1 = 2"
[1] "2 x 2 = 4"
[1] "2 x 3 = 6"
[1] "2 x 4 = 8"
[1] "2 x 5 = 10"
[1] "3 x 1 = 3"
[1] "3 x 2 = 6"
[1] "3 x 3 = 9"
[1] "3 x 4 = 12"
[1] "3 x 5 = 15"
[1] "4 x 1 = 4"
[1] "4 x 2 = 8"
[1] "4 x 3 = 12"
[1] "4 x 4 = 16"
[1] "4 x 5 = 20"
[1] "5 x 1 = 5"
[1] "5 x 2 = 10"
[1] "5 x 3 = 15"
[1] "5 x 4 = 20"
[1] "5 x 5 = 25"
= list(a=1:3, b=4:6, c=7:9)
my_list
for(item in my_list){
print(sum(item))
}
[1] 6
[1] 15
[1] 24
for(i in 1:10){
if(i==3){
next
}if(i==7){
break
}print(i)
}
[1] 1
[1] 2
[1] 4
[1] 5
[1] 6
=10
n
= numeric(n)
result
for(i in 1:n){
=i^2
result[i]
}
print(result)
[1] 1 4 9 16 25 36 49 64 81 100
= function(x){
my_function
return(x^2 + 2*x + 1)
}
= 1:5
values
=c()
results
for(i in values){
= c(results, my_function(i))
results
}
print(results)
[1] 4 9 16 25 36
= 1:5
values
= values^2 + 2*values + 1
results
print(results)
[1] 4 9 16 25 36
Simulate a random walk in 1D where you either step forward or backward randomly.
set.seed(123) # For reproducibility
<- 100
steps <- numeric(steps)
position 1] <- 0 # Start at 0
position[
for (i in 2:steps) {
<- sample(c(-1, 1), size = 1) # Randomly choose -1 or 1
step <- position[i - 1] + step
position[i]
}
# Plot the random walk
plot(position, type = "l", col = "blue", main = "Random Walk", xlab = "Step", ylab = "Position")
Fill a matrix with the sum of its row and column indices.
<- 5
n <- matrix(0, nrow = n, ncol = n)
m
for (i in 1:n) {
for (j in 1:n) {
<- i + j # Sum of row and column indices
m[i, j]
}
}
print(m)
[,1] [,2] [,3] [,4] [,5]
[1,] 2 3 4 5 6
[2,] 3 4 5 6 7
[3,] 4 5 6 7 8
[4,] 5 6 7 8 9
[5,] 6 7 8 9 10
Find all prime numbers up to a given number using nested loops.
<- 50
n <- rep(TRUE, n)
is_prime 1] <- FALSE # 1 is not a prime number
is_prime[
for (i in 2:sqrt(n)) {
if (is_prime[i]) {
for (j in seq(i^2, n, i)) {
<- FALSE
is_prime[j]
}
}
}
<- which(is_prime)
primes print(primes)
[1] 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47
Simulate rolling multiple dice 10,000 times and calculate the probability of rolling a sum of 7.
set.seed(123)
<- 10000
rolls <- 2
dice <- 0
sum_sevens
for (i in 1:rolls) {
<- sample(1:6, dice, replace = TRUE) # Roll two dice
outcome if (sum(outcome) == 7) {
<- sum_sevens + 1
sum_sevens
}
}
# Calculate probability
<- sum_sevens / rolls
probability print(paste("Probability of rolling a sum of 7:", probability))
[1] "Probability of rolling a sum of 7: 0.1595"
Create a dynamic data frame that grows row by row using a loop.
# Initialize an empty data frame
<- data.frame(ID = integer(), Name = character(), Score = numeric(), stringsAsFactors = FALSE)
df
<- c("Alice", "Bob", "Charlie", "David", "Eve")
names <- c(85, 92, 78, 90, 88)
scores
for (i in 1:length(names)) {
# Add a new row to the data frame
<- data.frame(ID = i, Name = names[i], Score = scores[i])
new_row <- rbind(df, new_row)
df
}
print(df)
ID Name Score
1 1 Alice 85
2 2 Bob 92
3 3 Charlie 78
4 4 David 90
5 5 Eve 88
Use a loop to estimate the value of π using the Monte Carlo method.
set.seed(123)
<- 100000 # Number of random points
n <- 0
inside_circle
for (i in 1:n) {
<- runif(1, -1, 1) # Random x-coordinate
x <- runif(1, -1, 1) # Random y-coordinate
y
if (x^2 + y^2 <= 1) {
<- inside_circle + 1 # Point is inside the circle
inside_circle
}
}
# Estimate π
<- (inside_circle / n) * 4
pi_estimate print(paste("Estimated value of π:", pi_estimate))
[1] "Estimated value of π: 3.14044"
for
Loops for Creating Multiple Hypothesis TestsHere are examples where you create multiple hypothesis tests using for
loops. These can help automate statistical testing across multiple datasets or variables.
Perform a t-test for multiple groups of a numeric variable.
set.seed(123)
<- data.frame(
data group = rep(c("A", "B", "C"), each = 20),
value = c(rnorm(20, mean = 10), rnorm(20, mean = 12), rnorm(20, mean = 15))
)
<- unique(data$group)
unique_groups <- list()
results
for (i in 1:length(unique_groups)) {
<- data$value[data$group == unique_groups[i]]
group_data <- t.test(group_data, mu = 10) # Test if the mean is 10
test <- test
results[[unique_groups[i]]]
}
print(results)
Perform a chi-square test on multiple 2x2 tables.
set.seed(123)
<- list(
tables matrix(c(10, 20, 30, 40), nrow = 2),
matrix(c(15, 25, 35, 45), nrow = 2),
matrix(c(5, 10, 15, 20), nrow = 2)
)
<- list()
results
for (i in 1:length(tables)) {
<- chisq.test(tables[[i]])
test paste("Table", i)]] <- test
results[[
}
print(results)
Perform ANOVA on multiple numeric variables grouped by a factor.
set.seed(123)
<- data.frame(
data group = rep(c("A", "B", "C"), each = 10),
var1 = rnorm(30, mean = 5),
var2 = rnorm(30, mean = 10)
)
<- list()
results
for (var in c("var1", "var2")) {
<- as.formula(paste(var, "~ group"))
formula <- aov(formula, data = data)
test <- summary(test)
results[[var]]
}
print(results)
Perform a Wilcoxon test on subsets of data based on a grouping factor.
set.seed(123)
<- data.frame(
data group = rep(c("X", "Y", "Z"), each = 15),
value = c(rnorm(15, 5), rnorm(15, 6), rnorm(15, 7))
)
<- list()
results
for (g in unique(data$group)) {
<- data$value[data$group == g]
subset_data <- wilcox.test(subset_data, mu = 5)
test <- test
results[[g]]
}
print(results)
Test for correlation between multiple pairs of variables.
set.seed(123)
<- data.frame(
data var1 = rnorm(50),
var2 = rnorm(50, 5),
var3 = rnorm(50, 10)
)
<- colnames(data)
variables <- list()
results
for (i in 1:(length(variables) - 1)) {
for (j in (i + 1):length(variables)) {
<- cor.test(data[[variables[i]]], data[[variables[j]]])
test paste(variables[i], "vs", variables[j])]] <- test
results[[
}
}
print(results)
patchwork
Create histograms for multiple variables and combine them.
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.2
library(patchwork)
Warning: package 'patchwork' was built under R version 4.4.2
set.seed(123)
<- data.frame(
data var1 = rnorm(100),
var2 = rnorm(100, 5),
var3 = rnorm(100, 10)
)
<- colnames(data)
variables <- list()
plots
for (var in variables) {
<- ggplot(data, aes_string(x = var)) +
p geom_histogram(binwidth = 1, fill = "blue", alpha = 0.7) +
ggtitle(paste("Histogram of", var))
<- p
plots[[var]] }
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
# Combine using patchwork
<- wrap_plots(plots, ncol = 1)
combined_plot print(combined_plot)
Create boxplots for multiple numeric variables grouped by a factor.
set.seed(123)
<- data.frame(
data group = rep(c("A", "B", "C"), each = 50),
var1 = rnorm(150),
var2 = rnorm(150, 5)
)
<- c("var1", "var2")
variables <- list()
plots
for (var in variables) {
<- ggplot(data, aes(x = group, y = get(var), fill = group)) +
p geom_boxplot() +
ggtitle(paste("Boxplot of", var, "by Group")) +
theme_minimal()
<- p
plots[[var]]
}
# Combine using patchwork
<- wrap_plots(plots, ncol = 2)
combined_plot print(combined_plot)
Generate scatter plots for pairs of variables with regression lines.
<- data.frame(
data x = rnorm(100),
y1 = rnorm(100, 5),
y2 = rnorm(100, 10)
)
<- c("y1", "y2")
y_vars <- list()
plots
for (y in y_vars) {
<- ggplot(data, aes(x = x, y = get(y))) +
p geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red") +
ggtitle(paste("Scatter Plot: x vs", y))
<- p
plots[[y]]
}
# Combine using patchwork
<- wrap_plots(plots, nrow = 1)
combined_plot print(combined_plot)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Create density plots for multiple variables and arrange them.
<- data.frame(
data var1 = rnorm(500, 0, 1),
var2 = rnorm(500, 5, 1),
var3 = rnorm(500, 10, 1)
)
<- colnames(data)
variables <- list()
plots
for (var in variables) {
<- ggplot(data, aes_string(x = var)) +
p geom_density(fill = "blue", alpha = 0.5) +
ggtitle(paste("Density of", var))
<- p
plots[[var]]
}
# Combine using patchwork
<- wrap_plots(plots, ncol = 2)
combined_plot print(combined_plot)
Generate faceted line plots for multiple variables over time.
set.seed(123)
<- data.frame(
data time = 1:100,
series1 = cumsum(rnorm(100)),
series2 = cumsum(rnorm(100, 0.5)),
series3 = cumsum(rnorm(100, -0.5))
)
<- c("series1", "series2", "series3")
variables <- list()
plots
for (var in variables) {
<- ggplot(data, aes(x = time, y = get(var))) +
p geom_line(color = "blue") +
ggtitle(paste("Time Series:", var)) +
theme_minimal()
<- p
plots[[var]]
}
# Combine using patchwork
<- wrap_plots(plots, ncol = 1)
combined_plot print(combined_plot)
Train multiple linear regression models on subsets of data and store the coefficients.
set.seed(123)
library(dplyr)
Warning: package 'dplyr' was built under R version 4.4.2
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Create a sample dataset
<- data.frame(
data group = rep(c("A", "B", "C"), each = 50),
x = rnorm(150),
y = rnorm(150, 5)
)
# Prepare storage for model parameters
<- data.frame(group = character(), intercept = numeric(), slope = numeric(), stringsAsFactors = FALSE)
results
# Loop through each group and fit a linear model
for (g in unique(data$group)) {
<- data %>% filter(group == g)
subset_data <- lm(y ~ x, data = subset_data)
model <- rbind(results, data.frame(
results group = g,
intercept = coef(model)[1],
slope = coef(model)[2]
))
}
print(results)
group intercept slope
(Intercept) A 5.043041 -0.1230692
(Intercept)1 B 5.011152 -0.1343668
(Intercept)2 C 5.204264 -0.1779689
Train logistic regression models on multiple binary targets and store performance metrics.
set.seed(123)
library(caret)
Warning: package 'caret' was built under R version 4.4.2
Loading required package: lattice
# Create a dataset with binary targets
<- data.frame(
data x1 = rnorm(100),
x2 = rnorm(100),
target1 = sample(0:1, 100, replace = TRUE),
target2 = sample(0:1, 100, replace = TRUE)
)
<- c("target1", "target2")
targets <- data.frame(target = character(), accuracy = numeric(), stringsAsFactors = FALSE)
results
# Loop through each target and fit a logistic regression model
for (target in targets) {
<- as.formula(paste(target, "~ x1 + x2"))
formula <- train(formula, data = data, method = "glm", family = "binomial", trControl = trainControl(method = "cv", number = 5))
model <- max(model$results$Accuracy) # Extract accuracy
accuracy <- rbind(results, data.frame(target = target, accuracy = accuracy))
results }
Warning in train.default(x, y, weights = w, ...): You are trying to do
regression and your outcome only has two possible values Are you trying to do
classification? If so, use a 2 level factor as your outcome column.
Warning in max(model$results$Accuracy): no non-missing arguments to max;
returning -Inf
Warning in train.default(x, y, weights = w, ...): You are trying to do
regression and your outcome only has two possible values Are you trying to do
classification? If so, use a 2 level factor as your outcome column.
Warning in max(model$results$Accuracy): no non-missing arguments to max;
returning -Inf
print(results)
target accuracy
1 target1 -Inf
2 target2 -Inf
Train multiple random forest models with different mtry
values and store their OOB error rates.
set.seed(123)
library(randomForest)
Warning: package 'randomForest' was built under R version 4.4.2
randomForest 4.7-1.2
Type rfNews() to see new features/changes/bug fixes.
Attaching package: 'randomForest'
The following object is masked from 'package:dplyr':
combine
The following object is masked from 'package:ggplot2':
margin
# Create a sample dataset
<- data.frame(
data x1 = rnorm(100),
x2 = rnorm(100),
y = factor(sample(0:1, 100, replace = TRUE))
)
# Hyperparameter values to try
<- c(1, 2)
mtry_values <- data.frame(mtry = integer(), OOBError = numeric(), stringsAsFactors = FALSE)
results
# Loop through different mtry values and fit a random forest model
for (m in mtry_values) {
<- randomForest(y ~ x1 + x2, data = data, mtry = m)
model <- rbind(results, data.frame(mtry = m, OOBError = model$err.rate[nrow(model$err.rate), 1]))
results
}
print(results)
mtry OOBError
OOB 1 0.52
OOB1 2 0.56
Train different machine learning models (e.g., SVM, kNN, Decision Trees) and store their performance.
set.seed(123)
library(kernlab)
Attaching package: 'kernlab'
The following object is masked from 'package:ggplot2':
alpha
library(caret)
# Create a dataset
<- data.frame(
data x1 = rnorm(100),
x2 = rnorm(100),
y = factor(sample(0:1, 100, replace = TRUE))
)
# List of models to train
<- c("svmRadial", "knn", "rpart")
models <- data.frame(model = character(), accuracy = numeric(), stringsAsFactors = FALSE)
results
# Loop through each model and fit it
for (m in models) {
<- train(y ~ x1 + x2, data = data, method = m, trControl = trainControl(method = "cv", number = 5))
model <- max(model$results$Accuracy) # Extract accuracy
accuracy <- rbind(results, data.frame(model = m, accuracy = accuracy))
results
}
print(results)
model accuracy
1 svmRadial 0.59
2 knn 0.56
3 rpart 0.56
Train gradient boosting models with different learning rates and store RMSE values.
set.seed(123)
library(gbm)
Warning: package 'gbm' was built under R version 4.4.2
Loaded gbm 2.2.2
This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
# Create a dataset
<- data.frame(
data x1 = rnorm(100),
x2 = rnorm(100),
y = rnorm(100, 10)
)
# Learning rate values to try
<- c(0.01, 0.05, 0.1)
learning_rates <- data.frame(learning_rate = numeric(), RMSE = numeric(), stringsAsFactors = FALSE)
results
# Loop through different learning rates and train a GBM model
for (lr in learning_rates) {
<- gbm(y ~ x1 + x2, data = data, distribution = "gaussian", n.trees = 100, interaction.depth = 3, shrinkage = lr, cv.folds = 5)
model <- sqrt(min(model$cv.error)) # Extract RMSE
rmse <- rbind(results, data.frame(learning_rate = lr, RMSE = rmse))
results
}
print(results)
learning_rate RMSE
1 0.01 0.9489399
2 0.05 0.9448511
3 0.10 0.9568672