Normal Distribution Example
# Noise-free linear relationship: x is drawn from a normal distribution and
# y is an exact linear function of x (slope 2, intercept 4).
set.seed(123)
x <- rnorm(400, mean = 10, sd = 2)
y <- 4 + 2 * x
df <- data.frame(x, y)
# Preview the first five simulated rows (echoed output follows).
head(df, n = 5)
## x y
## 1 8.879049 21.75810
## 2 9.539645 23.07929
## 3 13.117417 30.23483
## 4 10.141017 24.28203
## 5 10.258575 24.51715
# With no noise term, every point lies exactly on the line y = 2x + 4.
plot(x, y)

# Optional: uncomment to write the simulated data frame to a CSV file.
# write.csv(df, "simulated_regression_example1.csv")
Signal + Noise Example:
# Linear signal plus homoskedastic Gaussian noise: y = 2x + 4 + e, e ~ N(0, 1).
# Seed the generator here, as the other sections do, so this section produces
# the same draws when run on its own instead of depending on whatever RNG
# state earlier code left behind.
set.seed(123)
# Noise is drawn before x; keep this order so the draws stay reproducible.
rand_noise_normal <- rnorm(n = 400, mean = 0, sd = 1)
x <- rnorm(n = 400, mean = 10, sd = 2)
y <- 2 * x + 4 + rand_noise_normal
# Distribution of the noisy response.
hist(y)

# Scatter of the response against the predictor.
plot(x, y)

Heteroskedastic Noise – larger variance as X increases
set.seed(123)
# Predictor: 400 draws from a normal distribution with mean 10 and sd 2
x <- rnorm(400, mean = 10, sd = 2)
# Fan-shaped noise: each observation's noise sd is its distance above the
# smallest x, so the spread is zero at min(x) and widens as x increases
noise <- rnorm(length(x), mean = 0, sd = x - min(x))
# Response: linear signal (slope 2, intercept 4) plus the heteroskedastic noise
y <- 4 + 2 * x + noise
# Scatter plot with the least-squares fit overlaid in red
plot(x, y)
abline(reg = lm(y ~ x), col = "red")

Heteroskedastic Noise – larger variance as X decreases
set.seed(123)
# Predictor: 400 draws from a normal distribution with mean 10 and sd 2
x <- rnorm(400, mean = 10, sd = 2)
# Fan-shaped noise: each observation's noise sd is its distance below the
# largest x, so the spread is zero at max(x) and widens as x decreases
noise <- rnorm(length(x), mean = 0, sd = max(x) - x)
# Response: linear signal (slope 2, intercept 4) plus the heteroskedastic noise
y <- 4 + 2 * x + noise
# Scatter plot with the least-squares fit overlaid in red
plot(x, y)
abline(reg = lm(y ~ x), col = "red")
