Normal Curve with Shaded Tails

Normal Curve with Shaded Region

To create a normal curve with shaded tails in R, you can use the dnorm function to calculate the density (the height of the curve), the plot function to draw the curve, and the polygon function to shade the tails. Here is an example:

# Generate a sequence of x values covering the plotted range
x <- seq(-4, 4, by = 0.1)

# Calculate the standard normal density at each x. The tails below are shaded
# with dnorm() at the normal critical value 1.96, so the curve itself must be
# the normal density too (not a t-density) for the shading to follow the line.
y <- dnorm(x)

# Plot the results
plot(
  x, y,
  type = "l",
  main = "Normal Curve with Shaded Tails",
  xlab = "x",
  ylab = "Density"
)

# Shade the tails beyond the two-sided 5% critical values (-1.96 and 1.96)
x1 <- seq(-4, -1.96, length.out = 50)
x2 <- seq(1.96, 4, length.out = 50)

# Each polygon follows the curve across the tail and is then closed along the
# baseline (y = 0) at both ends, so the filled region is exactly the area under
# the curve rather than a shape cut off by a diagonal edge.
polygon(c(x1, -1.96, -4), c(dnorm(x1), 0, 0), col = "black", border = NA)
polygon(c(x2, 4, 1.96), c(dnorm(x2), 0, 0), col = "black", border = NA)

In this code, the seq function creates a sequence of x values from -4 to 4 in steps of 0.1 (81 points). A density function then computes the height of the curve at each x value (dnorm for the normal distribution; dt is its t-distribution counterpart). The plot function draws the curve, with type = "l" to indicate a line plot; adding lwd = 2 would make the line thicker. The main argument sets the plot title, and the xlab and ylab arguments label the x and y axes, respectively.

To shade the tails, two sequences of x values are created using the seq function, one for the left tail and one for the right tail. The dnorm function is used to calculate the corresponding y values (i.e., the probability density). The polygon function is used to create a polygon from the x and y values for each tail, along with the extra points needed to close the shape at the boundaries of the shaded area (i.e., -1.96 and 1.96). The col argument sets the color of the shaded area to "black", and border = NA suppresses the polygon's outline.

Generate T-table in R

# Degrees of freedom (one table row each)
df <- 1:30

# Two-sided significance levels (one table column each)
alpha <- c(0.1, 0.05, 0.01)

# Two-sided critical values: entry [i, j] is the (1 - alpha[j] / 2) quantile of
# the t-distribution with df[i] degrees of freedom. qt() is vectorized, so
# outer() fills the whole grid at once instead of looping over 1:length(),
# which would misbehave on an empty vector.
critical_values <- outer(df, alpha, function(d, a) qt(1 - a / 2, d))

# First column repeats the degrees of freedom; the rest hold the quantiles
t_table <- cbind(df, critical_values)

# Add row and column names.
# NOTE: the first column contains the degrees of freedom; its "t-value" label
# is kept unchanged so the table prints with the same header as before.
rownames(t_table) <- paste("df =", df)
colnames(t_table) <- c("t-value", paste("alpha =", alpha))

# View t-table
t_table

        t-value alpha = 0.1 alpha = 0.05 alpha = 0.01
df = 1        1    6.313752    12.706205    63.656741
df = 2        2    2.919986     4.302653     9.924843
df = 3        3    2.353363     3.182446     5.840909
df = 4        4    2.131847     2.776445     4.604095
df = 5        5    2.015048     2.570582     4.032143
df = 6        6    1.943180     2.446912     3.707428
df = 7        7    1.894579     2.364624     3.499483
df = 8        8    1.859548     2.306004     3.355387
df = 9        9    1.833113     2.262157     3.249836
df = 10      10    1.812461     2.228139     3.169273
df = 11      11    1.795885     2.200985     3.105807
df = 12      12    1.782288     2.178813     3.054540
df = 13      13    1.770933     2.160369     3.012276
df = 14      14    1.761310     2.144787     2.976843
df = 15      15    1.753050     2.131450     2.946713
df = 16      16    1.745884     2.119905     2.920782
df = 17      17    1.739607     2.109816     2.898231
df = 18      18    1.734064     2.100922     2.878440
df = 19      19    1.729133     2.093024     2.860935
df = 20      20    1.724718     2.085963     2.845340
df = 21      21    1.720743     2.079614     2.831360
df = 22      22    1.717144     2.073873     2.818756
df = 23      23    1.713872     2.068658     2.807336
df = 24      24    1.710882     2.063899     2.796940
df = 25      25    1.708141     2.059539     2.787436
df = 26      26    1.705618     2.055529     2.778715
df = 27      27    1.703288     2.051831     2.770683
df = 28      28    1.701131     2.048407     2.763262
df = 29      29    1.699127     2.045230     2.756386
df = 30      30    1.697261     2.042272     2.749996

Generate Z-score table

# Grid of z-scores from -3 to 3 in steps of 0.1
z_scores <- seq(from = -3, to = 3, by = 0.1)

# Lower-tail probability P(Z <= z) under the standard normal for each z-score
probabilities <- pnorm(q = z_scores)

# Assemble the two vectors into a two-column lookup table
z_score_table <- data.frame(
  z_score = z_scores,
  probability = probabilities
)

# Display the table
print(z_score_table)

   z_score probability
1     -3.0 0.001349898
2     -2.9 0.001865813
3     -2.8 0.002555130
4     -2.7 0.003466974
5     -2.6 0.004661188
6     -2.5 0.006209665
7     -2.4 0.008197536
8     -2.3 0.010724110
9     -2.2 0.013903448
10    -2.1 0.017864421
11    -2.0 0.022750132
12    -1.9 0.028716560
13    -1.8 0.035930319
14    -1.7 0.044565463
15    -1.6 0.054799292
16    -1.5 0.066807201
17    -1.4 0.080756659
18    -1.3 0.096800485
19    -1.2 0.115069670
20    -1.1 0.135666061
21    -1.0 0.158655254
22    -0.9 0.184060125
23    -0.8 0.211855399
24    -0.7 0.241963652
25    -0.6 0.274253118
26    -0.5 0.308537539
27    -0.4 0.344578258
28    -0.3 0.382088578
29    -0.2 0.420740291
30    -0.1 0.460172163
31     0.0 0.500000000
32     0.1 0.539827837
33     0.2 0.579259709
34     0.3 0.617911422
35     0.4 0.655421742
36     0.5 0.691462461
37     0.6 0.725746882
38     0.7 0.758036348
39     0.8 0.788144601
40     0.9 0.815939875
41     1.0 0.841344746
42     1.1 0.864333939
43     1.2 0.884930330
44     1.3 0.903199515
45     1.4 0.919243341
46     1.5 0.933192799
47     1.6 0.945200708
48     1.7 0.955434537
49     1.8 0.964069681
50     1.9 0.971283440
51     2.0 0.977249868
52     2.1 0.982135579
53     2.2 0.986096552
54     2.3 0.989275890
55     2.4 0.991802464
56     2.5 0.993790335
57     2.6 0.995338812
58     2.7 0.996533026
59     2.8 0.997444870
60     2.9 0.998134187
61     3.0 0.998650102

Central Limit Theorem

# Non-normal (exponential) population to draw samples from
population <- rexp(n = 1000, rate = 0.5)

# Size of each sample and number of repeated draws
sample_size <- 30
num_samples <- 1000

# One sample mean per draw; each draw samples the population without replacement
sample_means <- vapply(
  seq_len(num_samples),
  function(i) mean(sample(population, sample_size)),
  numeric(1)
)

# Centre and spread of the simulated means, used for the overlaid normal curve
means_center <- mean(sample_means)
means_spread <- sd(sample_means)

# Histogram on the density scale with the matching normal density on top
hist(
  sample_means,
  breaks = 30,
  probability = TRUE,
  main = "Sample Means with Normal Curve"
)
curve(
  dnorm(x, mean = means_center, sd = means_spread),
  add = TRUE, col = "blue", lwd = 2
)

Using Real-World Data

# Load the gapminder data set from a local CSV file
myydt <- read.csv(file = "C:\\Users\\user\\Downloads\\gapminder.csv")

# Put the data frame's columns on the search path so they can be referenced by
# bare name (code further down uses gdpPercap without the myydt$ prefix)
attach(myydt)

# Preview the first five rows
head(myydt, n = 5)

      country continent year lifeExp      pop gdpPercap
1 Afghanistan      Asia 1952  28.801  8425333  779.4453
2 Afghanistan      Asia 1957  30.332  9240934  820.8530
3 Afghanistan      Asia 1962  31.997 10267083  853.1007
4 Afghanistan      Asia 1967  34.020 11537966  836.1971
5 Afghanistan      Asia 1972  36.088 13079460  739.9811

Histogram with a normal curve

# Mean and standard deviation of GDP per capita for the overlaid normal density
gdp_mean <- mean(myydt$gdpPercap)
gdp_sd <- sd(myydt$gdpPercap)

# Histogram on the density scale so the normal curve shares its y-axis
hist(
  myydt$gdpPercap,
  breaks = 30,
  probability = TRUE,
  main = "Distribution of GDP per capita with Normal Curve"
)
curve(
  dnorm(x, mean = gdp_mean, sd = gdp_sd),
  add = TRUE, col = "blue", lwd = 2
)

Log transform the variables

# Log-transform GDP per capita. The column is taken from myydt explicitly so
# this step works whether or not the data frame has been attach()ed.
log_gdp <- log(myydt$gdpPercap)

# Histogram of the log-transformed values on the density scale
hist(
  log_gdp,
  breaks = 25,
  probability = TRUE,
  main = "Distribution of log GDP per capita with Normal Curve",
  xlab = "log(gdpPercap)"
)

# sd must be an argument of dnorm(), i.e. inside its parentheses. Passed to
# curve() instead, it is not applied to the density, which then uses dnorm()'s
# default sd and no longer matches the spread of the data.
curve(
  dnorm(x, mean = mean(log_gdp), sd = sd(log_gdp)),
  add = TRUE, col = "blue", lwd = 2
)