R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print a per-column summary of `cars` (not defined in this file; presumably
# the data set from R's datasets package). The recorded output below lists the
# min, quartiles, median, mean and max of its `speed` and `dist` columns.
summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Normal Q-Q plot ----
# Draw 1000 standard-normal variates; the seed is fixed so the same sample
# (and therefore the same figure) is produced on every run.
set.seed(123)
data <- rnorm(n = 1000, mean = 0, sd = 1)

# Sample quantiles against theoretical normal quantiles, plus a red
# reference line through the sample.
qqnorm(y = data)
qqline(y = data, col = "red")

# Poisson Q-Q plot ----
# Reproducible sample of 1000 Poisson counts; lambda (the mean) is 1.
set.seed(123)
data_poisson <- rpois(n = 1000, lambda = 1)

# Theoretical Poisson(1) quantiles, evaluated at the plotting positions for a
# sample of this size, go on the x axis; the observed counts go on the y axis.
qqplot(
  x = qpois(p = ppoints(n = length(data_poisson)), lambda = 1),
  y = data_poisson,
  main = "Q-Q Plot for Poisson Distribution",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)

# Load necessary packages
library(ggplot2)
# NOTE(review): every density/mass function called in the visible code
# (dnorm, dbinom, dpois, dexp, dunif, dgamma, dchisq, dt, dbeta, dlnorm,
# dgeom, df) is provided by base R's stats package, including the chi-squared
# density. It is unclear what extraDistr is needed for here -- confirm before
# keeping or removing it.
library(extraDistr)

# Evaluation grids and density / mass values ----

# Standard normal density (mean 0, sd 1) on [-3, 3].
x_normal <- seq(from = -3, to = 3, length.out = 1000)
y_normal <- dnorm(x_normal)

# Binomial probabilities for 20 trials with success probability 0.5,
# evaluated on the full support 0..20.
x_binomial <- seq.int(0L, 20L)
y_binomial <- dbinom(x_binomial, 20, 0.5)

# Poisson probabilities with mean 5 for the counts 0..20.
x_poisson <- seq.int(0L, 20L)
y_poisson <- dpois(x_poisson, 5)

# Exponential density with rate 1 on [0, 5].
x_exponential <- seq(from = 0, to = 5, length.out = 1000)
y_exponential <- dexp(x_exponential)

# Uniform density on the unit interval.
x_uniform <- seq(from = 0, to = 1, length.out = 1000)
y_uniform <- dunif(x_uniform)

# Plot each distribution ----
# One figure per distribution: a line for each continuous density and bars for
# each discrete probability mass function. Every plot is built from a
# two-column (x, y) data frame of the precomputed grid and its values.

# Normal density
ggplot(data.frame(x = x_normal, y = y_normal), aes(x = x, y = y)) +
  geom_line(color = "blue") +
  labs(title = "Normal Distribution", x = "x", y = "Density") +
  theme_minimal()

# Binomial probabilities
ggplot(data.frame(x = x_binomial, y = y_binomial), aes(x = x, y = y)) +
  geom_col(fill = "green") +
  labs(title = "Binomial Distribution", x = "x", y = "Probability") +
  theme_minimal()

# Poisson probabilities
ggplot(data.frame(x = x_poisson, y = y_poisson), aes(x = x, y = y)) +
  geom_col(fill = "red") +
  labs(title = "Poisson Distribution", x = "x", y = "Probability") +
  theme_minimal()

# Exponential density
ggplot(data.frame(x = x_exponential, y = y_exponential), aes(x = x, y = y)) +
  geom_line(color = "purple") +
  labs(title = "Exponential Distribution", x = "x", y = "Density") +
  theme_minimal()

# Uniform density
ggplot(data.frame(x = x_uniform, y = y_uniform), aes(x = x, y = y)) +
  geom_line(color = "orange") +
  labs(title = "Uniform Distribution", x = "x", y = "Density") +
  theme_minimal()

# Evaluation grids and density values ----

# Gamma density with shape 2 and rate 0.5 on [0, 20].
x_gamma <- seq(from = 0, to = 20, length.out = 1000)
y_gamma <- dgamma(x_gamma, 2, 0.5)

# Chi-squared density with 5 degrees of freedom on [0, 20].
x_chi_squared <- seq(from = 0, to = 20, length.out = 1000)
y_chi_squared <- dchisq(x_chi_squared, 5)

# Student's t density with 5 degrees of freedom on [-5, 5].
x_student_t <- seq(from = -5, to = 5, length.out = 1000)
y_student_t <- dt(x_student_t, 5)

# Beta density with both shape parameters equal to 2 on [0, 1].
x_beta <- seq(from = 0, to = 1, length.out = 1000)
y_beta <- dbeta(x_beta, 2, 2)

# Plot each distribution ----
# One line plot per continuous density, each built from a two-column (x, y)
# data frame of the precomputed grid and its density values.

# Gamma density
ggplot(data.frame(x = x_gamma, y = y_gamma), aes(x = x, y = y)) +
  geom_line(color = "brown") +
  labs(title = "Gamma Distribution", x = "x", y = "Density") +
  theme_minimal()

# Chi-squared density
ggplot(data.frame(x = x_chi_squared, y = y_chi_squared), aes(x = x, y = y)) +
  geom_line(color = "gray") +
  labs(title = "Chi-Squared Distribution", x = "x", y = "Density") +
  theme_minimal()

# Student's t density
ggplot(data.frame(x = x_student_t, y = y_student_t), aes(x = x, y = y)) +
  geom_line(color = "pink") +
  labs(title = "Student's t-Distribution", x = "x", y = "Density") +
  theme_minimal()

# Beta density
ggplot(data.frame(x = x_beta, y = y_beta), aes(x = x, y = y)) +
  geom_line(color = "cyan") +
  labs(title = "Beta Distribution", x = "x", y = "Density") +
  theme_minimal()

# Evaluation grids and density / mass values ----

# Log-normal density (meanlog 1, sdlog 0.5) on [0, 5].
x_log_normal <- seq(from = 0, to = 5, length.out = 1000)
y_log_normal <- dlnorm(x_log_normal, 1, 0.5)

# Geometric probabilities with success probability 0.3 for the counts 0..20.
x_geometric <- seq.int(0L, 20L)
y_geometric <- dgeom(x_geometric, 0.3)

# F density with 2 and 5 degrees of freedom on [0, 5].
x_f <- seq(from = 0, to = 5, length.out = 1000)
y_f <- df(x_f, 2, 5)

# Plot each distribution ----
# Lines for the two continuous densities and bars for the discrete geometric
# probabilities, each built from a two-column (x, y) data frame.

# Log-normal density
ggplot(data.frame(x = x_log_normal, y = y_log_normal), aes(x = x, y = y)) +
  geom_line(color = "purple") +
  labs(title = "Log-Normal Distribution", x = "x", y = "Density") +
  theme_minimal()

# Geometric probabilities
ggplot(data.frame(x = x_geometric, y = y_geometric), aes(x = x, y = y)) +
  geom_col(fill = "blue") +
  labs(title = "Geometric Distribution", x = "x", y = "Probability") +
  theme_minimal()

# F density
ggplot(data.frame(x = x_f, y = y_f), aes(x = x, y = y)) +
  geom_line(color = "orange") +
  labs(title = "F-Distribution", x = "x", y = "Density") +
  theme_minimal()

# ```{r}
# # Load necessary packages
# library(ggplot2)
# library(extraDistr) # For log-normal distribution
# library(dplyr)
# library(lubridate)
# library(zoo)
#
# # Read data from an Excel file
# data1 <- readxl::read_excel("tech_layoffs.xlsx")
#
# # transform dataset
# new_data <- data1 %>%
#   select(Date_layoffs, Laid_Off) %>%
#   mutate(month_year = format(as.Date(Date_layoffs), "%Y-%m")) %>%
#   group_by(month_year) %>%
#   summarise(total_Laid_Off = sum(Laid_Off))
#
# summary(new_data)
#
# new_data1 <- new_data %>%
#   mutate(month_years = as.yearmon(month_year))
#
# glimpse(new_data1)
#
# plot(new_data1$month_years, new_data1$total_Laid_Off,
#      xlab = "Month and Year", ylab = "Laid-OFF",
#      main = "Scatter plot of Laid off by Month and Year")
#
# data1 %>%
#   ggplot(aes(dep_time)) +
#   geom_freqpoly(binwidth = 600)
#
# plot(as.Date(new_data$month_year, "%Y-%m"), new_data$total_Laid_Off,
#      xlab = "Month and Year", ylab = "Laid-OFF",
#      main = "Scatter plot of Laid off by Month and Year")

# Quick diagnostics on `new_data`.
# NOTE(review): no active statement in the visible part of this file assigns
# `new_data`; its only visible definition is inside the commented-out chunk
# above. Unless it is created elsewhere, every call below fails with
# "object 'new_data' not found" -- TODO confirm.

# TRUE if at least one value in new_data is missing.
any(is.na(new_data))

# Compact display of the object's structure.
str(new_data)

# Per-column summary.
summary(new_data)

# NOTE(review): if new_data still carries the character `month_year` column
# built in the commented-out pipeline, boxplot() and range() on the whole data
# frame are likely to error on that non-numeric column -- verify, and consider
# restricting these calls to the numeric column.
boxplot(new_data)

range(new_data)

#new_data %>% plot(month_year, total_Laid_Off)

#any(is.na(new_data$month_year))
#plot(new_data$month_year, new_data$Laid_Off, xlab = "YY-MM", ylab = "Laid-OFF", main = "Scatter plot of Laid off by YY-MM")
#
# # Create a Q-Q plot comparing the data to the log-normal distribution
# ggplot(data1, aes(sample = Laid_Off)) +
#   stat_qq(distribution = qlnorm, dparams = list(meanlog = 1, sdlog = 0.5)) +
#   ggtitle("Q-Q Plot for Log-Normal Distribution") +
#   xlab("Theoretical Quantiles (Log-Normal Distribution)") +
#   ylab("Sample Quantiles (Data)")



```r
# Chi-square confidence interval for a variance and standard deviation ----
# Sample of ten pressure readings.
# NOTE(review): this name masks the `pressure` data set from R's datasets
# package; it is kept so that any code relying on it keeps working.
pressure <- c(33.2, 41.8, 37.3, 40.2, 36.7, 39.1, 36.2, 41.8, 36, 35.2)
pressure

##  [1] 33.2 41.8 37.3 40.2 36.7 39.1 36.2 41.8 36.0 35.2

length(pressure)

## [1] 10

# Degrees of freedom are derived from the sample size instead of being
# hard-coded, so the interval stays correct if the data vector changes.
pressure_df <- length(pressure) - 1

# Two-sided significance level: 0.05 gives a 95% interval.
pressure_alpha <- 0.05

# Sum of squared deviations from the mean, (n - 1) * s^2.
ss <- pressure_df * var(pressure)
ss

## [1] 74.605

# Lower and upper chi-square quantiles that cut off alpha / 2 in each tail.
L <- qchisq(pressure_alpha / 2, pressure_df)
U <- qchisq(1 - pressure_alpha / 2, pressure_df)

# Dividing by the LARGER quantile gives the lower bound for the variance.
lower <- ss / U
lower

## [1] 3.921879

# Dividing by the SMALLER quantile gives the upper bound for the variance.
upper <- ss / L
upper

## [1] 27.6275

# Interval for the standard deviation: square roots of the variance bounds.
# The name `sd` masks stats::sd as a variable; it is kept for compatibility.
sd <- sqrt(c(lower, upper))
sd

## [1] 1.980374 5.256186

```

Math3302

tan nguyen

2024-03-01