#1. Create your own discrete data set and calculate the mean, median and mode using R.
# Discrete frequency distribution used for the central-tendency calculations.
# Note_num holds the distinct values and Collections holds the frequency of
# each value; the two vectors are matched position by position.
Note_num <- seq(100, 500, by = 100)
Collections <- c(1400, 1200, 1800, 2400, 950)
# Print the mean of a discrete frequency distribution.
#
# v: numeric vector of values.
# f: numeric vector of frequencies, matched to v by position.
#
# The mean is the sum of value * frequency divided by the total frequency.
# The result is written to the console with cat().
discrete_mean <- function(v, f) {
  weighted_average <- sum(v * f) / sum(f)
  cat("The mean of the given data is:", weighted_average, "\n")
}
# Print the mode of a discrete frequency distribution.
#
# v: vector of values.
# f: numeric vector of frequencies, matched to v by position.
#
# The mode is the value whose frequency is largest; which.max() picks the
# first such value when several frequencies tie for the maximum.
discrete_mode <- function(v, f) {
  most_frequent <- which.max(f)
  cat(
    "The mode for the discrete frequency distribution is:",
    v[most_frequent],
    "\n"
  )
}
# Print the median of a discrete frequency distribution.
#
# v: numeric vector of values (any order).
# f: numeric vector of frequencies (counts), matched to v by position.
#
# The values are first put in ascending order together with their
# frequencies. With N = sum(f) observations, the median is the average of the
# observations at positions floor((N + 1) / 2) and ceiling((N + 1) / 2):
# these coincide when N is odd and are the two middle observations when N is
# even. Each position is mapped to a value through the cumulative frequencies.
# The result is written to the console with cat().
discrete_median <- function(v, f) {
  # Sort by value so the cumulative frequencies follow the ordered data.
  ascending <- order(v)
  v <- v[ascending]
  f <- f[ascending]

  # Cumulative frequency and total number of observations.
  cf <- cumsum(f)
  N <- sum(f)

  # Positions of the middle observation(s).
  lower_position <- floor((N + 1) / 2)
  upper_position <- ceiling((N + 1) / 2)

  # The first value whose cumulative frequency reaches a position holds it.
  lower_value <- v[which(cf >= lower_position)[1]]
  upper_value <- v[which(cf >= upper_position)[1]]

  median_value <- (lower_value + upper_value) / 2
  cat("The median of the given data is:", median_value, "\n")
}
# Show the discrete distribution, then print its mean, mode and median.
# Lines starting with "##" appear to be console output recorded from a run.
cat("Note numbers:", Note_num, "\n")
## Note numbers: 100 200 300 400 500
cat("Collections:", Collections, "\n")
## Collections: 1400 1200 1800 2400 950
discrete_mean(v = Note_num, f = Collections)
## The mean of the given data is: 303.871
discrete_mode(v = Note_num, f = Collections)
## The mode for the discrete frequency distribution is: 400
discrete_median(v = Note_num, f = Collections)
## The median of the given data is: 300
#2. Generate a continuous data set and display its summary statistics.
# Grouped revenue data: each element of revenue_ranges is a c(lower, upper)
# class interval, and movie_counts gives the number of movies per interval.
revenue_ranges <- list(
  c(0, 200), c(200, 400), c(400, 600),
  c(600, 800), c(800, 1000), c(1000, 1200)
)
movie_counts <- c(38, 19, 8, 4, 2, 2)

# Fix the random seed so the simulated revenues are reproducible.
set.seed(42)

# Simulate one revenue per movie by drawing uniformly inside its interval,
# then flatten the per-interval draws into a single numeric vector.
continuous_data <- unlist(
  Map(
    function(bounds, n_movies) runif(n_movies, bounds[1], bounds[2]),
    revenue_ranges,
    movie_counts
  )
)

# Hold the simulated revenues in a one-column data frame.
df <- data.frame(Revenue = continuous_data)

# Five-number summary plus the mean of the simulated revenues.
summary(df)
## Revenue
## Min. : 0.7897
## 1st Qu.: 128.3491
## Median : 195.6453
## Mean : 289.5176
## 3rd Qu.: 391.5153
## Max. :1043.2771

# Bar graph of class intervals against their frequencies.
barplot(
  movie_counts,
  names.arg = c("0-200", "200-400", "400-600", "600-800", "800-1000", "1000-1200"),
  col = "skyblue",
  main = "Revenue Ranges vs Movie Count",
  xlab = "Revenue Range",
  ylab = "Number of Movies",
  border = "black"
)
#3. Create your own continuous data set and calculate the mean, median and mode using R.
# Grouped (continuous) data for the central-tendency calculations: six classes
# of equal width, labelled "lower-upper", with the frequency of each class.
class_width <- 200
lower_limits <- seq(0, 1000, by = class_width)
upper_limits <- lower_limits + class_width
Revenue_range <- paste(lower_limits, upper_limits, sep = "-")
Movies <- c(38, 19, 8, 4, 2, 2)

# Mean of grouped data: each class is represented by its midpoint, and the
# midpoints are averaged using the class frequencies as weights.
midpoints <- (lower_limits + upper_limits) / 2
continuous_mean <- sum(midpoints * Movies) / sum(Movies)
# Print the mode of grouped (continuous) data.
#
# lower_limits: numeric vector with the lower limit of each class.
# Movies:       numeric vector of class frequencies, matched by position.
# class_width:  width of a class.
#
# Uses the grouped-data formula
#   mode = l + ((f1 - f0) / (2 * f1 - f0 - f2)) * class_width
# where l and f1 are the lower limit and frequency of the modal class (the
# first class with the largest frequency) and f0 / f2 are the frequencies of
# the classes just before / after it, taken as 0 when there is no such class.
# The result is written to the console with cat().
continuous_mode <- function(lower_limits, Movies, class_width) {
  modal_class <- which.max(Movies)

  # Pad with a zero on each side so the first and last class also have a
  # neighbour; class k then sits at position k + 1 of the padded vector.
  padded <- c(0, Movies, 0)
  f_modal <- Movies[modal_class]
  f_before <- padded[modal_class]
  f_after <- padded[modal_class + 2]

  modal_value <- lower_limits[modal_class] +
    ((f_modal - f_before) / (2 * f_modal - f_before - f_after)) * class_width
  cat("The mode for the continuous data is:", modal_value, "\n")
}
# Compute the median of grouped (continuous) data.
#
# lower_limits:   numeric vector with the lower limit of each class.
# Movies:         numeric vector of class frequencies, matched by position.
# class_interval: width of a class.
#
# Uses the grouped-data formula
#   median = l + ((N / 2 - F) / f) * class_interval
# where the median class is the first class whose cumulative frequency
# reaches N / 2, l and f are its lower limit and frequency, and F is the
# cumulative frequency of all classes BEFORE the median class (0 when the
# median class is the first one).
#
# Returns the median as a single number.
continuous_median <- function(lower_limits, Movies, class_interval) {
  cumulative <- cumsum(Movies)
  half_total <- sum(Movies) / 2

  # First class whose cumulative frequency reaches half of the total.
  median_class <- which(cumulative >= half_total)[1]

  class_lower <- lower_limits[median_class]
  class_frequency <- Movies[median_class]

  # Cumulative frequency before the median class: prepending 0 shifts the
  # cumulative frequencies by one class, so index k yields the total of
  # classes 1..(k - 1) and the first class gets 0.
  cumulative_before <- c(0, cumulative)[median_class]

  class_lower +
    ((half_total - cumulative_before) / class_frequency) * class_interval
}
# Class width taken from the gap between the first two lower limits.
class_interval <- diff(lower_limits)[1]

# Show the grouped data, then print its mean, mode and median.
# Lines starting with "##" appear to be console output recorded from a run.
cat("Revenue Range:", Revenue_range, "\n")
## Revenue Range: 0-200 200-400 400-600 600-800 800-1000 1000-1200
cat("Movies:", Movies, "\n")
## Movies: 38 19 8 4 2 2
Mean <- continuous_mean
cat("The mean of the given data is:", Mean, "\n")
## The mean of the given data is: 278.0822
continuous_mode(lower_limits, Movies, class_width)
## The mode for the continuous data is: 133.3333
Median <- continuous_median(lower_limits, Movies, class_interval)
cat("The median of the given data is:", Median)
## The median of the given data is: 192.1053