Measuring Central Tendency - Continuous and Discrete Grouped Data

1. Create your own discrete data set and calculate the mean, median and mode using R.

# Creating a discrete grouped data set and calculating the measures of central tendency for it

 # Note denominations (the values) and the count collected for each one
 # (the frequencies)
 Note_num <- 100 * seq_len(5)
 Collections <- c(1400, 1200, 1800, 2400, 950)

 # Mean of a discrete frequency distribution.
 # v: the distinct values; f: the frequency of each value.
 # Prints the frequency-weighted mean, sum(v * f) / sum(f).
 discrete_mean <- function(v, f) {
   weighted_avg <- sum(v * f) / sum(f)
   cat("The mean of the given data is:", weighted_avg, "\n")
 }

 


 # Mode of a discrete frequency distribution.
 # v: the distinct values; f: the frequency of each value.
 # Prints the value whose frequency is largest (the first one on ties,
 # because which.max() returns the first maximum).
 discrete_mode <- function(v, f) {
   most_frequent <- which.max(f)
   cat("The mode for the discrete frequency distribution is:",
       v[most_frequent], "\n")
 }

 


 # Median of a discrete frequency distribution.
 # v: the distinct values (assumed to be sorted ascending; not checked here);
 # f: the frequency of each value.
 # Prints the first value whose cumulative frequency reaches (N + 1) / 2,
 # where N is the total frequency.
 discrete_median <- function(v, f) {
   running_total <- cumsum(f)
   target_rank <- (sum(f) + 1) / 2
   # match(TRUE, ...) is the first position where the running total reaches
   # the target rank (NA when none does)
   hit <- match(TRUE, running_total >= target_rank)
   cat("The median of the given data is:", v[hit], "\n")
 }
 
 # Echo the inputs, then report each measure of central tendency for them
 cat("Note numbers:", Note_num, "\n")

## Note numbers: 100 200 300 400 500

 cat("Collections:", Collections, "\n")

## Collections: 1400 1200 1800 2400 950

 discrete_mean(v = Note_num, f = Collections)

## The mean of the given data is: 303.871

 discrete_mode(v = Note_num, f = Collections)

## The mode for the discrete frequency distribution is: 400

 discrete_median(v = Note_num, f = Collections)

## The median of the given data is: 300

#2. Generate a continuous data set and display its summary statistics.

 # Class intervals c(lower, upper) for revenue and the movie count in each
 revenue_ranges <- list(
   c(0, 200), c(200, 400), c(400, 600),
   c(600, 800), c(800, 1000), c(1000, 1200)
 )
 movie_counts <- c(38, 19, 8, 4, 2, 2)

 # Set seed for reproducibility
 set.seed(42)

 # Generate continuous revenue data by sampling uniformly within each range.
 # Map() always returns a list, so unlist() yields a plain vector even when
 # every class has the same count; mapply() would simplify that case to a
 # matrix and the data frame below would end up with one column per class.
 continuous_data <- unlist(Map(
   function(bounds, count) runif(count, bounds[1], bounds[2]),
   revenue_ranges, movie_counts
 ))

 # Convert to data frame
 df <- data.frame(Revenue = continuous_data)

 # Display summary statistics
 summary(df)

##     Revenue         
##  Min.   :   0.7897  
##  1st Qu.: 128.3491  
##  Median : 195.6453  
##  Mean   : 289.5176  
##  3rd Qu.: 391.5153  
##  Max.   :1043.2771

 # Bar chart of the movie count in each revenue class interval
 barplot(
   height = movie_counts,
   names.arg = c("0-200", "200-400", "400-600",
                 "600-800", "800-1000", "1000-1200"),
   main = "Revenue Ranges vs Movie Count",
   xlab = "Revenue Range",
   ylab = "Number of Movies",
   col = "skyblue",
   border = "black"
 )

#3. Create your own continuous data set and calculate the mean, median and mode using R.

 # Measures of central tendency for the continuous grouped data

 class_width <- 200
 Movies <- c(38, 19, 8, 4, 2, 2)

 # Class boundaries: every class starts at its lower limit and spans
 # class_width; the printable labels are built from the two limits
 lower_limits <- c(0, 200, 400, 600, 800, 1000)
 upper_limits <- lower_limits + class_width
 Revenue_range <- paste(lower_limits, upper_limits, sep = "-")

 # Mean: weight each class midpoint by the class frequency
 midpoints <- (lower_limits + upper_limits) / 2
 continuous_mean <- sum(Movies * midpoints) / sum(Movies)

 


 # Mode of a continuous (grouped) frequency distribution.
 # lower_limits: lower boundary of each class; Movies: frequency of each
 # class; class_width: width applied to the modal class.
 # Prints l + ((f1 - f0) / (2 * f1 - f0 - f2)) * class_width, where l and f1
 # are the lower limit and frequency of the modal class and f0 / f2 are the
 # frequencies of the class before / after it (0 when there is none).
 continuous_mode <- function(lower_limits, Movies, class_width) {
   peak <- which.max(Movies)
   # Pad with a zero on both ends so the first and last class also have
   # neighbours: padded[k + 1] is Movies[k]
   padded <- c(0, Movies, 0)
   before <- padded[peak]
   at_peak <- Movies[peak]
   after <- padded[peak + 2]
   estimate <- lower_limits[peak] +
     ((at_peak - before) / (2 * at_peak - before - after)) * class_width
   cat("The mode for the continuous data is:", estimate, "\n")
 }

 


 # Median of a continuous (grouped) frequency distribution.
 # lower_limits: lower boundary of each class; Movies: frequency of each
 # class; class_interval: width applied to the median class.
 # Returns l + ((N / 2 - cf_before) / f) * class_interval, where N is the
 # total frequency, the median class is the first class whose cumulative
 # frequency reaches N / 2, l and f are its lower limit and frequency, and
 # cf_before is the cumulative frequency of all classes before it.
 continuous_median <- function(lower_limits, Movies, class_interval) {
   cum_freq <- cumsum(Movies)
   median_position <- sum(Movies) / 2
   median_class_index <- which(cum_freq >= median_position)[1]
   # Cumulative frequency *before* the median class. Shifting by a leading 0
   # makes position k hold cum_freq[k - 1], with 0 for the first class.
   # Using the median class's own cumulative frequency here would push the
   # estimate below the class's lower limit.
   cf_before <- c(0, cum_freq)[median_class_index]
   class_freq <- Movies[median_class_index]
   lower_limits[median_class_index] +
     ((median_position - cf_before) / class_freq) * class_interval
 }
 # Class width, taken as the gap between the first two lower limits
 class_interval <- diff(lower_limits[1:2])

 cat("Revenue Range:", Revenue_range, "\n")

## Revenue Range: 0-200 200-400 400-600 600-800 800-1000 1000-1200

 cat("Movies:", Movies, "\n")

## Movies: 38 19 8 4 2 2

 Mean <- continuous_mean
 cat("The mean of the given data is:", Mean, "\n")

## The mean of the given data is: 278.0822

 continuous_mode(lower_limits, Movies, class_width = class_width)

## The mode for the continuous data is: 133.3333

 Median <- continuous_median(lower_limits, Movies, class_interval = class_interval)
 cat("The median of the given data is:", Median)

## The median of the given data is: 192.1053

Measuring Central Tendency - Continuous and Discrete Grouped Data

Sirige Venkata Mytresh Sai Gowd

2025-02-06