BASIC DATA STRUCTURES IN R

R’s basic data structures include the vector, list, matrix, data frame, and factor. Some of these structures require all elements to be of the same data type (e.g. vectors and matrices), while others permit elements of different data types (e.g. lists and data frames).

# Define one variable for each common scalar data type
numeric_var <- 42
character_var <- "Hello, World!"
logical_var <- TRUE
# No `tz` is supplied, so the string is interpreted in the session time zone.
date_time_var <- as.POSIXct("2023-01-15 14:30:00")

# Print variables
cat("Numeric Variable:", numeric_var, "\n")
## Numeric Variable: 42
cat("Character Variable:", character_var, "\n")
## Character Variable: Hello, World!
cat("Logical Variable:", logical_var, "\n")
## Logical Variable: TRUE
# cat() ignores the POSIXct class and would print the underlying number of
# seconds since the epoch (e.g. 1673773200). Format the value explicitly so a
# readable timestamp is shown instead.
cat(
  "Date and Time Variable:",
  format(date_time_var, "%Y-%m-%d %H:%M:%S"),
  "\n"
)
## Date and Time Variable: 2023-01-15 14:30:00
# Build one example of each core container type
# Vector: homogeneous and one-dimensional
vector_example <- as.numeric(1:5)
# Matrix: homogeneous and two-dimensional, filled column by column
matrix_example <- matrix(seq_len(6), nrow = 2)
# List: elements may have different types
list_example <- list(1, "apple", TRUE)
# Data frame: equal-length columns, each with its own type
data_frame_example <- data.frame(
  Name  = c("Alice", "Bob", "Charlie"),
  Age   = c(25, 30, 22),
  Score = c(90, 85, 92)
)

# Show each structure: cat() is enough for a flat vector, print() keeps the
# layout of matrices, lists and data frames
cat("Vector Example:", vector_example, "\n")
## Vector Example: 1 2 3 4 5
cat("Matrix Example:\n")
## Matrix Example:
print(matrix_example)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
cat("List Example:\n")
## List Example:
print(list_example)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] "apple"
## 
## [[3]]
## [1] TRUE
cat("Data Frame Example:\n")
## Data Frame Example:
print(data_frame_example)
##      Name Age Score
## 1   Alice  25    90
## 2     Bob  30    85
## 3 Charlie  22    92
# a. Create two vectors of integers
# The `L` suffix makes the elements true integers; bare literals such as `1`
# are stored as doubles in R.
vector1 <- c(1L, 2L, 3L)
vector2 <- c(4L, 5L, 6L)

# Add the two vectors element-wise (`+` is vectorized, so no loop is needed)
result_vector <- vector1 + vector2

# Print the result
print(result_vector)
## [1] 5 7 9

We have successfully added the two vectors element-wise.

# b. Summarise a numeric vector
my_vector <- c(2, 4, 6, 8, 10)

# Aggregate the elements three ways: total, arithmetic mean, and product
sum_result <- sum(my_vector)
mean_result <- mean(my_vector)
product_result <- prod(my_vector)

# Report each statistic as a labelled string
print(paste0("Sum: ", sum_result))
## [1] "Sum: 30"
print(paste0("Mean: ", mean_result))
## [1] "Mean: 6"
print(paste0("Product: ", product_result))
## [1] "Product: 3840"

We have computed the sum, mean, and product of the vector's elements.

# c. Smallest and largest element of a vector
my_vector <- c(3, 1, 7, 2, 9)

# range() returns c(minimum, maximum); pick each end by position
min_value <- range(my_vector)[1]
max_value <- range(my_vector)[2]

# Report both extremes as labelled strings
print(paste0("Minimum: ", min_value))
## [1] "Minimum: 1"
print(paste0("Maximum: ", max_value))
## [1] "Maximum: 9"
#d. Build a list whose elements have different types
# The values and their names are supplied separately and joined by setNames().
my_list <- setNames(
  list("Hello, World", 42, c(1, 2, 3), TRUE),
  c("string_element", "numeric_element", "vector_element", "logical_element")
)

# Show every element together with its name
print(my_list)
## $string_element
## [1] "Hello, World"
## 
## $numeric_element
## [1] 42
## 
## $vector_element
## [1] 1 2 3
## 
## $logical_element
## [1] TRUE

A heterogeneous list has been created.

#e. Build a named list holding a vector, a matrix and a nested list
my_list <- list(
  vector_element = c(1, 2, 3),
  matrix_element = matrix(1:6, ncol = 3),
  nested_list = list(a = "apple", b = "banana")
)

# Extract the first two elements by name; `[[` returns the element itself
# rather than a one-element list
first_element <- my_list[["vector_element"]]
second_element <- my_list[["matrix_element"]]

# Show what was extracted
print(first_element)
## [1] 1 2 3
print(second_element)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
#f. Build a 3-row, 5-column matrix in which every cell is zero
# numeric(n) yields n zeros, which matrix() then arranges into the grid.
my_matrix <- matrix(numeric(3 * 5), nrow = 3, ncol = 5)

# Show the matrix
print(my_matrix)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    0    0    0    0
## [2,]    0    0    0    0    0
## [3,]    0    0    0    0    0
#g. Build a 3 x 4 matrix holding 1..12, filled column by column
my_matrix <- matrix(seq_len(12), ncol = 4)

# Index with [row, column]; leaving a position empty selects all of it
element_1 <- my_matrix[2, 3]  # single cell: row 2, column 3
element_2 <- my_matrix[3, , drop = TRUE]  # whole 3rd row, as a plain vector
element_3 <- my_matrix[, 4, drop = TRUE]  # whole 4th column, as a plain vector

# Show the selections
print(element_1)
## [1] 8
print(element_2)
## [1]  3  6  9 12
print(element_3)
## [1] 10 11 12
#h. Two equal-length vectors that will become the columns
name <- c("Alice", "Bob", "Charlie")
age <- c(25, 30, 35)

# Assemble the columns into a data frame via a named list
df <- as.data.frame(list(Name = name, Age = age))

# Show the data frame
print(df)
##      Name Age
## 1   Alice  25
## 2     Bob  30
## 3 Charlie  35
#i. Start from a two-row data frame
df <- data.frame(
  Name = c("Alice", "Bob"),
  Age = c(25, 30)
)

# Rows to be added; the column names must match those of `df`
new_data <- data.frame(
  Name = c("Charlie", "David"),
  Age = c(35, 40)
)

# Stack the new rows underneath the existing ones
df <- do.call(rbind, list(df, new_data))

# Show the enlarged data frame
print(df)
##      Name Age
## 1   Alice  25
## 2     Bob  30
## 3 Charlie  35
## 4   David  40
#j. Start from a two-column data frame
df <- data.frame(
  Name = c("Alice", "Bob"),
  Age = c(25, 30)
)

# Assigning to a column name that does not exist yet appends that column
df[["Salary"]] <- c(50000, 60000)

# Show the widened data frame
print(df)
##    Name Age Salary
## 1 Alice  25  50000
## 2   Bob  30  60000
#k. A four-row data frame to take rows from
df <- data.frame(
  Name = c("Alice", "Bob", "Charlie", "David"),
  Age = c(25, 30, 35, 40)
)

# Keep only the leading two rows (all columns)
first_two_rows <- head(df, n = 2)

# Show the selected rows
print(first_two_rows)
##    Name Age
## 1 Alice  25
## 2   Bob  30
#l. A data frame whose rows are not in age order
df <- data.frame(
  Name = c("Charlie", "Alice", "Bob"),
  Age = c(35, 25, 30)
)

# order() gives the row positions that put "Age" in ascending order;
# indexing with them rearranges the rows and keeps the original row labels
sorted_df <- df[order(df[["Age"]], decreasing = FALSE), ]

# Show the rows from youngest to oldest
print(sorted_df)
##      Name Age
## 2   Alice  25
## 3     Bob  30
## 1 Charlie  35
#m. Two data frames that share an "ID" key but cover different IDs
df1 <- data.frame(
  ID = 1:3,
  Name = c("Alice", "Bob", "Charlie")
)
df2 <- data.frame(
  ID = 2:4,
  Salary = c(50000, 60000, 70000)
)

# Full outer join on "ID": rows without a partner on either side are kept
# and their missing fields are filled with NA
merged_df <- merge(x = df1, y = df2, by = "ID", all.x = TRUE, all.y = TRUE)

# Show the joined data frame
print(merged_df)
##   ID    Name Salary
## 1  1   Alice     NA
## 2  2     Bob  50000
## 3  3 Charlie  60000
## 4  4    <NA>  70000
#n. Two data frames with identical columns
df1 <- data.frame(
  Name = c("Alice", "Bob"),
  Age = c(25, 30)
)
df2 <- data.frame(
  Name = c("Charlie", "David"),
  Age = c(35, 40)
)

# Stack the frames top to bottom, df1 first and df2 after it
appended_df <- Reduce(rbind, list(df1, df2))

# Show the combined data frame
print(appended_df)
##      Name Age
## 1   Alice  25
## 2     Bob  30
## 3 Charlie  35
## 4   David  40
#o. Load the dplyr package
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Create a sample DataFrame
df <- data.frame(Group = c("A", "A", "B", "B", "C"),
                 Value = c(10, 15, 25, 20, 30))

# Select the row(s) holding the maximum value within each group.
# Ties are kept: every row whose Value equals its group's maximum is returned.
# ungroup() removes the grouping afterwards, so later verbs applied to
# `result` work on the whole table instead of silently running per group.
result <- df %>%
  group_by(Group) %>%
  filter(Value == max(Value)) %>%
  ungroup()

# Display the result
print(result)
## # A tibble: 3 × 2
##   Group Value
##   <chr> <dbl>
## 1 A        15
## 2 B        25
## 3 C        30
#p. Two data frames keyed by "ID" whose ID ranges only partly overlap
df1 <- data.frame(
  ID = 1:4,
  Name = c("Alice", "Bob", "Charlie", "David")
)
df2 <- data.frame(
  ID = 2:5,
  Salary = c(50000, 60000, 70000, 55000)
)

# Full outer join on "ID": IDs found in only one frame are kept, with NA in
# the columns contributed by the other frame
merged_df <- merge(x = df1, y = df2, by = "ID", all.x = TRUE, all.y = TRUE)

# Show the joined data frame
print(merged_df)
##   ID    Name Salary
## 1  1   Alice     NA
## 2  2     Bob  50000
## 3  3 Charlie  60000
## 4  4   David  70000
## 5  5    <NA>  55000
#q.a. Read a number from the console
# readline() only waits for input in an interactive session. In a
# non-interactive run (Rscript, knitting) it returns "" immediately, which
# as.numeric() turns into NA. In that case show the prompt and read one line
# from standard input instead, so typed or piped input is still used.
prompt_text <- "Enter a number: "
if (interactive()) {
  raw_input <- readline(prompt_text)
} else {
  cat(prompt_text)
  stdin_con <- file("stdin")
  raw_input <- readLines(stdin_con, n = 1)
  close(stdin_con)
  # Standard input ended without a line: treat it as an empty answer
  if (length(raw_input) == 0) {
    raw_input <- ""
  }
}
# An empty answer becomes NA; non-numeric text becomes NA with a warning
data <- as.numeric(raw_input)
## Enter a number:
print(data)
## [1] NA