## Introduction to R
# R is an open-source programming language that is widely used as statistical software and as a data analysis tool. R generally comes with a command-line interface and is available on widely used platforms such as Windows, Linux, and macOS. R is also regarded as a cutting-edge tool for data analysis. It was designed by Ross Ihaka and Robert Gentleman at the University of Auckland, New Zealand, and is currently developed by the R Development Core Team. The R programming language is an implementation of the S programming language, combined with lexical scoping semantics inspired by Scheme. The project was conceived in 1992, with an initial version released in 1995 and a stable beta version in 2000.
## Basic components
# - Variables: Store and manipulate data.
# - Functions: Perform operations or computations.
# - Data structures: Organize and store data.
# - Control structures: Control the flow of program execution (e.g., loops and conditionals).
# - Libraries (packages): Extend R's capabilities with specialized functions.
# - Data import/export: Read and write data from/to various formats.
# - Documentation: Add comments and documentation for code clarity.
# - Graphics: Create visualizations and plots.
## Data types in R
# - Numeric: Represents numbers (e.g., 3.14, -42).
# - Character (string): Stores text (e.g., "Welcome to all!").
# - Logical: Represents TRUE or FALSE values.
# - Date and time: Handles date and time values.
# Define one variable for each basic data type
numeric_var <- 51
character_var <- "Welcome all!"
logical_var <- TRUE
# No tz is given, so the string is interpreted in the session's time zone
date_time_var <- as.POSIXct("2023-11-12 14:30:00")
# Print variables
cat("Numeric Variable:", numeric_var, "\n")
## Numeric Variable: 51
cat("Character Variable:", character_var, "\n")
## Character Variable: Welcome all!
cat("Logical Variable:", logical_var, "\n")
## Logical Variable: TRUE
# cat() ignores the POSIXct class and would print the underlying number of
# seconds since 1970-01-01, so render the timestamp as text first.
cat(
  "Date and Time Variable:",
  format(date_time_var, "%Y-%m-%d %H:%M:%S"),
  "\n"
)
## Date and Time Variable: 2023-11-12 14:30:00
## Data structures in R
# - Vector: A one-dimensional array of elements of the same data type (e.g., c(1, 2, 3)).
# - Matrix: A two-dimensional array of elements of the same data type.
# - List: A versatile data structure that can hold elements of different data types (e.g., list(1, "apple", TRUE)).
# - Data frame: A tabular structure where columns can have different data types (e.g., data imported from a CSV file).
# Create one example of each core data structure
vector_example <- c(5, 4, 3, 2, 1)
# matrix() fills column by column; 2 rows x 3 columns holds all six values.
# (A 2 x 1 shape would keep only the first two and raise a warning.)
matrix_example <- matrix(1:6, nrow = 2, ncol = 3)
list_example <- list(2, "banana", TRUE)
data_frame_example <- data.frame(
  Name = c("Sudha", "Meghana", "Vaishnavi"),
  Age = c(23, 32, 24),
  Score = c(87, 92, 90)
)
# Print data structures
cat("Vector Example:", vector_example, "\n")
## Vector Example: 5 4 3 2 1
cat("Matrix Example:\n")
## Matrix Example:
print(matrix_example)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
cat("List Example:\n")
## List Example:
print(list_example)
## [[1]]
## [1] 2
##
## [[2]]
## [1] "banana"
##
## [[3]]
## [1] TRUE
cat("Data Frame Example:\n")
## Data Frame Example:
print(data_frame_example)
##        Name Age Score
## 1     Sudha  23    87
## 2   Meghana  32    92
## 3 Vaishnavi  24    90
# a. Element-wise addition of two numeric vectors
vector1 <- c(7, 4, 9)
vector2 <- c(6, 9, 3)
# `+` is vectorized: values at matching positions are summed pairwise
result_vector <- vector1 + vector2
# Show the summed vector
print(result_vector)
## [1] 13 13 12
# We have added the two vectors successfully.
# b. Sum, mean, and product of a numeric vector
my_vector <- c(1, 3, 5, 8, 9)
# Each aggregate collapses the whole vector to a single number
sum_result <- sum(my_vector)
mean_result <- mean(my_vector)
product_result <- prod(my_vector)
# Report each statistic as a labelled string
print(sprintf("Sum: %s", sum_result))
## [1] "Sum: 26"
print(sprintf("Mean: %s", mean_result))
## [1] "Mean: 5.2"
print(sprintf("Product: %s", product_result))
## [1] "Product: 1080"
# We have found the sum, mean, and product of the vector elements.
# c. Smallest and largest element of a numeric vector
my_vector <- c(5, 6, 3, 7, 9)
# Extremes of the vector
min_value <- min(my_vector)
max_value <- max(my_vector)
# Report both values as labelled strings
print(paste0("Minimum: ", min_value))
## [1] "Minimum: 3"
print(paste0("Maximum: ", max_value))
## [1] "Maximum: 9"
# d. A named list whose elements have different types
my_list <- list(
  string_element  = "Welcome to all",
  numeric_element = 51,
  vector_element  = c(6, 5, 4),
  logical_element = TRUE
)
# Printing a named list shows each element under its `$name` header
print(my_list)
## $string_element
## [1] "Welcome to all"
##
## $numeric_element
## [1] 51
##
## $vector_element
## [1] 6 5 4
##
## $logical_element
## [1] TRUE
# A heterogeneous list has been created.
# e. Build a named list holding a vector, a matrix, and a nested list
my_list <- list(
  vector_element = c(4, 5, 3),
  matrix_element = matrix(1:6, nrow = 3, ncol = 2),
  nested_list    = list(a = "Mango", b = "Banana")
)
# `[[` extracts a single element; here the first two, looked up by name
first_element <- my_list[["vector_element"]]
second_element <- my_list[["matrix_element"]]
# Show the extracted vector and matrix
print(first_element)
## [1] 4 5 3
print(second_element)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# f. Create a 3x5 matrix filled with zeros
# The single value 0 is recycled to fill all 3 * 5 cells.
my_matrix <- matrix(0, nrow = 3, ncol = 5)
# Print the matrix
print(my_matrix)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    0    0    0    0
## [2,]    0    0    0    0    0
## [3,]    0    0    0    0    0
# g. Create a sample matrix: 1:12 filled column by column into 2 rows x 6 columns
my_matrix <- matrix(1:12, nrow = 2, ncol = 6)
# Access specific elements; indexing is always [row, column]
element_1 <- my_matrix[2, 3] # single value at row 2, column 3
element_2 <- my_matrix[2, ]  # entire 2nd row (returned as a plain vector)
element_3 <- my_matrix[, 3]  # entire 3rd column (returned as a plain vector)
# Print the accessed elements
print(element_1)
## [1] 6
print(element_2)
## [1]  2  4  6  8 10 12
print(element_3)
## [1] 5 6
# h. Build a data frame from two parallel vectors
name <- c("Meghana", "Bhanu", "Charitha")
age <- c(24, 38, 45)
# Each vector becomes one column; position i across vectors forms row i
df <- data.frame(
  Name = name,
  Age = age
)
# Show the resulting table
print(df)
##       Name Age
## 1  Meghana  24
## 2    Bhanu  38
## 3 Charitha  45
# i. Add new rows to an existing data frame
df <- data.frame(
  Name = c("Meghana", "Bhavani"),
  Age = c(23, 21)
)
# Rows to add; the column names must match those of `df`
new_data <- data.frame(
  Name = c("Charitha", "Dravid"),
  Age = c(37, 43)
)
# rbind() stacks the new rows underneath the existing ones
df <- rbind(df, new_data)
# Show the enlarged table
print(df)
##       Name Age
## 1  Meghana  23
## 2  Bhavani  21
## 3 Charitha  37
## 4   Dravid  43
# j. Add a new column to an existing data frame
df <- data.frame(
  Name = c("Amrutha", "Bhavana"),
  Age = c(27, 34)
)
# Assigning to a column name that does not exist yet creates that column
df[["Salary"]] <- c(52000, 65000)
# Show the widened table
print(df)
##      Name Age Salary
## 1 Amrutha  27  52000
## 2 Bhavana  34  65000
# k. Create a DataFrame
df <- data.frame(
  Name = c("Amrutha", "Bhavana", "Charitha", "Dravid"),
  Age = c(21, 33, 37, 43)
)
# Extract the first 2 rows (row index 1:2, all columns)
first_two_rows <- df[1:2, ]
# Display the extracted rows
print(first_two_rows)
##      Name Age
## 1 Amrutha  21
## 2 Bhavana  33
# l. Sort a data frame by one of its columns
df <- data.frame(
  Name = c("Charitha", "Amrutha", "Bhavana"),
  Age = c(32, 27, 31)
)
# order() returns the row positions that put Age in ascending order;
# indexing with them rearranges the rows (original row names are kept)
sorted_df <- df[order(df[["Age"]]), ]
# Show the rows from youngest to oldest
print(sorted_df)
##       Name Age
## 2  Amrutha  27
## 3  Bhavana  31
## 1 Charitha  32
# m. Combine two data frames that share an "ID" key column
df1 <- data.frame(
  ID = 1:3,
  Name = c("Amrutha", "Bhavana", "Charitha")
)
df2 <- data.frame(
  ID = 2:4,
  Salary = c(52500, 64500, 78400)
)
# Keep unmatched rows from both sides (a full outer join);
# cells with no counterpart are filled with NA
merged_df <- merge(df1, df2, by = "ID", all.x = TRUE, all.y = TRUE)
# Show the joined table
print(merged_df)
##   ID     Name Salary
## 1  1  Amrutha     NA
## 2  2  Bhavana  52500
## 3  3 Charitha  64500
## 4  4     <NA>  78400
# n. Append one data frame to another with the same columns
df1 <- data.frame(
  Name = c("Amrutha", "Bhavana"),
  Age = c(23, 31)
)
df2 <- data.frame(
  Name = c("Charlie", "David"),
  Age = c(34, 41)
)
# rbind() places the rows of df2 after the rows of df1
appended_df <- rbind(df1, df2)
# Show the combined table
print(appended_df)
##      Name Age
## 1 Amrutha  23
## 2 Bhavana  31
## 3 Charlie  34
## 4   David  41
#o. Load the dplyr package
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Create a sample DataFrame
df <- data.frame(
  Group = c("B", "B", "A", "C", "C"),
  Value = c(16, 15, 25, 20, 30)
)
# Select rows with maximum value in each group.
# max(Value) is evaluated per group, so every row equal to its group's maximum
# is kept (ties included). ungroup() then removes the grouping so that later
# operations on `result` are not silently applied per group.
result <- df %>%
  group_by(Group) %>%
  filter(Value == max(Value)) %>%
  ungroup()
# Display the result
print(result)
## # A tibble: 3 × 2
##   Group Value
##   <chr> <dbl>
## 1 B        16
## 2 A        25
## 3 C        30
# p. Full outer join of two data frames on the "ID" column
df1 <- data.frame(
  ID = 1:4,
  Name = c("Amrutha", "Bhavana", "Charitha", "Dravid")
)
df2 <- data.frame(
  ID = 2:5,
  Salary = c(52500, 64500, 78500, 552300)
)
# all.x / all.y keep the IDs found in only one table; their missing
# Name or Salary is filled with NA
merged_df <- merge(df1, df2, by = "ID", all.x = TRUE, all.y = TRUE)
# Show the joined table
print(merged_df)
##   ID     Name Salary
## 1  1  Amrutha     NA
## 2  2  Bhavana  52500
## 3  3 Charitha  64500
## 4  4   Dravid  78500
## 5  5     <NA> 552300
# q.a. Read a number from the console
# readline() only waits for input in an interactive session. In a
# non-interactive run (e.g. Rscript) it returns "" immediately, which
# as.numeric() turns into NA, so one line is read from stdin there instead.
data <- local({
  prompt <- "Enter a number: "
  if (interactive()) {
    answer <- readline(prompt)
  } else {
    cat(prompt)
    con <- file("stdin")
    answer <- readLines(con, n = 1L)
    close(con)
  }
  # No input at all (end of file) yields NA; text that is not a number
  # also becomes NA, with R's usual coercion warning.
  if (length(answer) == 0L) NA_real_ else as.numeric(answer)
})
# Prints the number that was entered, or NA if the input was not numeric
print(data)