install.packages('dplyr')
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.5/dplyr_1.1.4.zip'
Content type 'application/zip' length 1594395 bytes (1.5 MB)
downloaded 1.5 MB
package ‘dplyr’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\Dell\AppData\Local\Temp\RtmpWCeVTb\downloaded_packages
library(dplyr)
help(package = 'dplyr')
x <- rep(seq(2, 3, by=0.5), times=3, each=4)
x
[1] 2.0 2.0 2.0 2.0 2.5 2.5 2.5 2.5 3.0 3.0 3.0 3.0 2.0 2.0 2.0
[16] 2.0 2.5 2.5 2.5 2.5 3.0 3.0 3.0 3.0 2.0 2.0 2.0 2.0 2.5 2.5
[31] 2.5 2.5 3.0 3.0 3.0 3.0
cat("rank:", rank(x)) # Rank of elements.
rank: 6.5 6.5 6.5 6.5 18.5 18.5 18.5 18.5 30.5 30.5 30.5 30.5 6.5 6.5 6.5 6.5 18.5 18.5 18.5 18.5 30.5 30.5 30.5 30.5 6.5 6.5 6.5 6.5 18.5 18.5 18.5 18.5 30.5 30.5 30.5 30.5
table(x)
x
2 2.5 3
12 12 12
unique(x)
[1] 2.0 2.5 3.0
x[-(2:4)] # All elements except two to four.
[1] 2.0 2.5 2.5 2.5 2.5 3.0 3.0 3.0 3.0 2.0 2.0 2.0 2.0 2.5 2.5
[16] 2.5 2.5 3.0 3.0 3.0 3.0 2.0 2.0 2.0 2.0 2.5 2.5 2.5 2.5 3.0
[31] 3.0 3.0 3.0
x[x %in% c(1, 2, 5)] # Elements in the set 1, 2, 5.
[1] 2 2 2 2 2 2 2 2 2 2 2 2
is.na(a) → is missing
is.null(a) → is null
paste() → joining text
collapse → combining into one string
grep() → finding patterns
gsub() → replacing text
toupper() / tolower() → case change
nchar() → string length
x <- c("Data", "Science", "R")
y <- c("is", "with", "Fun")
# Join two vectors element-wise
joined <- paste(x, y, sep = " ")
joined
[1] "Data is" "Science with" "R Fun"
# "Data is" "Science with" "R Fun"
# Join all elements into one string
sentence <- paste(joined, collapse = " | ")
sentence
[1] "Data is | Science with | R Fun"
# "Data is | Science with | R Fun"
# Find the indices of the elements that contain the letter 'i'
grep("i", joined)
[1] 1 2
# 1 2
# Replace 'R' with 'Statistics'
replaced <- gsub("R", "Statistics", sentence)
replaced
[1] "Data is | Science with | Statistics Fun"
# "Data is | Science with | Statistics Fun"
# Convert to uppercase
upper_text <- toupper(replaced)
upper_text
[1] "DATA IS | SCIENCE WITH | STATISTICS FUN"
# "DATA IS | SCIENCE WITH | STATISTICS FUN"
# Convert to lowercase
lower_text <- tolower(upper_text)
lower_text
[1] "data is | science with | statistics fun"
# "data is | science with | statistics fun"
# Count number of characters
nchar(lower_text)
[1] 39
filter(df, a > 2)
Error in attr(data, "tsp") <- c(start, end, frequency) :
object is not a matrix
Q1.1 Install the dplyr package and load it into your R session. Then create a variable x and assign it the value 10. Print x to the console.
library('dplyr')
x <- 10
x
[1] 10
Q1.2 Create a vector containing the numbers 5, 10, 15, 20. Then extract the substring “Data” from the string “DataScience is fun”.
vec <- c(5, 10, 15, 20)
library(stringr)
str_sub("DataScience is fun", 1, 4)
[1] "Data"
Q2.1 Create a list named my_list with elements: name = “Alice”, age = 30, scores = c(85, 90, 78).
my_list <- list(
name = 'Alice',
age = 30,
scores = c(85, 90, 78)
)
my_list
$name
[1] "Alice"
$age
[1] 30
$scores
[1] 85 90 78
Q2.2 Create a 2x3 matrix filled with the numbers 1 through 6.
mat <- matrix(1:6, nrow=2)
mat
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
Q2.3 Create a data frame df with columns: ID = c(1,2,3), Name = c(“Tom”, “Jerry”, “Spike”).
df <- data.frame(
ID = c(1,2,3),
Name = c("Tom", "Jerry", "Spike")
)
df
Q3.1 Using the mtcars dataset, filter rows where mpg > 20.
library(dplyr)
filter(mtcars, mpg > 20)
Q3.2 Select only the mpg and hp columns from mtcars.
select(mtcars, c(mpg,hp))
Q3.3 Add a new column kmpl to mtcars which is mpg * 0.425.
mutate(mtcars, kmpl = mpg * 0.425)
Q3.4 Calculate the mean of mpg in mtcars.
summarize(mtcars, avg_mpg = mean(mpg))
Q3.5 Arrange mtcars by wt in descending order.
arrange(mtcars, desc(wt))
Q4.1 Create a scatterplot of mpg vs wt from mtcars using ggplot2.
library(ggplot2)
ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
Q4.2 Create a bar chart of the number of cars per cyl in mtcars.
ggplot(mtcars, aes(x = factor(cyl))) + geom_bar()
Q4.3 Create a scatterplot of mpg vs hp, colored by cyl.
ggplot(mtcars, aes(y=mpg, x=hp, color = factor(cyl))) + geom_point()
Q5.1 Calculate the mean, median, and standard deviation of mpg in mtcars.
# summarize(mtcars['mpg'], std=sd(mpg))
mean(mtcars$mpg)
[1] 20.09062
median(mtcars$mpg)
[1] 19.2
sd(mtcars$mpg)
[1] 6.026948
Q5.2 Find the correlation between mpg and wt.
r <- cor(mtcars['mpg'],mtcars['wt'])
r
wt
mpg -0.8676594
Q5.3 Fit a linear model predicting mpg from wt.
model <- lm(mpg ~ wt, data = mtcars)
summary(model)
Call:
lm(formula = mpg ~ wt, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-4.5432 -2.3647 -0.1252 1.4096 6.8727
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
wt -5.3445 0.5591 -9.559 1.29e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.046 on 30 degrees of freedom
Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
Q5.4 Write a function to compute the mode of a vector.
# Most frequent value of `x`, reported together with its frequency.
#
# x: an atomic vector.
# Returns a one-cell table: the name is the most frequent value (as text) and
# the cell holds how many times it occurs. On ties, which.max() picks the
# first maximum in table order.
my_mode <- function(x) {
  freq <- table(x)
  top <- which.max(freq)
  # To return only the mode itself as a number, use instead:
  #   as.numeric(names(freq)[top])
  freq[top]
}
my_mode(c(1, 2, 2, 3, 3, 3, 3))
3
4
# way 2
# Most frequent value of `x`, returned as a character string.
#
# x: an atomic vector.
# Returns the label of the highest-count cell of table(x); the result is
# always character (e.g. "3"), because it is taken from the table's names.
my_mode <- function(x) {
  freq <- table(x)
  ranked <- sort(freq, decreasing = TRUE)
  names(ranked)[1]
}
my_mode(c(1, 2, 2, 3, 3, 3))
[1] "3"
Q5.5 Simulate 10 coin tosses.
set.seed(22)
sample(c("Heads", "Tails"), 10, replace=TRUE)
[1] "Tails" "Heads" "Tails" "Tails" "Tails" "Tails" "Tails" "Heads" "Heads" "Heads"
Q6.1 Write an if statement that prints “Positive” if x > 0, “Negative” if x < 0, else “Zero”.
x <- -5
if (x > 0) {
print("Positive")
} else if (x < 0) {
print("Negative")
} else {
print("Zero")
}
[1] "Negative"
Q6.2 Write a for loop to print squares of numbers 1 through 5.
for (i in 1:5){
print(i^2)
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
Q6.3 Write a function that returns TRUE if a number is even.
# Test whether numbers are even.
#
# x: a numeric vector (the check is vectorised).
# Returns a logical vector, TRUE where the remainder of x divided by 2 is 0.
even_checker <- function(x) {
  remainder <- x %% 2
  remainder == 0
}
even_checker(3)
[1] FALSE
Q6.4 Use apply to calculate row sums of a matrix.
mat <- matrix(1:9, nrow = 3)
apply(mat,1,FUN=sum)
[1] 12 15 18
Q7.1 Fit a linear model to predict mpg from wt and hp in mtcars.
model <- lm(mpg~wt + hp, data=mtcars)
summary(model)
Call:
lm(formula = mpg ~ wt + hp, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-3.941 -1.600 -0.182 1.050 5.854
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
wt -3.87783 0.63273 -6.129 1.12e-06 ***
hp -0.03177 0.00903 -3.519 0.00145 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.593 on 29 degrees of freedom
Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12
Q7.2 Plot the residuals of the model.
library(ggplot2)
residuals_df <- data.frame(residuals = model$residuals)
ggplot(residuals_df, aes(x = residuals)) + geom_histogram(bins = 50)
Q8.1 Save the mtcars dataset as a CSV file named mtcars_data.csv.
write.csv(mtcars, "mtcars_data.csv", row.names = FALSE)
Q8.2 Read the CSV file back into R.
new_df <- read.csv("mtcars_data.csv")
new_df
Q8.3 List all files in your current working directory.
list.files()
[1] "1Jan.nb.html" "1Jan.Rmd" "2dec.nb.html"
[4] "2dec.Rmd" "30 Nov (2).R" "30 Nov (3).R"
[7] "30 Nov.R" "data.csv" "final solve.nb.html"
[10] "final solve.Rmd" "hjh.nb.html" "hjh.Rmd"
[13] "Intermediate R practice.nb.html" "Intermediate R practice.Rmd" "message.txt"
[16] "mtcars_data.csv" "output.csv" "output.txt"
[19] "rsconnect" "Sample_Marks.xlsx" "SampleDataGPA.csv"
1. Load the iris dataset.
2. Create a new column Petal.Area = Petal.Length × Petal.Width.
3. Filter the data to keep rows where Petal.Area > 2.
4. Create a scatter plot of Petal.Area (x) against Sepal.Length (y), colored by Species.
5. Fit a linear model predicting Sepal.Length from Petal.Area.
6. Save the filtered dataset as iris_filtered.csv.
# 1
data(iris)
# 2
iris <- mutate(iris, Petal.Area = Petal.Length * Petal.Width)
# 3
filtered_iris <- filter(iris, Petal.Area > 2)
# 4
ggplot(filtered_iris, aes(x = Petal.Area, y = Sepal.Length, color = Species)) + geom_point()
# 5
model_iris <- lm(Sepal.Length~Petal.Area, data=filtered_iris)
summary(model_iris)
Call:
lm(formula = Sepal.Length ~ Petal.Area, data = filtered_iris)
Residuals:
Min 1Q Median 3Q Max
-1.23947 -0.28971 -0.05548 0.29046 1.02532
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.04735 0.12802 39.43 <2e-16 ***
Petal.Area 0.14276 0.01402 10.18 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.4644 on 98 degrees of freedom
Multiple R-squared: 0.514, Adjusted R-squared: 0.5091
F-statistic: 103.7 on 1 and 98 DF, p-value: < 2.2e-16
# 6
write.csv(filtered_iris, "iris_filtered.csv", row.names = FALSE)