##Unsolved Exercise questions for R
#1 Write R code to read an Excel file and save it as a Data Frame.
library(readxl)
# Read the Excel workbook and keep the result as a plain data frame.
scimagojr <- as.data.frame(read_excel("scimagojr-3.xlsx"))
# Preview: first 10 rows, first 6 columns.
head(scimagojr, n = c(10, 6))
## Rank Country Documents Citable documents Citations Self-citations
## 1 1 China 127050 126767 597237 411683
## 2 2 United States 96661 94747 792274 265436
## 3 3 Japan 30504 30287 223024 61554
## 4 4 United Kingdom 20944 20357 206091 37874
## 5 5 Russian Federation 18534 18301 34266 12422
## 6 6 Canada 17899 17620 215003 40930
## 7 7 Germany 17027 16831 140566 27426
## 8 8 India 15005 14841 128763 37209
## 9 9 France 13153 12973 130632 28601
## 10 10 South Korea 11983 11923 114675 22595
#2 Write R code to read a JSON file in R. You can use any JSON file of your liking.
library(jsonlite)
# Save the first 10 rows of the scimagojr data frame to a fresh JSON file
# so there is a known file to read back.
write_json(scimagojr[seq_len(10), ], path = "sample.json")
# Read the JSON file back in by name; simplifyVector = TRUE is passed so the
# parsed records are simplified (the preview below prints as a table).
result <- read_json("sample.json", simplifyVector = TRUE)
# Preview: first 10 rows, first 6 columns.
head(result, n = c(10, 6))
## Rank Country Documents Citable documents Citations Self-citations
## 1 1 China 127050 126767 597237 411683
## 2 2 United States 96661 94747 792274 265436
## 3 3 Japan 30504 30287 223024 61554
## 4 4 United Kingdom 20944 20357 206091 37874
## 5 5 Russian Federation 18534 18301 34266 12422
## 6 6 Canada 17899 17620 215003 40930
## 7 7 Germany 17027 16831 140566 27426
## 8 8 India 15005 14841 128763 37209
## 9 9 France 13153 12973 130632 28601
## 10 10 South Korea 11983 11923 114675 22595
#3 For the data frame created in #2 of Solved questions, write code to insert 10 missing values in each of the three columns. You can insert missing values at random locations.
# Three columns of 200 values each, drawn with replacement from 1..5.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
col1 <- sample(1:5, size = 200, replace = TRUE)
col2 <- sample(1:5, size = 200, replace = TRUE)
col3 <- sample(1:5, size = 200, replace = TRUE)
df <- data.frame(col1, col2, col3)
# Insert 10 NA values into each column at random row positions.
# Rows are drawn separately for every column, so a single row may end up
# with more than one NA. seq_len() stays correct even for a 0-row frame.
df[sample(seq_len(nrow(df)), size = 10), 1] <- NA
df[sample(seq_len(nrow(df)), size = 10), 2] <- NA
df[sample(seq_len(nrow(df)), size = 10), 3] <- NA
# Show every row that has a missing value in at least one column.
# is.na() already returns a logical mask, so no `== TRUE` or which() is needed.
df[is.na(df$col1) | is.na(df$col2) | is.na(df$col3), ]
## col1 col2 col3
## 1 5 4 NA
## 16 NA 5 2
## 17 3 NA 4
## 20 4 NA 4
## 25 5 1 NA
## 26 1 NA 5
## 29 4 5 NA
## 46 NA 5 3
## 47 NA 2 2
## 49 3 NA 2
## 50 3 NA 3
## 52 NA 2 2
## 59 NA 5 4
## 65 3 4 NA
## 77 4 1 NA
## 89 1 5 NA
## 99 4 4 NA
## 100 4 NA 3
## 105 NA 4 2
## 108 4 NA 1
## 134 NA 3 5
## 142 4 3 NA
## 146 NA 4 4
## 158 NA 1 2
## 160 NA NA 5
## 175 4 NA 3
## 185 2 4 NA
## 192 3 5 NA
## 200 4 NA 1
#4 In continuation of #3, get row numbers in the dataframe that have missing values in col2.
# Row numbers of df whose col2 entry is missing.
# is.na() yields a logical vector, which which() converts to positions;
# comparing it against T added nothing and relied on T not being reassigned.
col22 <- which(is.na(df$col2))
# Display the row numbers as a one-column data frame.
as.data.frame(col22)
## col22
## 1 17
## 2 20
## 3 26
## 4 49
## 5 50
## 6 100
## 7 108
## 8 160
## 9 175
## 10 200
#5 Make use of the Iris dataset we discussed in the class. On the dataset, perform the following:
#a. Extract last 10 rows and last 3 columns in the dataset.
#b. Get only those rows where Species is of type Setosa, and store the results in a dataframe
# Load the iris dataset.
data(iris)
# Last 10 rows and last 3 columns, with both index ranges derived from the
# data frame's own dimensions.
iris[(nrow(iris) - 9):nrow(iris), (ncol(iris) - 2):ncol(iris)]
## Petal.Length Petal.Width Species
## 141 5.6 2.4 virginica
## 142 5.1 2.3 virginica
## 143 5.1 1.9 virginica
## 144 5.9 2.3 virginica
## 145 5.7 2.5 virginica
## 146 5.2 2.3 virginica
## 147 5.0 1.9 virginica
## 148 5.2 2.0 virginica
## 149 5.4 2.3 virginica
## 150 5.1 1.8 virginica
# Shorter alternative: tail() takes one count per dimension (rows, columns).
tail(iris, n = c(10L, 3L))
## Petal.Length Petal.Width Species
## 141 5.6 2.4 virginica
## 142 5.1 2.3 virginica
## 143 5.1 1.9 virginica
## 144 5.9 2.3 virginica
## 145 5.7 2.5 virginica
## 146 5.2 2.3 virginica
## 147 5.0 1.9 virginica
## 148 5.2 2.0 virginica
## 149 5.4 2.3 virginica
## 150 5.1 1.8 virginica
# Keep only the rows whose Species is "setosa" and store them in their own
# data frame.
setosaDf <- iris[iris[["Species"]] == "setosa", ]
# Preview the first six rows of the subset.
head(setosaDf, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
#6 For any given number, write a function to compute its square root.
#If the number is negative, then the function should not give an error but instead should print a message “Negative number provided”
# Square root that reports negative input instead of failing or returning NaN.
#
# num: the number to take the square root of. Vectors are handled
#      element-wise because ifelse() is vectorised.
#
# Returns sqrt(num) for a non-negative number, the string
# "Negative number provided" for a negative number, and NA for NA input.
# For a vector containing any negative element the whole result is a character
# vector, since the message and the roots have to share one type.
computeSqrt <- function(num) {
  # Negatives are swapped for 0 before sqrt() is called. ifelse() evaluates its
  # `no` argument over the full vector, so without this a mixed-sign vector
  # raised a spurious "NaNs produced" warning even though those NaN values
  # were never part of the result.
  is_negative <- !is.na(num) & num < 0
  ifelse(
    num < 0,
    "Negative number provided",
    sqrt(replace(num, is_negative, 0))
  )
}
#Testing with a positive number
computeSqrt(25)
## [1] 5
#Testing with a negative number
computeSqrt(-2)
## [1] "Negative number provided"
#7 Create a vector that has 100 random numbers from 1 to 1000. Write a function that takes this vector as input and returns a vector that has only those values that are greater than average of all values in the vector.
# 100 distinct random integers drawn from 1..1000.
v1 <- sample(1:1000, 100)
#Sample mean of the created vector
mean(v1)
## [1] 516.19
# Return the elements of a vector that are strictly greater than its mean.
#
# inpVector: vector of numbers; may contain NA.
#
# Returns the elements of inpVector, in their original order, that exceed the
# mean of the non-missing values. Missing values are never returned. An empty
# or all-NA input gives an empty result.
avgChecker <- function(inpVector) {
  # na.rm = TRUE: a single NA would otherwise make the mean NA, every
  # comparison NA, and the result a vector of nothing but NA.
  threshold <- mean(inpVector, na.rm = TRUE)
  # which() drops the NA comparisons instead of indexing with them.
  inpVector[which(inpVector > threshold)]
}
#Executing the function with vector created above. All the values in this should be greater than the mean above.
avgChecker(v1)
## [1] 875 673 574 747 717 750 627 660 636 908 866 653 865 828 887 538 826 943 559
## [20] 796 633 566 683 593 959 605 850 645 723 737 930 679 598 798 642 517 536 575
## [39] 982 868 913 987 813 728 769 738 619 724 630 556 727 938 667