##Unsolved Exercise questions for R

#1 Write R code to read an Excel file and save it as a Data Frame.

library(readxl)
# Read the workbook and convert the result straight to a plain data.frame,
# then preview the first 10 rows and first 6 columns.
scimagojr <- as.data.frame(read_excel(path = "scimagojr-3.xlsx"))
head(scimagojr, n = c(10, 6))
##    Rank            Country Documents Citable documents Citations Self-citations
## 1     1              China    127050            126767    597237         411683
## 2     2      United States     96661             94747    792274         265436
## 3     3              Japan     30504             30287    223024          61554
## 4     4     United Kingdom     20944             20357    206091          37874
## 5     5 Russian Federation     18534             18301     34266          12422
## 6     6             Canada     17899             17620    215003          40930
## 7     7            Germany     17027             16831    140566          27426
## 8     8              India     15005             14841    128763          37209
## 9     9             France     13153             12973    130632          28601
## 10   10        South Korea     11983             11923    114675          22595

#2 Write R code to read a JSON file in R. You can use any JSON file of your liking.

library(jsonlite)

# Export the first 10 rows of the scimagojr data frame to a new JSON file.
write_json(x = scimagojr[1:10, ], path = "sample.json")

# Read the JSON file back; simplifyVector = TRUE is passed so the records are
# simplified rather than returned as a nested list. Preview 10 rows x 6 columns.
result <- read_json("sample.json", simplifyVector = TRUE)
head(result, n = c(10, 6))
##    Rank            Country Documents Citable documents Citations Self-citations
## 1     1              China    127050            126767    597237         411683
## 2     2      United States     96661             94747    792274         265436
## 3     3              Japan     30504             30287    223024          61554
## 4     4     United Kingdom     20944             20357    206091          37874
## 5     5 Russian Federation     18534             18301     34266          12422
## 6     6             Canada     17899             17620    215003          40930
## 7     7            Germany     17027             16831    140566          27426
## 8     8              India     15005             14841    128763          37209
## 9     9             France     13153             12973    130632          28601
## 10   10        South Korea     11983             11923    114675          22595

#3 For the data frame created in #2 of Solved questions, write code to insert 10 missing values in each of the three columns. You can insert missing values at random locations.

# Three columns of 200 random integers, each drawn with replacement from 1..5.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
col1 <- sample(1:5, size = 200, replace = TRUE)
col2 <- sample(1:5, size = 200, replace = TRUE)
col3 <- sample(1:5, size = 200, replace = TRUE)

df <- data.frame(col1, col2, col3)

# Insert 10 NA values per column at independently chosen random rows.
# sample.int(n, size) draws distinct row numbers from 1..n.
df[sample.int(nrow(df), size = 10), 1] <- NA
df[sample.int(nrow(df), size = 10), 2] <- NA
df[sample.int(nrow(df), size = 10), 3] <- NA

# Show every row that has a missing value in at least one column.
df[!complete.cases(df), ]
##     col1 col2 col3
## 1      5    4   NA
## 16    NA    5    2
## 17     3   NA    4
## 20     4   NA    4
## 25     5    1   NA
## 26     1   NA    5
## 29     4    5   NA
## 46    NA    5    3
## 47    NA    2    2
## 49     3   NA    2
## 50     3   NA    3
## 52    NA    2    2
## 59    NA    5    4
## 65     3    4   NA
## 77     4    1   NA
## 89     1    5   NA
## 99     4    4   NA
## 100    4   NA    3
## 105   NA    4    2
## 108    4   NA    1
## 134   NA    3    5
## 142    4    3   NA
## 146   NA    4    4
## 158   NA    1    2
## 160   NA   NA    5
## 175    4   NA    3
## 185    2    4   NA
## 192    3    5   NA
## 200    4   NA    1

#4 In continuation of #3, get row numbers in the dataframe that have missing values in col2.

# Row numbers of df whose col2 entry is missing. is.na() already returns a
# logical vector, so no comparison against TRUE is needed.
col22 <- which(is.na(df$col2))
# Display the row numbers as a one-column data frame.
as.data.frame(col22)
##    col22
## 1     17
## 2     20
## 3     26
## 4     49
## 5     50
## 6    100
## 7    108
## 8    160
## 9    175
## 10   200

#5 Make use of the Iris dataset we discussed in the class. On the dataset, perform the following:
#a. Extract last 10 rows and last 3 columns in the dataset.
#b. Get only those rows where Species is setosa, and store the results in a data frame.

# Load the built-in iris data set into the workspace.
data("iris")

# Last 10 rows and last 3 columns, with both index ranges derived from the
# data frame's own dimensions.
iris[(nrow(iris) - 9):nrow(iris), (ncol(iris) - 2):ncol(iris)]
##     Petal.Length Petal.Width   Species
## 141          5.6         2.4 virginica
## 142          5.1         2.3 virginica
## 143          5.1         1.9 virginica
## 144          5.9         2.3 virginica
## 145          5.7         2.5 virginica
## 146          5.2         2.3 virginica
## 147          5.0         1.9 virginica
## 148          5.2         2.0 virginica
## 149          5.4         2.3 virginica
## 150          5.1         1.8 virginica
# The same selection written with tail(): the first element of n limits the
# rows, the second limits the columns.
tail(iris, n = c(10L, 3L))
##     Petal.Length Petal.Width   Species
## 141          5.6         2.4 virginica
## 142          5.1         2.3 virginica
## 143          5.1         1.9 virginica
## 144          5.9         2.3 virginica
## 145          5.7         2.5 virginica
## 146          5.2         2.3 virginica
## 147          5.0         1.9 virginica
## 148          5.2         2.0 virginica
## 149          5.4         2.3 virginica
## 150          5.1         1.8 virginica
# Keep only the setosa observations in their own data frame and preview it.
setosaDf <- subset(iris, Species == "setosa")
head(setosaDf)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

#6 For any given number, write a function to compute its square root.
#If the number is negative, then the function should not give an error but instead should print a message “Negative number provided”

# Compute the square root of a number without erroring on negative input.
#
# Args:
#   num: A numeric value; vectors are processed element-wise.
#
# Returns:
#   For a single non-negative number, its numeric square root. For a single
#   negative number, the string "Negative number provided". For a vector, an
#   element-wise result in which negative positions hold that string (the
#   numeric results are then coerced to character by ifelse()).
computeSqrt <- function(num) {
  is_negative <- num < 0
  # Mask negative entries before sqrt() is called. ifelse() evaluates its
  # "no" branch on the whole vector, so without the mask a mixed-sign input
  # would raise a spurious "NaNs produced" warning for values that are
  # replaced by the message anyway.
  non_negative <- replace(num, which(is_negative), NA)
  ifelse(is_negative, "Negative number provided", sqrt(non_negative))
}

# Positive input: the call returns the numeric square root (5 for 25).
computeSqrt(25)
## [1] 5
# Negative input: no error is raised; the message string is returned instead.
computeSqrt(-2)
## [1] "Negative number provided"

#7 Create a vector that has 100 random numbers from 1 to 1000. Write a function that takes this vector as input and returns a vector that has only those values that are greater than the average of all values in the vector.

# Draw 100 distinct integers from 1..1000 (sampling without replacement).
v1 <- sample.int(1000, size = 100)

# Mean of the sampled values, shown for reference.
mean(v1)
## [1] 516.19
# Return the elements of a vector that are strictly greater than its mean.
#
# Args:
#   inpVector: A numeric vector.
#   na.rm: If TRUE, missing values are ignored when computing the mean and are
#     dropped from the result. The default FALSE keeps the original behaviour,
#     where any NA in the input makes the mean (and so every comparison) NA.
#
# Returns:
#   A vector holding the elements of inpVector that exceed the mean, in their
#   original order.
avgChecker <- function(inpVector, na.rm = FALSE) {
  threshold <- mean(inpVector, na.rm = na.rm)
  above_mean <- inpVector > threshold
  if (isTRUE(na.rm)) {
    # which() keeps only the positions that are TRUE, so NA comparisons do not
    # come back as NA entries in the result.
    above_mean <- which(above_mean)
  }
  inpVector[above_mean]
}

# Apply avgChecker() to the vector v1 created above. Every value it returns
# should be greater than the mean of v1 printed earlier.
avgChecker(v1)
##  [1] 875 673 574 747 717 750 627 660 636 908 866 653 865 828 887 538 826 943 559
## [20] 796 633 566 683 593 959 605 850 645 723 737 930 679 598 798 642 517 536 575
## [39] 982 868 913 987 813 728 769 738 619 724 630 556 727 938 667