# Load the necessary packages required to reproduce the report. For example:
library(kableExtra)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Build a one-row table identifying the report author and render it.
# All three fields are kept as character strings.
na <- "HEMANTH RANGASWAMY" # name
no <- "s4069811" # student number (stray leading space removed)
pc <- "100" # third field, stored as text
# data.frame() names the columns after the variables (na, no, pc), so the
# intermediate cbind() character matrix is not needed.
s <- data.frame(na, no, pc)
# Render the table; FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
s %>%
  kbl(caption = "Individual Information") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| na | no | pc |
|---|---|---|
| HEMANTH RANGASWAMY | s4069811 | 100 |
## Explanation

kableExtra: The kableExtra package improves table styling, adds features like striped tables and font size adjustments, works for both HTML and PDF formats, and makes the pipe operator (%>%) available for chaining operations.

magrittr: The magrittr package allows you to chain operations by piping values forward, making code more readable and avoiding nested function calls.

dplyr: This package provides a set of functions for data manipulation, transformation, and summarization. It is particularly useful for working with data frames and tibbles. Some common functions include filter(), mutate(), select(), and group_by().

readr: This package is designed for efficient reading of flat files (such as CSV, TSV, and fixed-width files) into R. It provides functions like read_csv(), read_tsv(), and read_delim().

Source explanation: Kaggle is an online community platform for data scientists and machine learning enthusiasts. It allows collaboration, dataset sharing, GPU-integrated notebooks, and data science competitions.

## Read/Import Data

The readr library is loaded for reading flat files; the import below uses base R's read.csv() function.

Filename: sample.csv

File path: C:/Users/Hemanth Gowda/Downloads/
# Read the CSV file into the data frame `Covid`.
Covid <- utils::read.csv(file = "C:/Users/Hemanth Gowda/Downloads/sample.csv")
read.csv() is an R function that reads data from a CSV (comma-separated values) file. It takes the file path as an argument and returns a data frame containing the data from the CSV file. Here it reads the file at the given path and stores the result in a data frame named Covid.

## Inspect and Understand
# Inspect the data.
# View() opens an interactive data viewer, so it is only called in an
# interactive session; the script can then also be run non-interactively.
if (interactive()) {
  View(Covid)
}
# Number of rows and columns.
dim(Covid)
## [1] 31822 16
# List the column (variable) names of the data frame.
colnames(Covid)
## [1] "Date" "Location" "Location.Level"
## [4] "Total.Cases" "Total.Deaths" "Total.Recovered"
## [7] "New.Cases" "New.Deaths" "New.Recovered"
## [10] "Province" "Time.Zone" "Population"
## [13] "Longitude" "Latitude" "Case.Fatality.Rate"
## [16] "Case.Recovered.Rate"
# Show the structure of the data frame: each column's type and first values.
utils::str(object = Covid)
## 'data.frame': 31822 obs. of 16 variables:
## $ Date : chr "3/1/2020" "3/2/2020" "3/2/2020" "3/2/2020" ...
## $ Location : chr "DKI Jakarta" "DKI Jakarta" "Indonesia" "Riau" ...
## $ Location.Level : chr "Province" "Province" "Country" "Province" ...
## $ Total.Cases : int 39 41 2 1 43 2 1 1 45 2 ...
## $ Total.Deaths : int 20 20 0 0 20 0 1 0 20 0 ...
## $ Total.Recovered : int 75 75 0 1 75 0 60 1 75 0 ...
## $ New.Cases : int 2 2 2 1 2 0 1 0 2 0 ...
## $ New.Deaths : int 0 0 0 0 0 0 1 0 0 0 ...
## $ New.Recovered : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Province : chr "DKI Jakarta" "DKI Jakarta" "" "Riau" ...
## $ Time.Zone : chr "UTC+07:00" "UTC+07:00" "" "UTC+07:00" ...
## $ Population : int 10846145 10846145 265185520 6074100 10846145 265185520 45161325 6074100 10846145 265185520 ...
## $ Longitude : num 107 107 114 102 107 ...
## $ Latitude : num -6.205 -6.205 -0.789 0.512 -6.205 ...
## $ Case.Fatality.Rate : chr "51.28%" "48.78%" "0.00%" "0.00%" ...
## $ Case.Recovered.Rate: chr "192.31%" "182.93%" "0.00%" "100.00%" ...
# Per-column summary of the data frame.
summary(object = Covid)
## Date Location Location.Level Total.Cases
## Length:31822 Length:31822 Length:31822 Min. : 1
## Class :character Class :character Class :character 1st Qu.: 5223
## Mode :character Mode :character Mode :character Median : 23596
## Mean : 159450
## 3rd Qu.: 69928
## Max. :6405044
## Total.Deaths Total.Recovered New.Cases New.Deaths
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.000
## 1st Qu.: 128.0 1st Qu.: 3914 1st Qu.: 3.0 1st Qu.: 0.000
## Median : 565.5 Median : 21028 Median : 27.0 Median : 0.000
## Mean : 4564.8 Mean : 149261 Mean : 402.3 Mean : 9.921
## 3rd Qu.: 2189.0 3rd Qu.: 64142 3rd Qu.: 130.0 3rd Qu.: 3.000
## Max. :157876.0 Max. :6218708 Max. :64718.0 Max. :2069.000
## New.Recovered Province Time.Zone Population
## Min. : 0.0 Length:31822 Length:31822 Min. : 648407
## 1st Qu.: 2.0 Class :character Class :character 1st Qu.: 1999539
## Median : 20.0 Mode :character Mode :character Median : 4216171
## Mean : 390.4 Mean : 15367656
## 3rd Qu.: 123.0 3rd Qu.: 9095591
## Max. :61361.0 Max. :265185520
## Longitude Latitude Case.Fatality.Rate Case.Recovered.Rate
## Min. : 96.91 Min. :-8.682 Length:31822 Length:31822
## 1st Qu.:106.11 1st Qu.:-6.205 Class :character Class :character
## Median :113.42 Median :-2.462 Mode :character Mode :character
## Mean :113.70 Mean :-2.726
## 3rd Qu.:121.20 3rd Qu.: 0.212
## Max. :138.70 Max. : 4.226
# Distinct longitude values, listed as the levels of a factor.
levels(as.factor(Covid[["Longitude"]]))
## [1] "96.91052174" "99.05196442" "100.4650624" "101.8051092" "102.3384213"
## [6] "102.7236404" "104.1694647" "105.0214366" "106.1090043" "106.5499324"
## [11] "106.8361183" "107.6037083" "108.261746" "110.2011149" "110.4448783"
## [16] "111.1211776" "112.7329414" "113.4176536" "113.921327" "115.1317136"
## [21] "115.4385783" "116.2188791" "116.4684405" "117.5086257" "119.3450194"
## [26] "120.1620559" "121.2010927" "121.592271" "122.070311" "122.3760581"
## [31] "124.5212396" "127.5391072" "129.576792" "132.9762624" "138.69603"
The View() function in R opens an interactive data viewer within RStudio, allowing you to explore and understand your dataset visually. The dim() function in R is used to either retrieve or set the dimensions of an array, matrix, or data frame; here it reports 31822 rows and 16 columns. The names() function in R returns the column names (variable names) of the data frame. The str() function displays the internal structure of the R object Covid, showing each column's type and its first few values. The summary() function generates a summary of every column in the dataset Covid: minimum, quartiles, mean, and maximum for numeric columns, and length, class, and mode for character columns. The levels() function returns the levels of a factor; applied to the Longitude column converted to a factor, it lists the 35 distinct longitude values in the dataset.

## Subsetting
# Keep the first ten columns of the data and convert that subset to a matrix.
covid_subset <- Covid[, seq_len(10), drop = FALSE]
covid_matrix <- as.matrix(covid_subset)
# Show the structure of the resulting matrix.
utils::str(covid_matrix)
## chr [1:31822, 1:10] "3/1/2020" "3/2/2020" "3/2/2020" "3/2/2020" "3/3/2020" ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:10] "Date" "Location" "Location.Level" "Total.Cases" ...
1. covid_subset. Purpose: Creates a subset of the original Covid dataset containing the first 10 columns. Explanation: Indexing a data frame with a single vector of positions selects columns, not rows, so the code keeps columns 1 to 10 for all 31822 rows and assigns them to the new data frame covid_subset.

2. covid_matrix. Purpose: Converts the covid_subset data frame into a matrix format. Explanation: The as.matrix() function transforms the data frame into a matrix, where each cell contains the corresponding value from the data frame. Because the subset mixes character and integer columns, every value is converted to character, which is why the matrix is reported as chr [1:31822, 1:10]. A matrix can be useful for certain mathematical operations or for compatibility with other functions that require matrix input.

3. str(covid_matrix). The str() function displays the internal structure of the R object covid_matrix.

## Create a new Data Frame
# Create a new data frame with an integer column and a factor column.
new_h <- data.frame(
  Number_value = 1:3,
  # State the levels explicitly; factor() would otherwise sort them
  # alphabetically ("high", "low", "medium").
  Character_value = factor(
    c("low", "medium", "high"),
    levels = c("low", "medium", "high")
  )
)
str(new_h)
## 'data.frame': 3 obs. of 2 variables:
## $ Number_value : int 1 2 3
## $ Character_value: Factor w/ 3 levels "low","medium",..: 1 2 3
# List the levels of the factor column; the column has to be addressed by
# its own name, otherwise `$` returns NULL and no levels are found.
levels(new_h$Character_value)
## [1] "low" "medium" "high"
new_rcb <- 5:7
# Add vector to data frame using cbind()
new_h <- cbind(new_h, new_rcb)
# Open the data viewer only in an interactive session.
if (interactive()) {
  View(new_h)
}
dim(new_h)
## [1] 3 3
Creating a Data Frame new_h - The data.frame() function generates a data
frame named new_h. It consists of two columns: Number_value: An integer
column with values 1, 2, and 3. Character_value: A factor column with
the categories “low”, “medium”, and “high”. The str(new_h) command
reveals the internal structure of the data frame. It displays column
names, data types, and sample values. Inspecting the Levels of the
Character_value Factor: The levels() command is meant to list the levels
(categories) of the factor column Character_value. The column has to be
referenced by its own name, new_h$Character_value; a name that does not
exist in the data frame (such as “1” or “high”, which is a value rather
than a column) yields NULL and therefore no levels. The cbind() function
in R stands for column-bind. It allows you to combine vectors, matrices,
or data frames by columns; here it appends the vector new_rcb (5, 6, 7)
to new_h as a third column, which dim(new_h) confirms (3 rows and 3
columns).