# We are using the Covid-19 dataset from Kaggle.
library(ggplot2)
library(ggpubr)

# Load the Covid-19 observations once. Every step below works on this data
# frame (columns such as ObservationDate, Confirmed, Deaths, Recovered), so it
# must not be overwritten with another dataset.
df <- read.csv("covid_19_data.csv")
# Number of observations (rows) in the data frame.
NROW(df)
## [1] 306429
# Number of variables (columns).
NCOL(df)
## [1] 8
# Preview the first ten records.
head(df, n = 10L)
## SNo ObservationDate Province.State Country.Region Last.Update Confirmed
## 1 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1
## 2 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14
## 3 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6
## 4 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1
## 5 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0
## 6 6 01/22/2020 Guangdong Mainland China 1/22/2020 17:00 26
## 7 7 01/22/2020 Guangxi Mainland China 1/22/2020 17:00 2
## 8 8 01/22/2020 Guizhou Mainland China 1/22/2020 17:00 1
## 9 9 01/22/2020 Hainan Mainland China 1/22/2020 17:00 4
## 10 10 01/22/2020 Hebei Mainland China 1/22/2020 17:00 1
## Deaths Recovered
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
# Column names of the data frame.
colnames(df)
## [1] "SNo" "ObservationDate" "Province.State" "Country.Region"
## [5] "Last.Update" "Confirmed" "Deaths" "Recovered"
# Per-column summary of the whole data frame. The generic must be *called* on
# the data: referring to `summary.default` without parentheses only prints the
# source code of that base function and summarises nothing.
summary(df)
# Five-number summary (plus mean) of the confirmed-case counts.
summary(df$Confirmed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -302844 1042 10375 85671 50752 5863138

# The column contains negative values (see the minimum above) and zeros, for
# which log() yields NaN and -Inf respectively. Keep only strictly positive
# counts before taking logs so the log-scale plots are built from finite
# values; which() also drops any NA entries.
confirmed_positive <- df$Confirmed[which(df$Confirmed > 0)]
log_confirmed <- log(confirmed_positive)

# Histograms: raw scale, then log scale.
hist(df$Confirmed)
hist(
  log_confirmed,
  main = "Histogram of log(Confirmed), Confirmed > 0",
  xlab = "log(Confirmed)"
)

# Boxplots: raw scale, then log scale.
boxplot(df$Confirmed)
boxplot(
  log_confirmed,
  main = "log(Confirmed), Confirmed > 0",
  ylab = "log(Confirmed)"
)
# Two-sample Q-Q plot: quantiles of Confirmed against quantiles of Recovered.
qqplot(df$Confirmed, df$Recovered)

# Normal Q-Q plot of Confirmed with its reference line. qqline() draws a line
# through the quartiles of the sample versus the theoretical normal quartiles,
# so it belongs on top of qqnorm(), not on the two-sample plot above.
qqnorm(df$Confirmed)
qqline(df$Confirmed)

# Confirmed counts in row order. seq_along() is evaluated inside the data and
# stays correct for a zero-row data frame, unlike 1:nrow(df).
ggplot(data = df, aes(x = seq_along(Confirmed), y = Confirmed, group = 1)) +
  geom_line() +
  labs(x = "Row index")

# ggpubr normal Q-Q plot of Confirmed.
ggqqplot(df$Confirmed)