library(readr)
# Read the passenger list from the working directory; the column-type message
# that read_csv() would normally print is switched off.
titanic2208 <- read_csv(file = "titanic2208.csv", show_col_types = FALSE)
# Open the table in the data viewer, then look up the reader's documentation.
View(x = titanic2208)
help(topic = "read_csv")
## starting httpd help server ... done
# Ex. 1
# First and last six rows (six is the default), then per-column summaries.
head(titanic2208, n = 6L)
tail(titanic2208, n = 6L)
summary(object = titanic2208)
## Name Title Sex Age
## Length:2208 Length:2208 Length:2208 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.:22.00
## Mode :character Mode :character Mode :character Median :29.00
## Mean :29.92
## 3rd Qu.:36.00
## Max. :74.00
## NA's :9
## Class Ticket Fare Fare today
## Length:2208 Length:2208 Length:2208 Min. : 245
## Class :character Class :character Class :character 1st Qu.: 612
## Mode :character Mode :character Mode :character Median : 1120
## Mean : 2606
## 3rd Qu.: 2420
## Max. :39600
## NA's :917
## Group Joined Job Life Boat
## Length:2208 Length:2208 Length:2208 Length:2208
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Body Survived
## Min. : -1 Mode :logical
## 1st Qu.: 85 FALSE:1496
## Median :172 TRUE :712
## Mean :170
## 3rd Qu.:255
## Max. :334
## NA's :1967
# Ex.2
# Help pages for c() (combining values into a vector or list) and array(),
# followed by the runnable examples from the array() help page.
help("c")
help("array")
example("array")
##
## array> dim(as.array(letters))
## [1] 26
##
## array> array(1:3, c(2,4)) # recycle 1:3 "2 2/3 times"
## [,1] [,2] [,3] [,4]
## [1,] 1 3 2 1
## [2,] 2 1 3 2
##
## array> # [,1] [,2] [,3] [,4]
## array> #[1,] 1 3 2 1
## array> #[2,] 2 1 3 2
## array>
## array>
## array>
help("is.factor") # Opens the help page that documents is.factor() (factors), not arithmetic
# A data frame passes is.list() but is neither a matrix nor an array.
df <- data.frame(x = 1:3, y = 5:7)
is.list(df)
## [1] TRUE
is.matrix(df)
## [1] FALSE
is.array(df)
## [1] FALSE
# NOTE: `df` is now rebound to a plain list, and `lst_df` (despite its name) is
# an atomic named vector built with c(); neither is a data frame.
df <- list(x = 1:3, y = 5:7)
lst_df <- c(x = 1:3, y = 5:7)
is.array(lst_df)
## [1] FALSE
is.data.frame(df)
## [1] FALSE
is.data.frame(lst_df)
## [1] FALSE
# c() on atomic inputs yields an atomic vector, so is.list() is FALSE.
is.list(lst_df)
## [1] FALSE
# `arr` has three dimensions (2 x 4 x 2 = 16 cells, so presumably only the
# first 16 of the 24 supplied values are used - confirm); `matr` is 6 x 4.
arr <- array(c(1:24), dim = c(2,4,2))
matr <- matrix(c(1:24), ncol = 4, nrow = 6)
# A 3-d array is not a matrix, but a matrix does count as an array.
is.matrix(arr)
## [1] FALSE
is.array(matr)
## [1] TRUE
# Run the built-in graphics demonstration; its echoed script is recorded below.
demo("graphics")
##
##
## demo(graphics)
## ---- ~~~~~~~~
##
## > # Copyright (C) 1997-2009 The R Core Team
## >
## > require(datasets)
##
## > require(grDevices); require(graphics)
##
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities. Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly. The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## >
## >
## > x <- stats::rnorm(50)
##
## > opar <- par(bg = "white")
##
## > plot(x, ann = FALSE, type = "n")

##
## > abline(h = 0, col = gray(.90))
##
## > lines(x, col = "green4", lty = "dotted")
##
## > points(x, bg = "limegreen", pch = 21)
##
## > title(main = "Simple Use of Color In a Plot",
## + xlab = "Just a Whisper of a Label",
## + col.main = "blue", col.lab = gray(.8),
## + cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
##
## > ## A little color wheel. This code just plots equally spaced hues in
## > ## a pie chart. If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced. On my display at home, these colors tend to cluster at
## > ## the RGB primaries. On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## >
## > par(bg = "gray")
##
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

##
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
##
## > title(xlab = "(Use this as a test of monitor linearity)",
## + cex.lab = 0.8, font.lab = 3)
##
## > ## We have already confessed to having these. This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## >
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
##
## > names(pie.sales) <- c("Blueberry", "Cherry",
## + "Apple", "Boston Cream", "Other", "Vanilla Cream")
##
## > pie(pie.sales,
## + col = c("purple","violetred1","green3","cornsilk","cyan","white"))

##
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
##
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
##
## > ## Boxplots: I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## >
## > par(bg="cornsilk")
##
## > n <- 10
##
## > g <- gl(n, 100, n*100)
##
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
##
## > boxplot(split(x,g), col="lavender", notch=TRUE)

##
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
##
## > ## An example showing how to fill between curves.
## >
## > par(bg="white")
##
## > n <- 100
##
## > x <- c(0,cumsum(rnorm(n)))
##
## > y <- c(0,cumsum(rnorm(n)))
##
## > xx <- c(0:n, n:0)
##
## > yy <- c(x, rev(y))
##
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

##
## > polygon(xx, yy, col="gray")
##
## > title("Distance Between Brownian Motions")
##
## > ## Colored plot margins, axis labels and titles. You do need to be
## > ## careful with these kinds of effects. It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## >
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
##
## > par(bg="lightgray")
##
## > plot(x, type="n", axes=FALSE, ann=FALSE)

##
## > usr <- par("usr")
##
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
##
## > lines(x, col="blue")
##
## > points(x, pch=21, bg="lightcyan", cex=1.25)
##
## > axis(2, col.axis="blue", las=1)
##
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
##
## > box()
##
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
##
## > title(xlab= "1996", col.lab="red")
##
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## >
## > par(bg="cornsilk")
##
## > x <- rnorm(1000)
##
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

##
## > title(main="1000 Normal Random Variates", font.main=3)
##
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## >
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

##
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## + bg = c("red", "green3", "blue")[unclass(iris$Species)])

##
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## >
## > x <- 10*1:nrow(volcano)
##
## > y <- 10*1:ncol(volcano)
##
## > lev <- pretty(range(volcano), 10)
##
## > par(bg = "lightcyan")
##
## > pin <- par("pin")
##
## > xdelta <- diff(range(x))
##
## > ydelta <- diff(range(y))
##
## > xscale <- pin[1]/xdelta
##
## > yscale <- pin[2]/ydelta
##
## > scale <- min(xscale, yscale)
##
## > xadd <- 0.5*(pin[1]/scale - xdelta)
##
## > yadd <- 0.5*(pin[2]/scale - ydelta)
##
## > plot(numeric(0), numeric(0),
## + xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## + type = "n", ann = FALSE)

##
## > usr <- par("usr")
##
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
##
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
##
## > box()
##
## > title("A Topographic Map of Maunga Whau", font= 4)
##
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
##
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## + at = mean(par("usr")[1:2]), cex=0.7, font=3)
##
## > ## Conditioning plots
## >
## > par(bg="cornsilk")
##
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

##
## > par(opar)
# Ex. 3
# One value of each basic type: double, character, integer (L suffix), complex.
x <- 2.456
y <- "2"
z <- 3L
k <- 1+6i
typeof(x)
## [1] "double"
typeof(y)
## [1] "character"
typeof(z)
## [1] "integer"
typeof(k)
## [1] "complex"
# Explicit conversions between the types.
as.integer(y)
## [1] 2
as.numeric(y)
## [1] 2
as.numeric(z)
## [1] 3
# Converting a complex number keeps only the real part (with a warning).
as.numeric(k)
## Warning: imaginary parts discarded in coercion
## [1] 1
# as.integer(2.456) drops the fractional part, so this is 2 + 2.
as.integer(x) + as.integer(y)
## [1] 4
# `/` returns a double even when both operands are integers.
as.integer(z) / as.integer(y)
## [1] 1.5
# Division by zero gives Inf; Inf * 0 is undefined and gives NaN.
1/0
## [1] Inf
1/0 + 1/0
## [1] Inf
1/0 * 0
## [1] NaN
sqrt(-1)
## Warning in sqrt(-1): NaNs produced
## [1] NaN
# Inf and NaN have no integer representation: as.integer() returns NA.
as.integer(1/0)
## Warning: NAs introduced by coercion to integer range
## [1] NA
as.integer(1/0 + 1/0)
## Warning: NAs introduced by coercion to integer range
## [1] NA
as.integer(sqrt(-1))
## Warning in sqrt(-1): NaNs produced
## [1] NA
as.double(sqrt(-1))
## Warning in sqrt(-1): NaNs produced
## [1] NaN
# With a complex argument sqrt() returns a complex result instead of NaN.
sqrt(4+0i)
## [1] 2+0i
sqrt(-1+0i)
## [1] 0+1i
# Only the real part (0) of 0+1i survives the conversion to double.
as.double(sqrt(-1+0i))
## Warning: imaginary parts discarded in coercion
## [1] 0
# Strings in scientific notation are parsed as numbers.
as.numeric("-3e-4")
## [1] -3e-04
as.numeric("3e4")
## [1] 30000
# Whole-number doubles convert to integers without loss.
a <- 3
b <- 2
as.integer(a) + as.integer(b)
## [1] 5
# Ex 4
# Today's date; the recorded outputs below are from the day the script was run.
y <- Sys.Date()
y
## [1] "2022-10-12"
# `y` already has class "Date", so as.Date() leaves the class unchanged.
z <- as.Date(y)
weekdays(y)
## [1] "Wednesday"
class(y)
## [1] "Date"
class(z)
## [1] "Date"
# The class is "Date" but the underlying storage type is double.
typeof(y)
## [1] "double"
# paste() converts the date to a character string.
pste_date <- paste(y)
class(pste_date)
## [1] "character"
# Ex. 5
# Lookup table: the integers 0-9, each named by its English word.
digits <- setNames(
  0:9,
  c("zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine")
)

# Translate English digit word(s) into integer(s) using the global `digits`
# table; a word that is not a name in the table gives NA.
words.2.digits <- function(word) {
  matched <- digits[word]
  as.integer(matched)
}

# Translate a digit into its English word by comparing it with every entry of
# the global `digits` table; a value with no match gives character(0).
digits.2.words <- function(digit) {
  is_hit <- digits == digit
  names(digits[is_hit])
}

words.2.digits("zero")
## [1] 0
digits.2.words(0)
## [1] "zero"
# Typing a function's name without parentheses prints its source.
words.2.digits
## function(word) {
##   matched <- digits[word]
##   as.integer(matched)
## }
digits.2.words
## function(digit) {
##   is_hit <- digits == digit
##   names(digits[is_hit])
## }
# Lookup table: weekday numbers 1-7 (Monday = 1), named by weekday.
# NOTE: this rebinds the global `digits`, so any function defined earlier that
# reads `digits` sees the weekday table from here on.
# "Wednesday" is capitalised unlike the other names; it is left as is so that
# digit.2.day(3) keeps returning "Wednesday".
digits <- 1:7
names(digits) <- c("monday","tuesday","Wednesday","thursday","friday","saturday","sunday")

#' Convert weekday name(s) to weekday number(s).
#'
#' Matching ignores letter case, so "wednesday", "Wednesday" and "WEDNESDAY"
#' all resolve to 3 even though the table mixes capitalisation.
#'
#' @param day Character vector of weekday names.
#' @return Integer vector the same length as `day`; NA for unknown names.
day.2.digit <- function(day) {
  position <- match(tolower(day), tolower(names(digits)))
  as.integer(digits[position])
}

#' Convert a weekday number to its name as stored in the global `digits` table.
#'
#' @param digit Weekday number (1 = Monday ... 7 = Sunday).
#' @return Character vector with the matching name; character(0) if none match.
digit.2.day <- function(digit) {
  names(digits[digits == digit])
}

digit.2.day(3)
## [1] "Wednesday"
day.2.digit("tuesday")
## [1] 2
# Same lookup as before, but the table now stores the digits as text.
digits <- as.character(0:9)
names(digits) <- c(
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
)

# Look the word up in the global `digits` table and convert the stored text to
# an integer.
words.2.digits <- function(word) {
  digit_text <- digits[word]
  as.integer(digit_text)
}

# Find the word whose stored text equals `digit`.
digits.2.words <- function(digit) {
  same_digit <- digits == digit
  names(digits[same_digit])
}

words.2.digits("zero")
## [1] 0
digits.2.words("3")
## [1] "three"
# Ex. 6
# Hand-entered table of 24 people, one row per person.
# `birth` is stored as "YYYY-MM-DD" text, not as Date, so summary() reports it
# as character. Units of `height` and `weight` are not stated here
# (presumably cm and kg - TODO confirm).
people_specs <- data.frame(
name = c("Alice","Bob","Charles","David","Eve","Francis","George","Hue","Ian","Jane","Kate","Luci","Margareth","Nikki","Owen","Paul","Rob","Susan","Tom","Ulster","Vickie","Xuo","Yong","Zed"),
gender = c("female","male","male","male","female","male","male","male","male","female","female","female","female","female","male","male","male","female","male","male","female","male","female","male"),
birth = c("1978-01-18","1994-11-18","1983-07-10","1999-05-25","1978-05-04","1978-01-13","1986-04-04","1999-09-29","1991-03-27","1990-06-14","1978-07-07","1997-05-23","1980-06-11","1975-03-04","1976-02-07","1999-11-07","1980-08-30","1991-11-23","1981-11-20","1995-02-24","1998-08-16","1998-04-21","1996-06-30","1989-12-28"),
height = c(176.4,166.1,181.9,172.8,178.2,159.0,167.4,159.5,167.3,171.2,159.4,171.9,180.3,168.2,174.9,182.4,175.5,180.0,174.6,152.1,166.8,182.0,169.3,171.0),
weight = c(76.8,64.8,59.0,62.0,66.1,73.1,60.1,80.3,51.0,72.1,71.9,74.0,91.1,50.8,53.4,59.7,59.9,68.7,63.3,65.3,66.5,69.7,76.7,75.0)
)
# Per-column overview: length/class for the text columns, quartiles for the
# numeric ones.
summary(people_specs)
## name gender birth height
## Length:24 Length:24 Length:24 Min. :152.1
## Class :character Class :character Class :character 1st Qu.:167.2
## Mode :character Mode :character Mode :character Median :171.6
## Mean :171.2
## 3rd Qu.:176.8
## Max. :182.4
## weight
## Min. :50.80
## 1st Qu.:60.05
## Median :66.30
## Mean :67.14
## 3rd Qu.:73.33
## Max. :91.10
# Ex 7
# Show the working directory and the files in it; the CSV must be listed here
# for the relative path below to resolve.
getwd()
## [1] "C:/Users/ROSHAN D K/Desktop/R project"
list.files()
## [1] "Asses_2(binom&prob).nb.html" "Asses_2(binom&prob).Rmd"
## [3] "Assesment1.nb.html" "Assesment1.Rmd"
## [5] "Assesment1_files" "rsconnect"
## [7] "titanic2208.csv"
library(readr)
# Read the passenger list, then inspect the first four rows and the structure.
titanic <- read_csv(file = "titanic2208.csv", show_col_types = FALSE)
head(titanic, n = 4)
str(object = titanic)
## spec_tbl_df [2,208 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Name : chr [1:2208] "ALLEN, Miss Elisabeth Walton" "ALLISON, Master Hudson Trevor" "ALLISON, Miss Helen Loraine" "ALLISON, Mr Hudson Joshua Creighton" ...
## $ Title : chr [1:2208] "Miss" "Master" "Miss" "Mr" ...
## $ Sex : chr [1:2208] "female" "male" "female" "male" ...
## $ Age : num [1:2208] 29 0.9 2 30 25 47 62 39 53 71 ...
## $ Class : chr [1:2208] "1st Class" "1st Class" "1st Class" "1st Class" ...
## $ Ticket : chr [1:2208] "24160" "113781" "113781" "113781" ...
## $ Fare : chr [1:2208] "\xa3211 6s 9d" "\xa3151 16s" "\xa3151 16s" "\xa3151 16s" ...
## $ Fare today: num [1:2208] 16300 11700 11700 11700 11700 2050 6020 NA 3980 3820 ...
## $ Group : chr [1:2208] NA NA NA NA ...
## $ Joined : chr [1:2208] "Southampton" "Southampton" "Southampton" "Southampton" ...
## $ Job : chr [1:2208] NA NA NA "Businessman" ...
## $ Life Boat : chr [1:2208] "2" "11" NA NA ...
## $ Body : num [1:2208] NA NA NA 135 NA NA NA NA NA 22 ...
## $ Survived : logi [1:2208] TRUE TRUE FALSE FALSE FALSE TRUE ...
## - attr(*, "spec")=
## .. cols(
## .. Name = col_character(),
## .. Title = col_character(),
## .. Sex = col_character(),
## .. Age = col_double(),
## .. Class = col_character(),
## .. Ticket = col_character(),
## .. Fare = col_character(),
## .. `Fare today` = col_double(),
## .. Group = col_character(),
## .. Joined = col_character(),
## .. Job = col_character(),
## .. `Life Boat` = col_character(),
## .. Body = col_double(),
## .. Survived = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary of the passenger table read from "titanic2208.csv".
summary(object = titanic)
## Name Title Sex Age
## Length:2208 Length:2208 Length:2208 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.:22.00
## Mode :character Mode :character Mode :character Median :29.00
## Mean :29.92
## 3rd Qu.:36.00
## Max. :74.00
## NA's :9
## Class Ticket Fare Fare today
## Length:2208 Length:2208 Length:2208 Min. : 245
## Class :character Class :character Class :character 1st Qu.: 612
## Mode :character Mode :character Mode :character Median : 1120
## Mean : 2606
## 3rd Qu.: 2420
## Max. :39600
## NA's :917
## Group Joined Job Life Boat
## Length:2208 Length:2208 Length:2208 Length:2208
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Body Survived
## Min. : -1 Mode :logical
## 1st Qu.: 85 FALSE:1496
## Median :172 TRUE :712
## Mean :170
## 3rd Qu.:255
## Max. :334
## NA's :1967
# Convert the categorical columns to factors.
# `[[` extracts a column as a plain vector. The previous `titanic[2]` form
# returned a one-column tibble, for which factor() produced a single <NA> and
# the warning "cannot xtfrm data frames" instead of one value per passenger.
factor(titanic[["Title"]])
factor(titanic[["Sex"]])
# Age holds fractional values (e.g. 0.9 for infants) that match none of the
# integer levels 0:100 and would become NA, so ages are reduced to completed
# years first. Missing ages stay NA.
factor(floor(titanic[["Age"]]), levels = 0:100, ordered = TRUE)
factor(titanic[["Class"]])
library(dplyr) # attach dplyr (the package must already be installed)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Count the missing values in every column; the result is a one-row table with
# one column per variable. across() replaces the superseded summarise_all().
titanic %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# Share of everyone on board who survived, as a formatted percentage string.
total_survived <- percent(sum(titanic$Survived) / nrow(titanic))
total_survived
## [1] "32%"

# Composition of the survivors: every ratio below divides by the number of
# survivors, so it answers "what fraction of the survivors were ...?".
# NOTE(review): earlier comments described these as the chance that a member
# of the group survived. That is a different quantity, which needs the group
# size as denominator, e.g.
#   mean(titanic$Survived[titanic$Class == "1st Class"], na.rm = TRUE)
# Confirm which one the exercise asks for.
is_survivor <- titanic$Survived # already logical; no comparison with "TRUE"
n_survivors <- sum(is_survivor)

# na.rm = TRUE keeps a missing Sex/Age/Class on a survivor from turning the
# whole count into NA (Age has missing values).
male_survived <- sum(is_survivor & titanic$Sex == "male", na.rm = TRUE) /
  n_survivors
female_survived <- sum(is_survivor & titanic$Sex == "female", na.rm = TRUE) /
  n_survivors
child_survived <- sum(is_survivor & titanic$Age < 18, na.rm = TRUE) /
  n_survivors
100 * male_survived # % of survivors who were male
## [1] 49.57865
100 * female_survived # % of survivors who were female
## [1] 50.42135
100 * child_survived # % of survivors known to be under 18
## [1] 12.07865

one_class <- sum(is_survivor & titanic$Class == "1st Class", na.rm = TRUE) /
  n_survivors
two_class <- sum(is_survivor & titanic$Class == "2nd Class", na.rm = TRUE) /
  n_survivors
three_class <- sum(is_survivor & titanic$Class == "3rd Class", na.rm = TRUE) /
  n_survivors
100 * one_class # % of survivors who travelled 1st class
## [1] 28.23034
100 * two_class # % of survivors who travelled 2nd class
## [1] 16.71348
100 * three_class # % of survivors who travelled 3rd class
## [1] 25.2809
# Ex 8
# Two class lists with random integer grades in 0-99 and a gender label.
boys1 <- data.frame(
  id = 1:8,
  grade = as.integer(runif(8) * 100),
  gender = "boy"
)
girls1 <- data.frame(
  id = 9:18,
  grade = as.integer(runif(10) * 100),
  gender = "girl"
)
# Stack the two tables row-wise (they have 8 and 10 rows, so they cannot be
# placed side by side) and relabel the columns for display.
boy_girl <- setNames(
  rbind(boys1, girls1),
  c("Boys & Girls", "Grade", "Gender")
)
boy_girl
# Ex 9
# Two grade tables whose columns start out with different names.
boys2 <- data.frame(
  id = 1:8,
  grade = as.integer(runif(8) * 100)
)
girls2 <- data.frame(
  Number = 1:10,
  Mark = as.integer(runif(10) * 100)
)
boys2
# Give girls2 the same column names as boys2.
colnames(girls2) <- c("id", "grade")
#write.csv(boy_girl)
#savehistory("Assesment1.Rmd")
#help(rbind)