# R Notebook

library(readr)
# Load the passenger table.
# The Fare column stores the pound sign as the single byte 0xA3 (it shows up
# as "\xa3" when the file is parsed as UTF-8), so the file encoding is
# declared explicitly.
titanic2208 <- read_csv(
  "titanic2208.csv",
  locale = locale(encoding = "latin1"),
  show_col_types = FALSE
)
# View() opens a data-viewer window, so only call it in an interactive session.
if (interactive()) {
  View(titanic2208)
}
help("read_csv")

## starting httpd help server ... done

# Ex. 1
# Peek at both ends of the table (six rows each, which is the default), then
# print per-column summary statistics.
head(titanic2208, n = 6L)

tail(titanic2208, n = 6L)

summary(object = titanic2208)

##      Name              Title               Sex                 Age       
##  Length:2208        Length:2208        Length:2208        Min.   : 0.00  
##  Class :character   Class :character   Class :character   1st Qu.:22.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :29.00  
##                                                           Mean   :29.92  
##                                                           3rd Qu.:36.00  
##                                                           Max.   :74.00  
##                                                           NA's   :9      
##     Class              Ticket              Fare             Fare today   
##  Length:2208        Length:2208        Length:2208        Min.   :  245  
##  Class :character   Class :character   Class :character   1st Qu.:  612  
##  Mode  :character   Mode  :character   Mode  :character   Median : 1120  
##                                                           Mean   : 2606  
##                                                           3rd Qu.: 2420  
##                                                           Max.   :39600  
##                                                           NA's   :917    
##     Group              Joined              Job             Life Boat        
##  Length:2208        Length:2208        Length:2208        Length:2208       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       Body       Survived      
##  Min.   : -1    Mode :logical  
##  1st Qu.: 85    FALSE:1496     
##  Median :172    TRUE :712      
##  Mean   :170                   
##  3rd Qu.:255                   
##  Max.   :334                   
##  NA's   :1967

# Ex.2
help("c") # documentation for c(): combining values into a vector or list
help("array")
example("array") # runs the examples from the array() help page

## 
## array> dim(as.array(letters))
## [1] 26
## 
## array> array(1:3, c(2,4)) # recycle 1:3 "2 2/3 times"
##      [,1] [,2] [,3] [,4]
## [1,]    1    3    2    1
## [2,]    2    1    3    2
## 
## array> #     [,1] [,2] [,3] [,4]
## array> #[1,]    1    3    2    1
## array> #[2,]    2    1    3    2
## array> 
## array> 
## array>

help("is.factor") # Shows the help page for is.factor()

# A data frame is a list (of columns), but it is neither a matrix nor an array.
df <- data.frame(x = 1:3, y = 5:7)
is.list(df)

## [1] TRUE

is.matrix(df)

## [1] FALSE

is.array(df)

## [1] FALSE

# NOTE: despite the names, `df` is re-bound to a plain list here, and `lst_df`
# is an atomic vector built with c(), not a list.
df <- list(x = 1:3, y = 5:7)
lst_df <- c(x = 1:3, y = 5:7)
is.array(lst_df)

## [1] FALSE

is.data.frame(df)

## [1] FALSE

is.data.frame(lst_df)

## [1] FALSE

is.list(lst_df)

## [1] FALSE

# A 3-dimensional array is not a matrix, but a matrix is a (2-d) array.
arr <- array(c(1:24), dim = c(2,4,2))
matr <- matrix(c(1:24), ncol = 4, nrow = 6)
is.matrix(arr)

## [1] FALSE

is.array(matr)

## [1] TRUE

# Run the graphics demo; its transcript is recorded below.
demo(graphics)

## 
## 
##  demo(graphics)
##  ---- ~~~~~~~~
## 
## > #  Copyright (C) 1997-2009 The R Core Team
## > 
## > require(datasets)
## 
## > require(grDevices); require(graphics)
## 
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities.  Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly.  The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## > 
## > 
## > x <- stats::rnorm(50)
## 
## > opar <- par(bg = "white")
## 
## > plot(x, ann = FALSE, type = "n")

## 
## > abline(h = 0, col = gray(.90))
## 
## > lines(x, col = "green4", lty = "dotted")
## 
## > points(x, bg = "limegreen", pch = 21)
## 
## > title(main = "Simple Use of Color In a Plot",
## +       xlab = "Just a Whisper of a Label",
## +       col.main = "blue", col.lab = gray(.8),
## +       cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
## 
## > ## A little color wheel.    This code just plots equally spaced hues in
## > ## a pie chart.    If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced.  On my display at home, these colors tend to cluster at
## > ## the RGB primaries.  On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## > 
## > par(bg = "gray")
## 
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

## 
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
## 
## > title(xlab = "(Use this as a test of monitor linearity)",
## +       cex.lab = 0.8, font.lab = 3)
## 
## > ## We have already confessed to having these.  This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## > 
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
## 
## > names(pie.sales) <- c("Blueberry", "Cherry",
## +              "Apple", "Boston Cream", "Other", "Vanilla Cream")
## 
## > pie(pie.sales,
## +     col = c("purple","violetred1","green3","cornsilk","cyan","white"))

## 
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
## 
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
## 
## > ## Boxplots:  I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## > 
## > par(bg="cornsilk")
## 
## > n <- 10
## 
## > g <- gl(n, 100, n*100)
## 
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
## 
## > boxplot(split(x,g), col="lavender", notch=TRUE)

## 
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
## 
## > ## An example showing how to fill between curves.
## > 
## > par(bg="white")
## 
## > n <- 100
## 
## > x <- c(0,cumsum(rnorm(n)))
## 
## > y <- c(0,cumsum(rnorm(n)))
## 
## > xx <- c(0:n, n:0)
## 
## > yy <- c(x, rev(y))
## 
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

## 
## > polygon(xx, yy, col="gray")
## 
## > title("Distance Between Brownian Motions")
## 
## > ## Colored plot margins, axis labels and titles.    You do need to be
## > ## careful with these kinds of effects.    It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## > 
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
## 
## > par(bg="lightgray")
## 
## > plot(x, type="n", axes=FALSE, ann=FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
## 
## > lines(x, col="blue")
## 
## > points(x, pch=21, bg="lightcyan", cex=1.25)
## 
## > axis(2, col.axis="blue", las=1)
## 
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
## 
## > box()
## 
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
## 
## > title(xlab= "1996", col.lab="red")
## 
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## > 
## > par(bg="cornsilk")
## 
## > x <- rnorm(1000)
## 
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

## 
## > title(main="1000 Normal Random Variates", font.main=3)
## 
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## > 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

## 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## +       bg = c("red", "green3", "blue")[unclass(iris$Species)])

## 
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## > 
## > x <- 10*1:nrow(volcano)
## 
## > y <- 10*1:ncol(volcano)
## 
## > lev <- pretty(range(volcano), 10)
## 
## > par(bg = "lightcyan")
## 
## > pin <- par("pin")
## 
## > xdelta <- diff(range(x))
## 
## > ydelta <- diff(range(y))
## 
## > xscale <- pin[1]/xdelta
## 
## > yscale <- pin[2]/ydelta
## 
## > scale <- min(xscale, yscale)
## 
## > xadd <- 0.5*(pin[1]/scale - xdelta)
## 
## > yadd <- 0.5*(pin[2]/scale - ydelta)
## 
## > plot(numeric(0), numeric(0),
## +      xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## +      type = "n", ann = FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
## 
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
## 
## > box()
## 
## > title("A Topographic Map of Maunga Whau", font= 4)
## 
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
## 
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## +       at = mean(par("usr")[1:2]), cex=0.7, font=3)
## 
## > ## Conditioning plots
## > 
## > par(bg="cornsilk")
## 
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

## 
## > par(opar)

# Ex. 3
# One value of each basic atomic type: double, character, integer, complex.
x <- 2.456
y <- "2"
z <- 3L
k <- 1+6i
typeof(x)

## [1] "double"

typeof(y)

## [1] "character"

typeof(z)

## [1] "integer"

typeof(k)

## [1] "complex"

# Explicit coercion between types.
as.integer(y)

## [1] 2

as.numeric(y)

## [1] 2

as.numeric(z)

## [1] 3

# Coercing a complex value to numeric keeps only the real part (with a warning).
as.numeric(k)

## Warning: imaginary parts discarded in coercion

## [1] 1

# as.integer(x) drops the fractional part of 2.456, so this is 2 + 2.
as.integer(x) + as.integer(y)

## [1] 4

# `/` on two integers yields a double result.
as.integer(z) / as.integer(y)

## [1] 1.5

# Special floating-point values: Inf and NaN.
1/0

## [1] Inf

1/0 + 1/0

## [1] Inf

# Evaluated as (1/0) * 0, i.e. Inf * 0, which is NaN.
1/0 * 0

## [1] NaN

sqrt(-1)

## Warning in sqrt(-1): NaNs produced

## [1] NaN

# Inf and NaN have no integer representation: as.integer() returns NA.
as.integer(1/0)

## Warning: NAs introduced by coercion to integer range

## [1] NA

as.integer(1/0 + 1/0)

## Warning: NAs introduced by coercion to integer range

## [1] NA

as.integer(sqrt(-1))

## Warning in sqrt(-1): NaNs produced

## [1] NA

as.double(sqrt(-1))

## Warning in sqrt(-1): NaNs produced

## [1] NaN

# With complex input, sqrt() of a negative number is defined.
sqrt(4+0i)

## [1] 2+0i

sqrt(-1+0i)

## [1] 0+1i

# Converting 0+1i to double discards the imaginary part, leaving 0.
 as.double(sqrt(-1+0i))

## Warning: imaginary parts discarded in coercion

## [1] 0

# Strings in scientific notation are parsed by as.numeric().
as.numeric("-3e-4")

## [1] -3e-04

as.numeric("3e4")

## [1] 30000

# Integer addition after coercing two doubles.
a <- 3
b <- 2
as.integer(a) + as.integer(b)

## [1] 5

## [1] 5

# Ex 4
# Current date as a Date object.
y <- Sys.Date()
y

## [1] "2022-10-12"

# z is produced by as.Date() from y; its class is checked below.
z <- as.Date(y)
weekdays(y)

## [1] "Wednesday"

class(y)

## [1] "Date"

class(z)

## [1] "Date"

# The class is "Date", but the underlying storage type is double.
typeof(y)

## [1] "double"

# Text form of the date.
pste_date <- as.character(y)
class(pste_date)

## [1] "character"

# Ex. 5
# Lookup table: English digit names -> integer values 0..9.
digits <- 0:9
names(digits) <- c(
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
)

# Translate digit names into integers.
#   word: character vector of names found in `digits` (e.g. "zero").
# Returns an integer vector; a name that is not in the table gives NA.
# The global `digits` table is read when the function is called.
words.2.digits <- function(word) {
  as.integer(digits[word])
}

# Translate digit values back into their names.
#   digit: vector of values to look up in `digits`.
# Returns a character vector with one name per input element (NA for a value
# that is not in the table). match() replaces `digits == digit`, which
# recycled vector input against the table and returned wrong names.
digits.2.words <- function(digit) {
  names(digits)[match(digit, digits)]
}

words.2.digits("zero")  # name -> value

## [1] 0

digits.2.words(0)  # value -> name

## [1] "zero"

words.2.digits

## function(word) {
##   as.integer(digits[word])
## }

digits.2.words

## function(digit) {
##   names(digits)[match(digit, digits)]
## }

# Lookup table: lower-case weekday names -> 1 (monday) .. 7 (sunday).
# It lives in its own variable so that it does not overwrite the `digits`
# table that words.2.digits() / digits.2.words() read at call time.
weekday_numbers <- 1:7
names(weekday_numbers) <- c(
  "monday", "tuesday", "wednesday", "thursday",
  "friday", "saturday", "sunday"
)

# Translate weekday names into their numbers.
#   day: character vector of weekday names; matching ignores case, so both
#        "Wednesday" and "wednesday" work.
# Returns an integer vector; unknown names give NA.
day.2.digit <- function(day) {
  if (is.character(day)) {
    day <- tolower(day)
  }
  as.integer(weekday_numbers[day])
}

# Translate weekday numbers into (lower-case) weekday names.
#   digit: vector of values in 1..7.
# Returns a character vector with one name per input element (NA if unknown).
digit.2.day <- function(digit) {
  names(weekday_numbers)[match(digit, weekday_numbers)]
}
digit.2.day(3)  # number -> weekday name

## [1] "wednesday"

day.2.digit("tuesday")

## [1] 2

# Same lookup as before, but with the digits stored as character strings.
digits <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")
names(digits) <- c(
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
)

# Translate digit names into integers; as.integer() converts the stored
# strings ("0" .. "9") to numbers. Unknown names give NA.
words.2.digits <- function(word) {
  as.integer(digits[word])
}

# Translate digit strings back into their names, one name per input element
# (NA if unknown). match() replaces `digits == digit`, which recycled vector
# input against the table instead of looking up each element.
digits.2.words <- function(digit) {
  names(digits)[match(digit, digits)]
}
words.2.digits("zero")  # name -> value

## [1] 0

digits.2.words("3")

## [1] "three"

# Ex. 6
 # Small hand-entered table of 24 people: name, gender, birth date (kept as
 # text), height and weight.
 people_specs <- data.frame(
   name = c(
     "Alice", "Bob", "Charles", "David", "Eve", "Francis",
     "George", "Hue", "Ian", "Jane", "Kate", "Luci",
     "Margareth", "Nikki", "Owen", "Paul", "Rob", "Susan",
     "Tom", "Ulster", "Vickie", "Xuo", "Yong", "Zed"
   ),
   gender = c(
     "female", "male", "male", "male", "female", "male",
     "male", "male", "male", "female", "female", "female",
     "female", "female", "male", "male", "male", "female",
     "male", "male", "female", "male", "female", "male"
   ),
   birth = c(
     "1978-01-18", "1994-11-18", "1983-07-10", "1999-05-25",
     "1978-05-04", "1978-01-13", "1986-04-04", "1999-09-29",
     "1991-03-27", "1990-06-14", "1978-07-07", "1997-05-23",
     "1980-06-11", "1975-03-04", "1976-02-07", "1999-11-07",
     "1980-08-30", "1991-11-23", "1981-11-20", "1995-02-24",
     "1998-08-16", "1998-04-21", "1996-06-30", "1989-12-28"
   ),
   height = c(
     176.4, 166.1, 181.9, 172.8, 178.2, 159.0, 167.4, 159.5,
     167.3, 171.2, 159.4, 171.9, 180.3, 168.2, 174.9, 182.4,
     175.5, 180.0, 174.6, 152.1, 166.8, 182.0, 169.3, 171.0
   ),
   weight = c(
     76.8, 64.8, 59.0, 62.0, 66.1, 73.1, 60.1, 80.3,
     51.0, 72.1, 71.9, 74.0, 91.1, 50.8, 53.4, 59.7,
     59.9, 68.7, 63.3, 65.3, 66.5, 69.7, 76.7, 75.0
   )
 )
summary(people_specs)

##      name              gender             birth               height     
##  Length:24          Length:24          Length:24          Min.   :152.1  
##  Class :character   Class :character   Class :character   1st Qu.:167.2  
##  Mode  :character   Mode  :character   Mode  :character   Median :171.6  
##                                                           Mean   :171.2  
##                                                           3rd Qu.:176.8  
##                                                           Max.   :182.4  
##      weight     
##  Min.   :50.80  
##  1st Qu.:60.05  
##  Median :66.30  
##  Mean   :67.14  
##  3rd Qu.:73.33  
##  Max.   :91.10

# Ex 7
# Show the current working directory and list the files in it.
getwd()

## [1] "C:/Users/ROSHAN D K/Desktop/R project"

list.files()

## [1] "Asses_2(binom&prob).nb.html" "Asses_2(binom&prob).Rmd"    
## [3] "Assesment1.nb.html"          "Assesment1.Rmd"             
## [5] "Assesment1_files"            "rsconnect"                  
## [7] "titanic2208.csv"

library(readr)
# Read the passenger table for Ex. 7.
# The Fare column stores the pound sign as the single byte 0xA3 (previously
# displayed as "\xa3"), so the file encoding is declared explicitly.
titanic <- read_csv(
  "titanic2208.csv",
  locale = locale(encoding = "latin1"),
  show_col_types = FALSE
)

head(titanic, 4)

str(titanic)

## spec_tbl_df [2,208 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Name      : chr [1:2208] "ALLEN, Miss Elisabeth Walton" "ALLISON, Master Hudson Trevor" "ALLISON, Miss Helen Loraine" "ALLISON, Mr Hudson Joshua Creighton" ...
##  $ Title     : chr [1:2208] "Miss" "Master" "Miss" "Mr" ...
##  $ Sex       : chr [1:2208] "female" "male" "female" "male" ...
##  $ Age       : num [1:2208] 29 0.9 2 30 25 47 62 39 53 71 ...
##  $ Class     : chr [1:2208] "1st Class" "1st Class" "1st Class" "1st Class" ...
##  $ Ticket    : chr [1:2208] "24160" "113781" "113781" "113781" ...
##  $ Fare      : chr [1:2208] "£211 6s 9d" "£151 16s" "£151 16s" "£151 16s" ...
##  $ Fare today: num [1:2208] 16300 11700 11700 11700 11700 2050 6020 NA 3980 3820 ...
##  $ Group     : chr [1:2208] NA NA NA NA ...
##  $ Joined    : chr [1:2208] "Southampton" "Southampton" "Southampton" "Southampton" ...
##  $ Job       : chr [1:2208] NA NA NA "Businessman" ...
##  $ Life Boat : chr [1:2208] "2" "11" NA NA ...
##  $ Body      : num [1:2208] NA NA NA 135 NA NA NA NA NA 22 ...
##  $ Survived  : logi [1:2208] TRUE TRUE FALSE FALSE FALSE TRUE ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Name = col_character(),
##   ..   Title = col_character(),
##   ..   Sex = col_character(),
##   ..   Age = col_double(),
##   ..   Class = col_character(),
##   ..   Ticket = col_character(),
##   ..   Fare = col_character(),
##   ..   `Fare today` = col_double(),
##   ..   Group = col_character(),
##   ..   Joined = col_character(),
##   ..   Job = col_character(),
##   ..   `Life Boat` = col_character(),
##   ..   Body = col_double(),
##   ..   Survived = col_logical()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Per-column summary of the Titanic table (numeric quartiles, NA counts).
summary(titanic)

##      Name              Title               Sex                 Age       
##  Length:2208        Length:2208        Length:2208        Min.   : 0.00  
##  Class :character   Class :character   Class :character   1st Qu.:22.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :29.00  
##                                                           Mean   :29.92  
##                                                           3rd Qu.:36.00  
##                                                           Max.   :74.00  
##                                                           NA's   :9      
##     Class              Ticket              Fare             Fare today   
##  Length:2208        Length:2208        Length:2208        Min.   :  245  
##  Class :character   Class :character   Class :character   1st Qu.:  612  
##  Mode  :character   Mode  :character   Mode  :character   Median : 1120  
##                                                           Mean   : 2606  
##                                                           3rd Qu.: 2420  
##                                                           Max.   :39600  
##                                                           NA's   :917    
##     Group              Joined              Job             Life Boat        
##  Length:2208        Length:2208        Length:2208        Length:2208       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       Body       Survived      
##  Min.   : -1    Mode :logical  
##  1st Qu.: 85    FALSE:1496     
##  Median :172    TRUE :712      
##  Mean   :170                   
##  3rd Qu.:255                   
##  Max.   :334                   
##  NA's   :1967

# Convert the categorical columns to factors.
# `[[` extracts a column as a vector. `titanic[2]` is a one-column table, and
# calling factor() on that produced a single <NA> value plus a
# "cannot xtfrm data frames" warning instead of one value per passenger.
# (The output recorded from those calls is dropped; re-render to regenerate.)
factor(titanic[["Title"]])

factor(titanic[["Sex"]])

# Age contains fractional values (e.g. 0.9), which would not match any of the
# integer levels 0:100 and would become NA. floor() maps each age to completed
# years so the declared ordered levels apply.
factor(floor(titanic[["Age"]]), ordered = TRUE, levels = 0:100)

factor(titanic[["Class"]])

library(dplyr) # load the dplyr package (it must already be installed)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Count the missing values in every column (one-row result).
# summarise(across()) replaces the superseded summarise_all().
titanic %>%
  summarise(across(everything(), ~ sum(is.na(.x))))

library("scales")

## 
## Attaching package: 'scales'

## The following object is masked from 'package:readr':
## 
##     col_factor

# Overall survival rate, formatted as a percentage string by percent().
total_survived <- percent(sum(titanic$Survived) / nrow(titanic))
total_survived

## [1] "32%"

# Composition of the survivors: each value is the fraction of ALL survivors
# who were male / female / under 18. These are not survival rates within a
# group.
# NOTE(review): if the chance of surviving for e.g. men was intended, use
# mean(titanic$Survived[titanic$Sex == "male"]) instead.
# `Survived` is logical, so it is used directly rather than compared with TRUE
# or the string "TRUE". na.rm = TRUE stops a survivor with a missing Sex or
# Age from turning the whole count into NA.
male_survived <-
  sum(titanic$Survived & titanic$Sex == "male", na.rm = TRUE) /
  sum(titanic$Survived)
female_survived <-
  sum(titanic$Survived & titanic$Sex == "female", na.rm = TRUE) /
  sum(titanic$Survived)
child_survived <-
  sum(titanic$Survived & titanic$Age < 18, na.rm = TRUE) /
  sum(titanic$Survived)

100 * male_survived  # share of survivors who were male, in percent

## [1] 49.57865

100 * female_survived

## [1] 50.42135

100 * child_survived

## [1] 12.07865

# Share of all survivors who travelled in each passenger class.
# NOTE(review): this is the share of survivors, not the chance of surviving
# within a class; for the latter use e.g.
# mean(titanic$Survived[titanic$Class == "1st Class"]).
# `Survived` is logical, so it is used directly rather than compared with TRUE
# or the string "TRUE".
one_class <-
  sum(titanic$Survived & titanic$Class == "1st Class", na.rm = TRUE) /
  sum(titanic$Survived)
two_class <-
  sum(titanic$Survived & titanic$Class == "2nd Class", na.rm = TRUE) /
  sum(titanic$Survived)
three_class <-
  sum(titanic$Survived & titanic$Class == "3rd Class", na.rm = TRUE) /
  sum(titanic$Survived)
100 * one_class # percent of survivors who were in 1st class

## [1] 28.23034

100 * two_class # percent of survivors who were in 2nd class

## [1] 16.71348

100 * three_class # percent of survivors who were in 3rd class

## [1] 25.2809

# Ex 8
# Two tables of random whole-number grades, stacked row-wise with rbind().
# The tables have different numbers of rows (8 and 10), so they are stacked
# rather than placed side by side with cbind().
boys1 <- data.frame(
  id = 1:8,
  grade = as.integer(runif(8) * 100),
  gender = "boy"
)
girls1 <- data.frame(
  id = 9:18,
  grade = as.integer(runif(10) * 100),
  gender = "girl"
)
boy_girl <- rbind(boys1, girls1)
colnames(boy_girl) <- c("Boys & Girls", "Grade", "Gender")
boy_girl

# Ex 9
# boys2 is created with the column names id / grade. girls2 starts out with
# Number / Mark and is then renamed to the same two names.
boys2 <- data.frame(id = 1:8, grade = as.integer(runif(8) * 100))
girls2 <- data.frame(Number = 1:10, Mark = as.integer(runif(10) * 100))
boys2

colnames(girls2) <- c("id", "grade")

#write.csv(boy_girl)
#savehistory("Assesment1.Rmd")
#help(rbind)