Vector Operations

We want to do some vector operations; let's roll!

## Build a numeric vector called myvec
myvec <- c(2, 3, 1, 6, 4, 3, 3, 7)
## Show its contents
myvec

## [1] 2 3 1 6 4 3 3 7

# A couple of summary statistics: mean and standard deviation
mean(myvec)

## [1] 3.625

sd(myvec)

## [1] 1.995531

# Pulling elements out of the vector by position

myvec[3]

## [1] 1

myvec[c(1, 5, 6, 8)]

## [1] 2 4 3 7

## Keep only the values above 4 (logical subsetting)
myvec[myvec > 4]

## [1] 6 7

## The element-wise test behind that subset
myvec > 4

## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE

## Store the values strictly between 2 and 6 in a new object
val126 <- myvec[myvec > 2 & myvec < 6]
val126

## [1] 3 4 3 3

## Overwriting elements by position

myvec[c(4, 6, 7)] <- c(500, 100, 100)
myvec

## [1]   2   3   1 500   4 100 100   7

## Order the values from largest to smallest
vec_sort <- rev(sort(myvec))
vec_sort

## [1] 500 100 100   7   4   3   2   1

#Creating Dataframe in R

## Assemble a small data set from three parallel vectors
p.height <- c(180, 155, 160, 167, 181)
p.weight <- c(65, 50, 52, 58, 70)
p.names <- c("Joana", "Charlotte", "Helen", "Karen", "Amy")
## The names are stored as a factor (levels in alphabetical order)
dataf <- data.frame(
  Height = p.height,
  Weight = p.weight,
  Names = factor(p.names)
)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

glimpse(dataf)

## Rows: 5
## Columns: 3
## $ Height <dbl> 180, 155, 160, 167, 181
## $ Weight <dbl> 65, 50, 52, 58, 70
## $ Names  <fct> Joana, Charlotte, Helen, Karen, Amy

#Playing with The dataframe we created

dim(dataf)

## [1] 5 3

str(dataf)

## 'data.frame':    5 obs. of  3 variables:
##  $ Height: num  180 155 160 167 181
##  $ Weight: num  65 50 52 58 70
##  $ Names : Factor w/ 5 levels "Amy","Charlotte",..: 4 2 3 5 1

summary(dataf)

##      Height          Weight         Names  
##  Min.   :155.0   Min.   :50   Amy      :1  
##  1st Qu.:160.0   1st Qu.:52   Charlotte:1  
##  Median :167.0   Median :58   Helen    :1  
##  Mean   :168.6   Mean   :59   Joana    :1  
##  3rd Qu.:180.0   3rd Qu.:65   Karen    :1  
##  Max.   :181.0   Max.   :70

#Importing Dataset into R

 library(readxl)
students <- read_excel("D:/Computer Programming/Data/Students.xlsx")

##extracting first column 
students[1]

## # A tibble: 30 × 1
##       ID
##    <dbl>
##  1     1
##  2     2
##  3     3
##  4     4
##  5     5
##  6     6
##  7     7
##  8     8
##  9     9
## 10    10
## # ℹ 20 more rows

#extracting first row
students[1,]

## # A tibble: 1 × 14
##      ID `Last Name` `First Name` City        State Gender `Student Status` Major
##   <dbl> <chr>       <chr>        <chr>       <chr> <chr>  <chr>            <chr>
## 1     1 DOE01       JANE01       Los Angeles Cali… Female Graduate         Poli…
## # ℹ 6 more variables: Country <chr>, Age <dbl>, SAT <dbl>,
## #   `Average score (grade)` <dbl>, `Height (in)` <dbl>,
## #   `Newspaper readership (times/wk)` <dbl>

students[c(2,3),2] ##it is giving us two elements

## # A tibble: 2 × 1
##   `Last Name`
##   <chr>      
## 1 DOE02      
## 2 DOE01

students[2,2] ##it should give us one reading

## # A tibble: 1 × 1
##   `Last Name`
##   <chr>      
## 1 DOE02

students[2:5,2] #rows 2 to 5 of column 2

## # A tibble: 4 × 1
##   `Last Name`
##   <chr>      
## 1 DOE02      
## 2 DOE01      
## 3 DOE02      
## 4 DOE03

##Accessing a column without using positions or indexing
students["SAT"]

## # A tibble: 30 × 1
##      SAT
##    <dbl>
##  1  2263
##  2  2006
##  3  2221
##  4  1716
##  5  1701
##  6  1786
##  7  1577
##  8  1842
##  9  1813
## 10  2041
## # ℹ 20 more rows

## Overwrite the cell in row 3, column 4 with "Lilongwe"
students[3,4]="Lilongwe"
## Keep a copy under the name `Students` (capital S). R is case-sensitive, so
## this is a separate object: `students` is extended and renamed further below,
## while `Students` is what the later str(), plotting and testing calls use.
Students<-students

#Row Binding And Column Binding

married<- rep(c("Yes","No"),each=15)
married

##  [1] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [13] "Yes" "Yes" "Yes" "No"  "No"  "No"  "No"  "No"  "No"  "No"  "No"  "No" 
## [25] "No"  "No"  "No"  "No"  "No"  "No"

## Two ways of appending the same vector as a new column:
## cbind() adds it as `married`, then `$<-` adds it again as `marriednew`
students<- cbind(students,married)
students$marriednew<- married

##summary statistics

##first let's try to know the length of the dataset
length(students) ##showing number of variables

## [1] 16

ncol(students)  ##showing number of columns

## [1] 16

class(students) ##displaying class of the dataset

## [1] "data.frame"

typeof(students) ##internal storage type of the object (a data frame is stored as a list)

## [1] "list"

##Cross tabulation

table(students$Gender,students$Major) ##showing number of students majoring specific courses

##         
##          Econ Math Politics
##   Female    3    8        4
##   Male      7    2        6

#Renaming Columns

names(students)[1:3]<- c("id","last","first")
names(students)

##  [1] "id"                              "last"                           
##  [3] "first"                           "City"                           
##  [5] "State"                           "Gender"                         
##  [7] "Student Status"                  "Major"                          
##  [9] "Country"                         "Age"                            
## [11] "SAT"                             "Average score (grade)"          
## [13] "Height (in)"                     "Newspaper readership (times/wk)"
## [15] "married"                         "marriednew"

#Creating subset of the dataset

## One-column data frames holding the city, state and gender of each student
city <- data.frame(students$City)
state <- data.frame(students$State)
## Column names are case-sensitive: the column is `Gender`, and
## `students$gender` would be NULL and produce an empty data frame
gender <- data.frame(students$Gender)
## Three-column subset; note that `City` and `city` are two different objects
City <- data.frame(students$City, students$Age, students$id)
## Put the one-column and the three-column subsets side by side
binded <- cbind(city, City)

#Converting a vector

vec1 <- rep(c(2, 0.5, 1), times = 3)
## Replication: nine copies of the value 1
rep(1, times = 9)

## [1] 1 1 1 1 1 1 1 1 1

## Empty brackets address every element, so the whole vector is overwritten in place
vec1[] <- 1
vec1

## [1] 1 1 1 1 1 1 1 1 1

## Same replacement, starting from a vector of length 3 stretched to the length of vec1
vec1[] <- rep_len(rep(1, 3), length(vec1))

#Changing Fahrenheit to Degrees Celsius

## Descriptive names are used on purpose: assigning to `F` would mask the
## built-in shorthand for FALSE for the rest of the session
fahrenheit <- c(45, 77, 20, 19, 101, 120, 212)
## C = (F - 32) * 5 / 9, applied to the whole vector at once
celsius <- (fahrenheit - 32) * 5 / 9
celsius

## [1]   7.222222  25.000000  -6.666667  -7.222222  38.333333  48.888889 100.000000

vector1 <- c(2, 4, 6)
vector2 <- c(1, 2)
## All products of vector1 with each element of vector2, first element first
vector3 <- as.vector(outer(vector1, vector2))
vector3

## [1]  2  4  6  4  8 12

## The two replacement values are recycled over positions 2 to 5
vector3[2:5] <- c(-0.1, -100)

#binding matrix

## 4 x 4 matrix filled column by column. Only 12 distinct values are wanted,
## so they are recycled explicitly to the 16 cells (the 4th column repeats
## 1:4); this gives matrix() exactly as many values as cells and avoids the
## "data length differs from size of matrix" warning.
mat2 <- matrix(data = rep_len(1:12, 16), nrow = 4, ncol = 4, byrow = FALSE)

## With a single argument, rbind() and cbind() have nothing to bind to
mat3 <- rbind(mat2) ##row binding
if (interactive()) View(mat3) # the data viewer is only opened in an interactive session
mat4 <- cbind(mat2) ##column binding
mat4

##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9    1
## [2,]    2    6   10    2
## [3,]    3    7   11    3
## [4,]    4    8   12    4

#Matrix Dimensions

##dimension of a matrix
dim(mat2)

## [1] 4 4

dim(mat3)

## [1] 4 4

dim(mat4)

## [1] 4 4

nrow(mat2)

## [1] 4

ncol(mat2)

## [1] 4

#subsetting,row,column and diagonal extractions

##first let us create a matrix of 4 by 4, filled row by row
## (exactly 16 values are supplied, one per cell, so matrix() has no surplus
## data to warn about)
mat5 <- matrix(1:16, nrow = 4, ncol = 4, byrow = TRUE)

## overwriting single cells: blank out [1, 4], then write 1..4 back into row 1
mat5[1, 4] <- 0
mat5[1, 1] <- 1
mat5[1, 2] <- 2
mat5[1, 3] <- 3
mat5[1, 4] <- 4
mat5[4,] #row extraction

## [1] 13 14 15 16

mat5[,4] #column extraction

## [1]  4  8 12 16

mat5[2:3,1] ##extracting values in a specific row and column

## [1] 5 9

mat5[1:2,2:4]

##      [,1] [,2] [,3]
## [1,]    2    3    4
## [2,]    6    7    8

diag(mat5)

## [1]  1  6 11 16

##Omitting and overwriting

mat5[-1,-2]

##      [,1] [,2] [,3]
## [1,]    5    7    8
## [2,]    9   11   12
## [3,]   13   15   16

## Work on a copy so mat5 itself stays untouched
mat6 <- mat5
## Replace the whole second row
mat6[2, ] <- 5:8
## Copy the current third column into the third row
mat6[3, ] <- mat6[, 3]
if (interactive()) View(mat6) # the data viewer is only opened in an interactive session

#Exercises

## Build a 4 x 2 matrix (values are filled in column by column)
mat7 <- matrix(
  c(4.3, 3.1, 8.2, 8.2, 3.2, 0.9, 1.6, 6.5),
  nrow = 4, ncol = 2
)
## Drop the first row
mat8 <- mat7[-1, ]
dim(mat8)

## [1] 3 2

## Put the second column into ascending order
mat7[, 2] <- sort(mat7[, 2])
## Drop row 1 and column 1; the single column that is left collapses to a vector
mat8 <- mat7[-1, -1]
mat8

## [1] 1.6 3.2 6.5

mat7 ## the full matrix, whose second column is now sorted

##      [,1] [,2]
## [1,]  4.3  0.9
## [2,]  3.1  1.6
## [3,]  8.2  3.2
## [4,]  8.2  6.5

## Keep the first two rows as a 2 by 2 matrix
ele <- mat7[1:2, ]
dim(ele)

## [1] 2 2

## Overwrite the four cells in rows 4 and 1, columns 2 and 1; the two values
## (minus one half of the diagonal of ele) are recycled to fill them
mat7[c(4, 1), c(2, 1)] <- -diag(ele) / 2
mat8 <- mat7
mat8

##       [,1]  [,2]
## [1,] -0.80 -0.80
## [2,]  3.10  1.60
## [3,]  8.20  3.20
## [4,] -2.15 -2.15

#Turning variables to other factors

library(readxl)
str(Students) ##to check type of variables

## tibble [30 × 14] (S3: tbl_df/tbl/data.frame)
##  $ ID                             : num [1:30] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Last Name                      : chr [1:30] "DOE01" "DOE02" "DOE01" "DOE02" ...
##  $ First Name                     : chr [1:30] "JANE01" "JANE02" "JOE01" "JOE02" ...
##  $ City                           : chr [1:30] "Los Angeles" "Sedona" "Lilongwe" "Lackawana" ...
##  $ State                          : chr [1:30] "California" "Arizona" "New York" "New York" ...
##  $ Gender                         : chr [1:30] "Female" "Female" "Male" "Male" ...
##  $ Student Status                 : chr [1:30] "Graduate" "Undergraduate" "Graduate" "Graduate" ...
##  $ Major                          : chr [1:30] "Politics" "Math" "Math" "Econ" ...
##  $ Country                        : chr [1:30] "US" "US" "US" "US" ...
##  $ Age                            : num [1:30] 30 19 26 33 37 25 39 21 18 33 ...
##  $ SAT                            : num [1:30] 2263 2006 2221 1716 1701 ...
##  $ Average score (grade)          : num [1:30] 67 63 78.1 77.8 65 ...
##  $ Height (in)                    : num [1:30] 61 64 73 68 71 67 70 62 62 66 ...
##  $ Newspaper readership (times/wk): num [1:30] 5 7 6 3 6 5 5 5 6 5 ...

## Convert the text columns that hold categories into factors, so that R
## treats them as categorical variables
for (col_name in c("Gender", "Student Status", "Major", "Country")) {
  Students[[col_name]] <- as.factor(Students[[col_name]])
}

##Package used for visualization in R is ggplot2 #we want to plot some graphs

hist(Students$`Height (in)`,col = rainbow(7),main = "Students Height",xlab = "Height",ylab="Freq")##This is for continous data

###we can also open the Help pane, which sits on the right-hand side next to the console window

library(ggplot2)
barplot(Students$Age,width=10)

plot(Students$Age~Students$`Height (in)`)

scatter.smooth(Students$`Height (in)`~Students$Age,col=rainbow(7))

boxplot(Students$Age~Students$Major)

pairs(Students[,c("Age","Gender","Height (in)","Major","SAT")])

## A fixed colour is a setting, not a mapping, so it goes outside aes();
## inside aes() the string "red" is treated as a data value, which adds a
## legend entry and lets the colour scale pick the actual colour
ggplot(data = Students) +
  geom_bar(mapping = aes(x = Country), colour = "red")

#Class Imbalance: and exporting data

##Inferencing the data

t.test(Students$Age,mu=20,alternative = "greater")

## 
##  One Sample t-test
## 
## data:  Students$Age
## t = 4.1457, df = 29, p-value = 0.0001345
## alternative hypothesis: true mean is greater than 20
## 95 percent confidence interval:
##  23.06874      Inf
## sample estimates:
## mean of x 
##      25.2

shapiro.test(Students$Age) # H0: Age is normally distributed; H1: Age is not normally distributed

## 
##  Shapiro-Wilk normality test
## 
## data:  Students$Age
## W = 0.87053, p-value = 0.001722

t.test(Students$Age~Students$Gender,var.equal=TRUE)

## 
##  Two Sample t-test
## 
## data:  Students$Age by Students$Gender
## t = -1.6403, df = 28, p-value = 0.1121
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -8.9952023  0.9952023
## sample estimates:
## mean in group Female   mean in group Male 
##                 23.2                 27.2

var.test((Students$Age~Students$Gender))

## 
##  F test to compare two variances
## 
## data:  Students$Age by Students$Gender
## F = 0.94396, num df = 14, denom df = 14, p-value = 0.9156
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3169154 2.8116679
## sample estimates:
## ratio of variances 
##          0.9439601

chisq.test(Students$Gender,Students$Major)

## 
##  Pearson's Chi-squared test
## 
## data:  Students$Gender and Students$Major
## X-squared = 5.6, df = 2, p-value = 0.06081

cor(Students$Age,Students$`Height (in)`)

## [1] 0.06615254

cor(Students[,c("Age","Height (in)")])

##                    Age Height (in)
## Age         1.00000000  0.06615254
## Height (in) 0.06615254  1.00000000

cor.test(Students$`Average score (grade)`,Students$Age,method="spearman")

## Warning in cor.test.default(Students$`Average score (grade)`, Students$Age, :
## cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  Students$`Average score (grade)` and Students$Age
## S = 4516.1, p-value = 0.9803
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.0047044

Genderlm<- lm(Students$`Height (in)`~Students$Age)
## Multiple regression of height on age and gender. The lm() call is what
## fits the model: a bare formula is not a fitted model, and summary() of a
## formula only reports its length, class and mode.
multilm <- lm(Students$`Height (in)` ~ Students$Age + Students$Gender)
summary(multilm)

summary(Genderlm)

## 
## Call:
## lm(formula = Students$`Height (in)` ~ Students$Age)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0075 -3.7496 -0.1038  3.7649  8.3514 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  65.30294    3.33594  19.576   <2e-16 ***
## Students$Age  0.04486    0.12786   0.351    0.728    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.731 on 28 degrees of freedom
## Multiple R-squared:  0.004376,   Adjusted R-squared:  -0.03118 
## F-statistic: 0.1231 on 1 and 28 DF,  p-value: 0.7284

library(readxl)

#using tapply function to calculate descriptive statistics

# Make the built-in chickwts data set available
data(chickwts)

# Variance of the chick weights within each feed group
variances <- with(chickwts, tapply(weight, feed, var))

# Name of the feed group whose variance is the largest
max_variance_feed <- names(variances)[which.max(variances)]

# Report that feed group and the size of its variance
print(paste("Feed type with maximum variance:", max_variance_feed))

## [1] "Feed type with maximum variance: meatmeal"

print(paste("Maximum variance:", variances[[max_variance_feed]]))

## [1] "Maximum variance: 4212.09090909091"

#Basic Statistics

#to filter the dataset
data(quakes)
## keep the rows whose depth exceeds 299; inside filter() the column is
## referred to by its bare name, which is looked up in the piped data frame
dep <- quakes |>
  filter(depth > 299)
#to find the interquartile range
IQR(dep$depth)

## [1] 101

##to calculate descriptive statistics
## six-number summary of the average score within each city; the argument
## name is written out as `FUN` rather than relying on partial matching
des <- tapply(
  students$`Average score (grade)`,
  INDEX = students$City,
  FUN = summary
)
des ##displaying the results

## $Acme
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    79.5    79.5    79.5    79.5    79.5    79.5 
## 
## $Amsterdam
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      75      75      75      75      75      75 
## 
## $Beijing
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      79      79      79      79      79      79 
## 
## $`Buenos Aires`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      85      85      85      85      85      85 
## 
## $Caracas
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      92      92      92      92      92      92 
## 
## $Cimax
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   95.88   95.88   95.88   95.88   95.88   95.88 
## 
## $Defiance
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      65      65      65      65      65      65 
## 
## $`Drunkard Creek`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   82.39   82.39   82.39   82.39   82.39   82.39 
## 
## $Embarrass
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   95.84   95.84   95.84   95.84   95.84   95.84 
## 
## $`Hot Coffe`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   81.53   81.53   81.53   81.53   81.53   81.53 
## 
## $Intercourse
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      88      88      88      88      88      88 
## 
## $Java
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   78.94   78.94   78.94   78.94   78.94   78.94 
## 
## $Lackawana
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   77.81   77.81   77.81   77.81   77.81   77.81 
## 
## $Liberal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      87      87      87      87      87      87 
## 
## $Lilongwe
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   78.11   78.11   78.11   78.11   78.11   78.11 
## 
## $Loco
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      64      64      64      64      64      64 
## 
## $`Los Angeles`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      67      67      67      67      67      67 
## 
## $`Mexican Hat`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      80      80      80      80      80      80 
## 
## $Mexico
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   95.42   95.42   95.42   95.42   95.42   95.42 
## 
## $Montreal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      91      91      91      91      91      91 
## 
## $Moscow
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   70.28   70.28   70.28   70.28   70.28   70.28 
## 
## $`New York`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   71.00   73.75   76.50   76.50   79.25   82.00 
## 
## $Remote
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      67      67      67      67      67      67 
## 
## $`San Juan`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      95      95      95      95      95      95 
## 
## $Sedona
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      63      63      63      63      63      63 
## 
## $Stockholm
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      88      88      88      88      88      88 
## 
## $`Tel Aviv`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      69      69      69      69      69      69 
## 
## $`The X`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      89      89      89      89      89      89 
## 
## $Varna
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   79.34   79.34   79.34   79.34   79.34   79.34

## major with the highest mean age; the argument name is written out as
## `FUN` rather than relying on partial matching
vari <- which.max(tapply(students$Age, INDEX = students$Major, FUN = mean))
names(vari)

## [1] "Politics"

#playing with dataset

data(quakes)
plot(quakes$long,quakes$lat,xlab = "Longitude",ylab = "Latitude",main = "The Graph Of Quakes",col=rainbow(7))

data("chickwts")
table(chickwts$feed)

## 
##    casein horsebean   linseed  meatmeal   soybean sunflower 
##        12        10        12        11        14        12

##managing and analyzing the dataset of mtcars

data("mtcars")
require(graphics)
pairs(mtcars, main = "mtcars data", gap = 1/4)

coplot(mpg ~ disp | as.factor(cyl), data = mtcars,
       panel = panel.smooth, rows = 1)

## possibly more meaningful, e.g., for summary() or bivariate plots:
mtcars2 <- within(mtcars, {
   vs <- factor(vs, labels = c("V", "S"))
   am <- factor(am, labels = c("automatic", "manual"))
   cyl  <- ordered(cyl)
   gear <- ordered(gear)
   carb <- ordered(carb)
})
summary(mtcars2)

##       mpg        cyl         disp             hp             drat      
##  Min.   :10.40   4:11   Min.   : 71.1   Min.   : 52.0   Min.   :2.760  
##  1st Qu.:15.43   6: 7   1st Qu.:120.8   1st Qu.: 96.5   1st Qu.:3.080  
##  Median :19.20   8:14   Median :196.3   Median :123.0   Median :3.695  
##  Mean   :20.09          Mean   :230.7   Mean   :146.7   Mean   :3.597  
##  3rd Qu.:22.80          3rd Qu.:326.0   3rd Qu.:180.0   3rd Qu.:3.920  
##  Max.   :33.90          Max.   :472.0   Max.   :335.0   Max.   :4.930  
##        wt             qsec       vs             am     gear   carb  
##  Min.   :1.513   Min.   :14.50   V:18   automatic:19   3:15   1: 7  
##  1st Qu.:2.581   1st Qu.:16.89   S:14   manual   :13   4:12   2:10  
##  Median :3.325   Median :17.71                         5: 5   3: 3  
##  Mean   :3.217   Mean   :17.85                                4:10  
##  3rd Qu.:3.610   3rd Qu.:18.90                                6: 1  
##  Max.   :5.424   Max.   :22.90                                8: 1

##Drawing Bar plot
barplot(mtcars$cyl,col=rainbow(7),main="The Graph Of MtCars$Cyl")

# Formula method
barplot(GNP ~ Year, data = longley)

barplot(cbind(Employed, Unemployed) ~ Year, data = longley)

## 3rd form of formula - 2 categories :
op <- par(mfrow = 2:1, mgp = c(3,1,0)/2, mar = .1+c(3,3:1))
summary(d.Titanic <- as.data.frame(Titanic))

##   Class       Sex        Age     Survived      Freq       
##  1st :8   Male  :16   Child:16   No :16   Min.   :  0.00  
##  2nd :8   Female:16   Adult:16   Yes:16   1st Qu.:  0.75  
##  3rd :8                                   Median : 13.50  
##  Crew:8                                   Mean   : 68.78  
##                                           3rd Qu.: 77.00  
##                                           Max.   :670.00

barplot(Freq ~ Class + Survived, data = d.Titanic,
        subset = Age == "Adult" & Sex == "Male",
        main = "barplot(Freq ~ Class + Survived, *)", ylab = "# {passengers}", legend.text = TRUE)
# Corresponding table :
(xt <- xtabs(Freq ~ Survived + Class + Sex, d.Titanic, subset = Age=="Adult"))

## , , Sex = Male
## 
##         Class
## Survived 1st 2nd 3rd Crew
##      No  118 154 387  670
##      Yes  57  14  75  192
## 
## , , Sex = Female
## 
##         Class
## Survived 1st 2nd 3rd Crew
##      No    4  13  89    3
##      Yes 140  80  76   20

# Alternatively, a mosaic plot :
mosaicplot(xt[,,"Male"], main = "mosaicplot(Freq ~ Class + Survived, *)", color=TRUE)

par(op)


# Default method
require(grDevices) # for colours
tN <- table(Ni <- stats::rpois(100, lambda = 5))
r <- barplot(tN, col = rainbow(20))
#- type = "h" plotting *is* 'bar'plot
lines(r, tN, type = "h", col = "red", lwd = 2)

barplot(tN, space = 1.5, axisnames = FALSE,
        sub = "barplot(..., space= 1.5, axisnames = FALSE)")

barplot(VADeaths, plot = FALSE)

## [1] 0.7 1.9 3.1 4.3

barplot(VADeaths, plot = FALSE, beside = TRUE)

##      [,1] [,2] [,3] [,4]
## [1,]  1.5  7.5 13.5 19.5
## [2,]  2.5  8.5 14.5 20.5
## [3,]  3.5  9.5 15.5 21.5
## [4,]  4.5 10.5 16.5 22.5
## [5,]  5.5 11.5 17.5 23.5

mp <- barplot(VADeaths) # default
tot <- colMeans(VADeaths)
text(mp, tot + 3, format(tot), xpd = TRUE, col = "blue")

barplot(VADeaths, beside = TRUE,
        col = c("lightblue", "mistyrose", "lightcyan",
                "lavender", "cornsilk"),
        legend.text = rownames(VADeaths), ylim = c(0, 100))
title(main = "Death Rates in Virginia", font.main = 4)

hh <- t(VADeaths)[, 5:1]
mybarcol <- "gray20"
mp <- barplot(hh, beside = TRUE,
        col = c("lightblue", "mistyrose",
                "lightcyan", "lavender"),
        legend.text = colnames(VADeaths), ylim = c(0,100),
        main = "Death Rates in Virginia", font.main = 4,
        sub = "Faked upper 2*sigma error bars", col.sub = mybarcol,
        cex.names = 1.5)
segments(mp, hh, mp, hh + 2*sqrt(1000*hh/100), col = mybarcol, lwd = 1.5)
stopifnot(dim(mp) == dim(hh))  # corresponding matrices
mtext(side = 1, at = colMeans(mp), line = -2,
      text = paste("Mean", formatC(colMeans(hh))), col = "red")

# Bar shading example
barplot(VADeaths, angle = 15+10*1:5, density = 20, col = "black",
        legend.text = rownames(VADeaths))
title(main = list("Death Rates in Virginia", font = 4))

# Border color
barplot(VADeaths, border = "dark blue")

# Log scales (not much sense here)
barplot(tN, col = heat.colors(12), log = "y")

barplot(tN, col = gray.colors(20), log = "xy")

# Legend location
barplot(height = cbind(x = c(465, 91) / 465 * 100,
                       y = c(840, 200) / 840 * 100,
                       z = c(37, 17) / 37 * 100),
        beside = FALSE,
        width = c(465, 840, 37),
        col = c(1, 2),
        legend.text = c("A", "B"),
        args.legend = list(x = "topleft"))

pie(table(mtcars$cyl),labels=c("V4","V6","V8"),
      col=c("white","gray","black"),main="Performance cars by cylinders")

#Some graphs

## Share of students per country: pie() needs one count per category, so the
## Country column is tabulated first to match the chart title
pie(table(students$Country), edges = 50, radius = 1, col = rainbow(7),
    main = "The Pie Chart  Showing Countries", density = -4)

## Distribution of the heights themselves, so that the x axis really is
## height and the y axis really is frequency, as the labels say
hist(students$`Height (in)`, col = rainbow(7), main = "Height By Frequencies",
     ylab = "Frequency", xlab = "Height")

## Histogram of horsepower with the mean and the median marked by vertical
## lines. Written with ggplot() because qplot() is deprecated; the two
## reference values live in their own two-row data frame so that each line
## gets its own linetype and legend entry.
hp_stats <- data.frame(
  value = c(mean(mtcars$hp), median(mtcars$hp)),
  stat = factor(c("mean", "median"))
)
ggplot(mtcars, aes(x = hp)) +
  geom_histogram(
    color = "black", fill = "white",
    breaks = seq(0, 400, 25), closed = "right"
  ) +
  geom_vline(
    data = hp_stats,
    mapping = aes(xintercept = value, linetype = stat),
    show.legend = TRUE
  ) +
  scale_linetype_manual(values = c(2, 3)) +
  labs(title = "Horsepower", x = "HP", linetype = "")

library(readxl)
pairs(students[,10:12],col=rainbow(7),cex=0.75,main="Multiple Scatterplots")

data("InsectSprays")
hist(InsectSprays$count,col = rainbow(7),main = "Graph Showing Counts",ylab = "Frequency",xlab = "Counts",cex=0.75)

## Mean and standard deviation of the normal distribution under study
mu <- -3.42
sigma <- 0.2
mu.minus.1sig <- mu - sigma
mu.minus.1sig

## [1] -3.62

mu.plus.1sig <- mu + sigma
mu.plus.1sig

## [1] -3.22

## Probability of falling within one standard deviation of the mean
pnorm(q = mu.plus.1sig, mean = mu, sd = sigma) -
  pnorm(q = mu.minus.1sig, mean = mu, sd = sigma)

## [1] 0.6826895

## Grid of 300 x values; `length.out` is written out in full instead of
## relying on partial matching of `length`
xvals <- seq(-5, -2, length.out = 300)
fx <- dnorm(xvals, mean = mu, sd = sigma)
plot(xvals, fx, type = "l", col = rainbow(7), xlim = c(-4.4, -2.5),
     main = "N(-3.42,0.2) distribution", xlab = "x", ylab = "f(x)")
abline(h = 0, col = "green")
## line type 3 marks mean + 1 sd and line type 2 marks mean - 1 sd, which is
## the pairing the legend below shows
abline(v = c(mu.plus.1sig, mu.minus.1sig), lty = 3:2)
legend("topleft", legend = c("-3.62\n(mean - 1 sd)", "\n-3.22\n(mean + 1 sd)"),
       lty = 2:3, bty = "n")

hist(chickwts$weight,main="",xlab="weight")

qqnorm(chickwts$weight,main="Normal QQ plot of weights")
qqline(chickwts$weight,col="blue")

##importing another data

library(readxl)
Data1 <- read_excel("D:/Computer Programming/Data/Data1.xlsx")
## variance of the income level within each district; the argument name is
## written out as `FUN` rather than relying on partial matching
tapply(Data1$income_level, Data1$District, FUN = var)

##    Kandreho Maevatanana Tsaratanana 
##  1425855253  1299280192  1251476547

Calculating T-square for Multivariate Data

## Hotelling's T-square for H0: mu = mu_0, from a data matrix with one
## observation per row and one variable per column
multdata <- matrix(c(6, 9, 10, 6, 8, 3), nrow = 3, ncol = 2, byrow = TRUE)
mu_0 <- matrix(c(9, 5), nrow = 2, ncol = 1, byrow = TRUE)
n <- nrow(multdata) # number of observations
p <- ncol(multdata) # number of variables

## Sample covariance matrix S: variances on the diagonal, covariance off it
standardde <- cov(multdata)

## Determinant and inverse of S (solve() with one argument inverts a matrix)
d <- det(standardde)
matrix_transponse <- solve(standardde)

## Sample mean vector and its deviation from the hypothesised mean
mu_1 <- matrix(colMeans(multdata))
diffe <- mu_1 - mu_0
diffe

##      [,1]
## [1,]   -1
## [2,]    1

## T^2 = n * (xbar - mu_0)' S^{-1} (xbar - mu_0); drop() turns the 1 x 1
## matrix product into a plain number
t_square <- n * drop(t(diffe) %*% matrix_transponse %*% diffe)
t_square

My Codes

Stephen Phen Chikadulah

2024-10-31

Vector Operations

Calculating T-square for Multivariate Data