## Build a small numeric vector to experiment with
myvec <- c(2, 3, 1, 6, 4, 3, 3, 7)
## Show its contents
myvec
## [1] 2 3 1 6 4 3 3 7
# Basic summary statistics: mean and standard deviation
mean(myvec)
## [1] 3.625
sd(myvec)
## [1] 1.995531
# Pull out elements by position
myvec[3]
## [1] 1
myvec[c(1, 5, 6, 8)]
## [1] 2 4 3 7
## Keep only the values greater than 4
myvec[myvec > 4]
## [1] 6 7
## The logical mask that drives the selection above
myvec > 4
## [1] FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## Store the values strictly between 2 and 6 in a new object
val126 <- myvec[myvec > 2 & myvec < 6]
val126
## [1] 3 4 3 3
## Overwrite elements in place
myvec[4] <- 500
myvec[6:7] <- 100
myvec
## [1] 2 3 1 500 4 100 100 7
## Order the values from largest to smallest
vec_sort <- rev(sort(myvec))
vec_sort
## [1] 500 100 100 7 4 3 2 1
# Building a data frame in R
## Three parallel vectors, one entry per person
p.names <- c("Joana", "Charlotte", "Helen", "Karen", "Amy")
p.height <- c(180, 155, 160, 167, 181)
p.weight <- c(65, 50, 52, 58, 70)
## Combine them column-wise; stringsAsFactors = TRUE turns Names into a factor
dataf <- data.frame(
  Height = p.height,
  Weight = p.weight,
  Names = p.names,
  stringsAsFactors = TRUE
)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Compact column-by-column preview of the data frame
dataf |> glimpse()
## Rows: 5
## Columns: 3
## $ Height <dbl> 180, 155, 160, 167, 181
## $ Weight <dbl> 65, 50, 52, 58, 70
## $ Names <fct> Joana, Charlotte, Helen, Karen, Amy
# Inspecting the data frame we just built
## Number of rows and columns
dataf |> dim()
## [1] 5 3
## Structure: type and first values of every column
dataf |> str()
## 'data.frame': 5 obs. of 3 variables:
## $ Height: num 180 155 160 167 181
## $ Weight: num 65 50 52 58 70
## $ Names : Factor w/ 5 levels "Amy","Charlotte",..: 4 2 3 5 1
## Per-column summary (quantiles for numbers, counts for factor levels)
dataf |> summary()
## Height Weight Names
## Min. :155.0 Min. :50 Amy :1
## 1st Qu.:160.0 1st Qu.:52 Charlotte:1
## Median :167.0 Median :58 Helen :1
## Mean :168.6 Mean :59 Joana :1
## 3rd Qu.:180.0 3rd Qu.:65 Karen :1
## Max. :181.0 Max. :70
#Importing Dataset into R
library(readxl)
## Read the workbook into a tibble (the path is machine-specific)
students <- read_excel(path = "D:/Computer Programming/Data/Students.xlsx")
## First column, returned as a one-column tibble
students[, 1]
## # A tibble: 30 × 1
## ID
## <dbl>
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
## # ℹ 20 more rows
# First row, all columns
students[1, ]
## # A tibble: 1 × 14
## ID `Last Name` `First Name` City State Gender `Student Status` Major
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 DOE01 JANE01 Los Angeles Cali… Female Graduate Poli…
## # ℹ 6 more variables: Country <chr>, Age <dbl>, SAT <dbl>,
## # `Average score (grade)` <dbl>, `Height (in)` <dbl>,
## # `Newspaper readership (times/wk)` <dbl>
students[2:3, 2] ## rows 2 and 3 of column 2: two values
## # A tibble: 2 × 1
## `Last Name`
## <chr>
## 1 DOE02
## 2 DOE01
students[2, 2] ## row 2 of column 2: a single value
## # A tibble: 1 × 1
## `Last Name`
## <chr>
## 1 DOE02
students[2:5, 2] # rows 2 to 5 of column 2: four values
## # A tibble: 4 × 1
## `Last Name`
## <chr>
## 1 DOE02
## 2 DOE01
## 3 DOE02
## 4 DOE03
## Select a column by name instead of by position
students[, "SAT"]
## # A tibble: 30 × 1
## SAT
## <dbl>
## 1 2263
## 2 2006
## 3 2221
## 4 1716
## 5 1701
## 6 1786
## 7 1577
## 8 1842
## 9 1813
## 10 2041
## # ℹ 20 more rows
## Overwrite the cell in row 3, column 4 (City) with "Lilongwe"
students[3, 4] <- "Lilongwe"
## Keep a copy of the tibble at this point under a second name
Students <- students
# Row binding and column binding
## Fifteen "Yes" followed by fifteen "No": one value per student
married <- rep(c("Yes", "No"), times = c(15, 15))
married
## [1] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [13] "Yes" "Yes" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [25] "No" "No" "No" "No" "No" "No"
## Attach the vector as a new column in two different ways:
## with cbind() and with `$<-`
students <- cbind(students, married)
students$marriednew <- married
## Summary statistics
## Start with the size and type of the dataset
length(students) ## for a data frame this is the number of variables
## [1] 16
ncol(students) ## number of columns
## [1] 16
class(students) ## class of the dataset
## [1] "data.frame"
typeof(students) ## underlying storage type of the dataset
## [1] "list"
## Cross tabulation
table(students$Gender, students$Major) ## student counts by gender and major
##
## Econ Math Politics
## Female 3 8 4
## Male 7 2 6
# Renaming columns
## Give the first three columns short lowercase names
names(students)[1:3] <- c("id", "last", "first")
names(students)
## [1] "id" "last"
## [3] "first" "City"
## [5] "State" "Gender"
## [7] "Student Status" "Major"
## [9] "Country" "Age"
## [11] "SAT" "Average score (grade)"
## [13] "Height (in)" "Newspaper readership (times/wk)"
## [15] "married" "marriednew"
# Creating subsets of the dataset
## One-column data frames holding a single variable each
city <- data.frame(students$City)
state <- data.frame(students$State)
## The column is called `Gender` (capital G). `students$gender` is NULL, which
## silently yields an empty data frame instead of the gender column.
gender <- data.frame(students$Gender)
## Several variables at once, then column-bind with the single-column frame
City <- data.frame(students$City, students$Age, students$id)
binded <- cbind(city, City)
# Converting a vector
vec1 <- c(2, 0.5, 1, 2, 0.5, 1, 2, 0.5, 1)
## Replication: nine copies of 1
rep(1, 9)
## [1] 1 1 1 1 1 1 1 1 1
## Replacement using empty-bracket indexing (keeps the length, overwrites all)
vec1[] <- 1
vec1
## [1] 1 1 1 1 1 1 1 1 1
## Same result using a vector of length 3 stretched to the length of vec1
vec1[] <- rep(c(1, 1, 1), length.out = length(vec1))
# Changing Fahrenheit to degrees Celsius
## The temperatures are stored under descriptive names: assigning to `F`
## would mask R's built-in shorthand for FALSE for the rest of the session.
temp_f <- c(45, 77, 20, 19, 101, 120, 212)
temp_c <- (5 * temp_f - 5 * 32) / 9
temp_c
## [1] 7.222222 25.000000 -6.666667 -7.222222 38.333333 48.888889 100.000000
## Multiply vector1 by each element of vector2 and concatenate the results
vector1 <- c(2, 4, 6)
vector2 <- c(1, 2)
vector3 <- c(vector1 * vector2[1], vector1 * vector2[2])
vector3
## [1] 2 4 6 4 8 12
## Overwrite positions 2 to 5; the two replacement values are recycled twice
vector3[2:5] <- c(-0.1, -100)
# Binding matrices
## 12 values fill a 4 x 3 matrix exactly, column by column. Asking for 4 x 4
## would recycle the data with a "data length differs from size of matrix"
## warning.
mat2 <- matrix(data = 1:12, nrow = 4, ncol = 3, byrow = FALSE)
mat3 <- rbind(mat2) ## row binding (a single argument returns the same matrix)
## View() opens a viewer window, so only call it in an interactive session
if (interactive()) {
  View(mat3)
}
mat4 <- cbind(mat2) ## column binding (again a single argument)
mat4
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
# Matrix dimensions
## Rows and columns of each matrix
dim(mat2)
## [1] 4 3
dim(mat3)
## [1] 4 3
dim(mat4)
## [1] 4 3
nrow(mat2)
## [1] 4
ncol(mat2)
## [1] 3
# Subsetting: row, column and diagonal extraction
## First create a 4 x 4 matrix filled row by row. Exactly 16 values are
## supplied; passing 1:20 would trigger a length-mismatch warning and the
## last four values would be dropped anyway.
mat5 <- matrix(1:16, nrow = 4, ncol = 4, byrow = TRUE)
## Overwriting single cells by [row, column] position
mat5[1, 4] <- 0
mat5[1, 1] <- 1
mat5[1, 2] <- 2
mat5[1, 3] <- 3
mat5[1, 4] <- 4
mat5[4, ] # row extraction
## [1] 13 14 15 16
mat5[, 4] # column extraction
## [1] 4 8 12 16
mat5[2:3, 1] ## rows 2 and 3 of column 1
## [1] 5 9
mat5[1:2, 2:4] ## rows 1-2 of columns 2-4 (stays a matrix)
## [,1] [,2] [,3]
## [1,] 2 3 4
## [2,] 6 7 8
diag(mat5) ## main diagonal
## [1] 1 6 11 16
## Omitting and overwriting
mat5[-1, -2] ## everything except row 1 and column 2
## [,1] [,2] [,3]
## [1,] 5 7 8
## [2,] 9 11 12
## [3,] 13 15 16
mat6 <- mat5
mat6[2, ] <- 5:8
mat6[3, ] <- mat6[, 3] ## replace row 3 with the current third column
## View() opens a viewer window, so only call it in an interactive session
if (interactive()) {
  View(mat6)
}
# Exercises
## Build a 4 x 2 matrix, filled column by column
mat7 <- matrix(c(4.3, 3.1, 8.2, 8.2, 3.2, 0.9, 1.6, 6.5), nrow = 4, ncol = 2)
## Drop the first row and check the size of what remains
mat8 <- mat7[2:4, ]
dim(mat8)
## [1] 3 2
## Put the second column into ascending order
mat7[, 2] <- sort(mat7[, 2])
## Remove row 1 and column 1; what is left of column 2 comes back as a vector
mat8 <- mat7[-1, -1]
mat8
## [1] 1.6 3.2 6.5
mat7 ## the full matrix after sorting its second column
## [,1] [,2]
## [1,] 4.3 0.9
## [2,] 3.1 1.6
## [3,] 8.2 3.2
## [4,] 8.2 6.5
## Keep the first two rows as a 2 x 2 matrix
ele <- mat7[1:2, ]
dim(ele)
## [1] 2 2
## Overwrite the four corner cells (rows 4 and 1, columns 2 and 1) with
## minus one half of the diagonal of `ele`; the two values are recycled
mat7[c(4, 1), c(2, 1)] <- -0.5 * diag(ele)
mat8 <- mat7
mat8
## [,1] [,2]
## [1,] -0.80 -0.80
## [2,] 3.10 1.60
## [3,] 8.20 3.20
## [4,] -2.15 -2.15
# Turning character variables into factors
library(readxl)
## Inspect the column types before converting anything
str(Students)
## tibble [30 × 14] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:30] 1 2 3 4 5 6 7 8 9 10 ...
## $ Last Name : chr [1:30] "DOE01" "DOE02" "DOE01" "DOE02" ...
## $ First Name : chr [1:30] "JANE01" "JANE02" "JOE01" "JOE02" ...
## $ City : chr [1:30] "Los Angeles" "Sedona" "Lilongwe" "Lackawana" ...
## $ State : chr [1:30] "California" "Arizona" "New York" "New York" ...
## $ Gender : chr [1:30] "Female" "Female" "Male" "Male" ...
## $ Student Status : chr [1:30] "Graduate" "Undergraduate" "Graduate" "Graduate" ...
## $ Major : chr [1:30] "Politics" "Math" "Math" "Econ" ...
## $ Country : chr [1:30] "US" "US" "US" "US" ...
## $ Age : num [1:30] 30 19 26 33 37 25 39 21 18 33 ...
## $ SAT : num [1:30] 2263 2006 2221 1716 1701 ...
## $ Average score (grade) : num [1:30] 67 63 78.1 77.8 65 ...
## $ Height (in) : num [1:30] 61 64 73 68 71 67 70 62 62 66 ...
## $ Newspaper readership (times/wk): num [1:30] 5 7 6 3 6 5 5 5 6 5 ...
## Store the text columns as factors so that R treats them as categorical
## variables
Students$Gender <- factor(Students$Gender)
Students$`Student Status` <- factor(Students$`Student Status`)
Students$Major <- factor(Students$Major)
Students$Country <- factor(Students$Country)
## Plotting: base graphics first, then ggplot2 for the final bar chart
hist(Students$`Height (in)`, col = rainbow(7), main = "Students Height", xlab = "Height", ylab = "Freq") ## histogram: suited to continuous data
### Documentation for any function is available in the Help pane
library(ggplot2)
barplot(Students$Age, width = 10)
plot(Students$Age ~ Students$`Height (in)`)
scatter.smooth(Students$`Height (in)` ~ Students$Age, col = rainbow(7))
boxplot(Students$Age ~ Students$Major)
pairs(Students[, c("Age", "Gender", "Height (in)", "Major", "SAT")])
## A fixed colour is set outside aes(). Inside aes() the string "red" is
## treated as a data value: ggplot2 then picks its own colour and adds a
## legend entry labelled "red".
ggplot(data = Students) +
  geom_bar(mapping = aes(x = Country), colour = "red")
# Statistical inference on the students data
## One-sample t-test: is the mean age greater than 20?
t.test(x = Students$Age, alternative = "greater", mu = 20)
##
## One Sample t-test
##
## data: Students$Age
## t = 4.1457, df = 29, p-value = 0.0001345
## alternative hypothesis: true mean is greater than 20
## 95 percent confidence interval:
## 23.06874 Inf
## sample estimates:
## mean of x
## 25.2
## Shapiro-Wilk test. H0: age is normally distributed; H1: it is not
shapiro.test(x = Students$Age)
##
## Shapiro-Wilk normality test
##
## data: Students$Age
## W = 0.87053, p-value = 0.001722
## Two-sample t-test of age by gender, assuming equal variances
t.test(Students$Age ~ Students$Gender, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Students$Age by Students$Gender
## t = -1.6403, df = 28, p-value = 0.1121
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
## -8.9952023 0.9952023
## sample estimates:
## mean in group Female mean in group Male
## 23.2 27.2
## F test comparing the variance of age between the two genders
var.test(Students$Age ~ Students$Gender)
##
## F test to compare two variances
##
## data: Students$Age by Students$Gender
## F = 0.94396, num df = 14, denom df = 14, p-value = 0.9156
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3169154 2.8116679
## sample estimates:
## ratio of variances
## 0.9439601
## Chi-squared test of independence between gender and major
chisq.test(x = Students$Gender, y = Students$Major)
##
## Pearson's Chi-squared test
##
## data: Students$Gender and Students$Major
## X-squared = 5.6, df = 2, p-value = 0.06081
## Pearson correlation between age and height: single value, then as a matrix
cor(x = Students$Age, y = Students$`Height (in)`)
## [1] 0.06615254
cor(Students[, c("Age", "Height (in)")])
## Age Height (in)
## Age 1.00000000 0.06615254
## Height (in) 0.06615254 1.00000000
## Spearman rank correlation test between average score and age
cor.test(x = Students$`Average score (grade)`, y = Students$Age, method = "spearman")
## Warning in cor.test.default(Students$`Average score (grade)`, Students$Age, :
## cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: Students$`Average score (grade)` and Students$Age
## S = 4516.1, p-value = 0.9803
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.0047044
# Linear regression of height
## Simple model: height explained by age
Genderlm <- lm(Students$`Height (in)` ~ Students$Age)
## Multiple model: height explained by age and gender. The formula must be
## passed to lm(); a bare formula is only a formula object, and summary()
## would then describe the formula instead of a fitted model.
multilm <- lm(Students$`Height (in)` ~ Students$Age + Students$Gender)
summary(multilm)
summary(Genderlm)
##
## Call:
## lm(formula = Students$`Height (in)` ~ Students$Age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0075 -3.7496 -0.1038 3.7649 8.3514
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.30294 3.33594 19.576 <2e-16 ***
## Students$Age 0.04486 0.12786 0.351 0.728
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.731 on 28 degrees of freedom
## Multiple R-squared: 0.004376, Adjusted R-squared: -0.03118
## F-statistic: 0.1231 on 1 and 28 DF, p-value: 0.7284
library(readxl)
# Descriptive statistics per group with tapply()
# Make the built-in chickwts dataset available
data(chickwts)
# Variance of chick weight within each feed type
variances <- with(chickwts, tapply(weight, INDEX = feed, FUN = var))
# Name of the feed type whose weights vary the most
max_variance_feed <- names(variances)[which.max(variances)]
# Report that feed type, followed by the variance itself
print(paste("Feed type with maximum variance:", max_variance_feed))
## [1] "Feed type with maximum variance: meatmeal"
print(paste("Maximum variance:", max(variances)))
## [1] "Maximum variance: 4212.09090909091"
# Basic statistics
## Filter the quakes dataset: keep rows with depth greater than 299.
## dplyr::filter() is named explicitly because stats::filter() has the same
## name, and the column is referenced directly rather than through `quakes$`.
data(quakes)
dep <- quakes |> dplyr::filter(depth > 299)
## Interquartile range of the remaining depths
IQR(dep$depth)
## [1] 101
## Descriptive statistics of the average score for each city. The argument is
## spelled out as FUN rather than relying on partial matching of `FU`.
des <- tapply(students$`Average score (grade)`, INDEX = students$City, FUN = summary)
des ## display the results
## $Acme
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 79.5 79.5 79.5 79.5 79.5 79.5
##
## $Amsterdam
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 75 75 75 75 75 75
##
## $Beijing
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 79 79 79 79 79 79
##
## $`Buenos Aires`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85 85 85 85 85 85
##
## $Caracas
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 92 92 92 92 92 92
##
## $Cimax
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.88 95.88 95.88 95.88 95.88 95.88
##
## $Defiance
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 65 65 65 65 65 65
##
## $`Drunkard Creek`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 82.39 82.39 82.39 82.39 82.39 82.39
##
## $Embarrass
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.84 95.84 95.84 95.84 95.84 95.84
##
## $`Hot Coffe`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 81.53 81.53 81.53 81.53 81.53 81.53
##
## $Intercourse
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 88 88 88 88 88 88
##
## $Java
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 78.94 78.94 78.94 78.94 78.94 78.94
##
## $Lackawana
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 77.81 77.81 77.81 77.81 77.81 77.81
##
## $Liberal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 87 87 87 87 87 87
##
## $Lilongwe
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 78.11 78.11 78.11 78.11 78.11 78.11
##
## $Loco
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 64 64 64 64 64 64
##
## $`Los Angeles`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 67 67 67 67 67 67
##
## $`Mexican Hat`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 80 80 80 80 80 80
##
## $Mexico
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.42 95.42 95.42 95.42 95.42 95.42
##
## $Montreal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 91 91 91 91 91 91
##
## $Moscow
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.28 70.28 70.28 70.28 70.28 70.28
##
## $`New York`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 71.00 73.75 76.50 76.50 79.25 82.00
##
## $Remote
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 67 67 67 67 67 67
##
## $`San Juan`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95 95 95 95 95 95
##
## $Sedona
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 63 63 63 63 63 63
##
## $Stockholm
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 88 88 88 88 88 88
##
## $`Tel Aviv`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 69 69 69 69 69 69
##
## $`The X`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 89 89 89 89 89 89
##
## $Varna
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 79.34 79.34 79.34 79.34 79.34 79.34
## Major with the highest mean age. FUN is written in full instead of relying
## on partial argument matching of `FU`.
vari <- which.max(tapply(students$Age, INDEX = students$Major, FUN = mean))
names(vari)
## [1] "Politics"
# Playing with built-in datasets
## Earthquake locations: longitude against latitude
data(quakes)
plot(quakes$long, quakes$lat, xlab = "Longitude", ylab = "Latitude", main = "The Graph Of Quakes", col = rainbow(7))
## Number of chicks on each feed type
data("chickwts")
table(chickwts$feed)
##
## casein horsebean linseed meatmeal soybean sunflower
## 12 10 12 11 14 12
## Managing and analysing the mtcars dataset
data("mtcars")
## library() stops with an error if the package is missing, whereas require()
## only returns FALSE and lets the script fail later
library(graphics)
pairs(mtcars, main = "mtcars data", gap = 1 / 4)
coplot(mpg ~ disp | as.factor(cyl),
  data = mtcars,
  panel = panel.smooth, rows = 1
)
## possibly more meaningful, e.g., for summary() or bivariate plots:
## recode the categorical columns as (ordered) factors in a copy of the data
mtcars2 <- within(mtcars, {
  vs <- factor(vs, labels = c("V", "S"))
  am <- factor(am, labels = c("automatic", "manual"))
  cyl <- ordered(cyl)
  gear <- ordered(gear)
  carb <- ordered(carb)
})
summary(mtcars2)
## mpg cyl disp hp drat
## Min. :10.40 4:11 Min. : 71.1 Min. : 52.0 Min. :2.760
## 1st Qu.:15.43 6: 7 1st Qu.:120.8 1st Qu.: 96.5 1st Qu.:3.080
## Median :19.20 8:14 Median :196.3 Median :123.0 Median :3.695
## Mean :20.09 Mean :230.7 Mean :146.7 Mean :3.597
## 3rd Qu.:22.80 3rd Qu.:326.0 3rd Qu.:180.0 3rd Qu.:3.920
## Max. :33.90 Max. :472.0 Max. :335.0 Max. :4.930
## wt qsec vs am gear carb
## Min. :1.513 Min. :14.50 V:18 automatic:19 3:15 1: 7
## 1st Qu.:2.581 1st Qu.:16.89 S:14 manual :13 4:12 2:10
## Median :3.325 Median :17.71 5: 5 3: 3
## Mean :3.217 Mean :17.85 4:10
## 3rd Qu.:3.610 3rd Qu.:18.90 6: 1
## Max. :5.424 Max. :22.90 8: 1
## Bar plots
## One bar per car, with the cylinder count as bar height
barplot(mtcars$cyl, col = rainbow(7), main = "The Graph Of MtCars$Cyl")
# Formula interface: response ~ category, looked up in `data`
barplot(GNP ~ Year, data = longley)
barplot(cbind(Employed, Unemployed) ~ Year, data = longley)
## Third form of formula, with two categories.
## Save the current graphics settings so they can be restored afterwards.
op <- par(mfrow = c(2, 1), mgp = c(3, 1, 0) / 2, mar = 0.1 + c(3, 3, 2, 1))
d.Titanic <- as.data.frame(Titanic)
summary(d.Titanic)
## Class Sex Age Survived Freq
## 1st :8 Male :16 Child:16 No :16 Min. : 0.00
## 2nd :8 Female:16 Adult:16 Yes:16 1st Qu.: 0.75
## 3rd :8 Median : 13.50
## Crew:8 Mean : 68.78
## 3rd Qu.: 77.00
## Max. :670.00
barplot(Freq ~ Class + Survived,
  data = d.Titanic,
  subset = Age == "Adult" & Sex == "Male",
  main = "barplot(Freq ~ Class + Survived, *)",
  ylab = "# {passengers}",
  legend.text = TRUE
)
# The table behind that plot, for adults of both sexes
xt <- xtabs(Freq ~ Survived + Class + Sex, d.Titanic, subset = Age == "Adult")
xt
## , , Sex = Male
##
## Class
## Survived 1st 2nd 3rd Crew
## No 118 154 387 670
## Yes 57 14 75 192
##
## , , Sex = Female
##
## Class
## Survived 1st 2nd 3rd Crew
## No 4 13 89 3
## Yes 140 80 76 20
# The male slice of the table shown as a mosaic plot
mosaicplot(xt[, , "Male"], main = "mosaicplot(Freq ~ Class + Survived, *)", color = TRUE)
## Restore the graphics settings saved in `op`
par(op)
# Default method
## library() stops with an error if the package is missing, whereas require()
## only returns FALSE and lets the script fail later
library(grDevices) # for colours
## Frequency table of 100 Poisson draws (random, so it differs between runs)
tN <- table(Ni <- stats::rpois(100, lambda = 5))
r <- barplot(tN, col = rainbow(20))
#- type = "h" plotting *is* 'bar'plot
lines(r, tN, type = "h", col = "red", lwd = 2)
barplot(tN,
  space = 1.5, axisnames = FALSE,
  sub = "barplot(..., space= 1.5, axisnames = FALSE)"
)
## With plot = FALSE only the bar midpoints are returned; nothing is drawn
barplot(VADeaths, plot = FALSE)
## [1] 0.7 1.9 3.1 4.3
barplot(VADeaths, plot = FALSE, beside = TRUE)
## [,1] [,2] [,3] [,4]
## [1,] 1.5 7.5 13.5 19.5
## [2,] 2.5 8.5 14.5 20.5
## [3,] 3.5 9.5 15.5 21.5
## [4,] 4.5 10.5 16.5 22.5
## [5,] 5.5 11.5 17.5 23.5
mp <- barplot(VADeaths) # default
tot <- colMeans(VADeaths)
## Write each column mean above its bar, at the midpoints returned in `mp`
text(mp, tot + 3, format(tot), xpd = TRUE, col = "blue")
barplot(VADeaths,
  beside = TRUE,
  col = c(
    "lightblue", "mistyrose", "lightcyan",
    "lavender", "cornsilk"
  ),
  legend.text = rownames(VADeaths), ylim = c(0, 100)
)
title(main = "Death Rates in Virginia", font.main = 4)
## Transposed data with the columns in reverse order
hh <- t(VADeaths)[, 5:1]
mybarcol <- "gray20"
mp <- barplot(hh,
  beside = TRUE,
  col = c(
    "lightblue", "mistyrose",
    "lightcyan", "lavender"
  ),
  legend.text = colnames(VADeaths), ylim = c(0, 100),
  main = "Death Rates in Virginia", font.main = 4,
  sub = "Faked upper 2*sigma error bars", col.sub = mybarcol,
  cex.names = 1.5
)
segments(mp, hh, mp, hh + 2 * sqrt(1000 * hh / 100), col = mybarcol, lwd = 1.5)
stopifnot(dim(mp) == dim(hh)) # corresponding matrices
mtext(
  side = 1, at = colMeans(mp), line = -2,
  text = paste("Mean", formatC(colMeans(hh))), col = "red"
)
# Bar shading example
barplot(VADeaths,
  angle = 15 + 10 * 1:5, density = 20, col = "black",
  legend.text = rownames(VADeaths)
)
title(main = list("Death Rates in Virginia", font = 4))
# Border color
barplot(VADeaths, border = "dark blue")
# Log scales (not much sense here)
barplot(tN, col = heat.colors(12), log = "y")
barplot(tN, col = gray.colors(20), log = "xy")
# Legend location
barplot(
  height = cbind(
    x = c(465, 91) / 465 * 100,
    y = c(840, 200) / 840 * 100,
    z = c(37, 17) / 37 * 100
  ),
  beside = FALSE,
  width = c(465, 840, 37),
  col = c(1, 2),
  legend.text = c("A", "B"),
  args.legend = list(x = "topleft")
)
## Share of cars by number of cylinders
pie(table(mtcars$cyl),
  labels = c("V4", "V6", "V8"),
  col = c("white", "gray", "black"), main = "Performance cars by cylinders"
)
# Some graphs
## Pie chart of the number of students per country. The title announces
## countries, so the slices are built from the country counts rather than
## from the raw ages. The stray trailing comma in the call is removed.
pie(table(students$Country), edges = 50, radius = 1, col = rainbow(7), main = "The Pie Chart Showing Countries", density = -4)
## Histogram of the heights themselves. Wrapping them in table() would plot
## the distribution of the counts instead of the heights.
hist(students$`Height (in)`, col = rainbow(7), main = "Height By Frequencies", ylab = "Frequency", xlab = "Height")
## Histogram of horsepower with vertical lines at the mean and the median.
## Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0.
## The line positions live in their own data frame, so that layer must not
## inherit the x = hp mapping of the main plot.
hp_stats <- data.frame(
  value = c(mean(mtcars$hp), median(mtcars$hp)),
  stat = factor(c("mean", "median"))
)
ggplot(mtcars, aes(x = hp)) +
  geom_histogram(color = "black", fill = "white", breaks = seq(0, 400, 25), closed = "right") +
  geom_vline(
    data = hp_stats,
    mapping = aes(xintercept = value, linetype = stat),
    show.legend = TRUE,
    inherit.aes = FALSE
  ) +
  scale_linetype_manual(values = c(2, 3)) +
  labs(title = "Horsepower", x = "HP", linetype = "")
library(readxl)
## Scatterplot matrix of columns 10 to 12 of the students data, selected here
## by name (Age, SAT and average score)
pairs(
  students[, c("Age", "SAT", "Average score (grade)")],
  col = rainbow(7),
  cex = 0.75,
  main = "Multiple Scatterplots"
)
## Histogram of the insect counts from the built-in InsectSprays dataset
data("InsectSprays")
hist(
  InsectSprays$count,
  col = rainbow(7),
  main = "Graph Showing Counts",
  ylab = "Frequency",
  xlab = "Counts",
  cex = 0.75
)
## Normal distribution with mean -3.42 and standard deviation 0.2
mu <- -3.42
sigma <- 0.2
## One standard deviation below and above the mean
mu.minus.1sig <- mu - sigma
mu.minus.1sig
## [1] -3.62
mu.plus.1sig <- mu + sigma
mu.plus.1sig
## [1] -3.22
## Probability of falling within one standard deviation of the mean
pnorm(q = mu.plus.1sig, mean = mu, sd = sigma) -
  pnorm(q = mu.minus.1sig, mean = mu, sd = sigma)
## [1] 0.6826895
## Density curve on a grid of 300 points. The argument is written in full as
## length.out rather than relying on partial matching of `length`.
xvals <- seq(-5, -2, length.out = 300)
fx <- dnorm(xvals, mean = mu, sd = sigma)
plot(xvals, fx,
  type = "l", col = rainbow(7), xlim = c(-4.4, -2.5), main = "N(-3.42,0.2) distribution",
  xlab = "x", ylab = "f(x)"
)
abline(h = 0, col = "green")
## Vertical markers: line type 3 at mean + 1 sd and line type 2 at mean - 1 sd,
## matching the legend below
abline(v = c(mu.plus.1sig, mu.minus.1sig), lty = 3:2)
legend("topleft",
  legend = c("-3.62\n(mean - 1 sd)", "\n-3.22\n(mean + 1 sd)"),
  lty = 2:3, bty = "n"
)
## Check the chick weights for normality: histogram and normal QQ plot
hist(chickwts$weight, main = "", xlab = "weight")
qqnorm(chickwts$weight, main = "Normal QQ plot of weights")
qqline(chickwts$weight, col = "blue")
##importing another data
library(readxl)
## Read the second workbook (the path is machine-specific)
Data1 <- read_excel("D:/Computer Programming/Data/Data1.xlsx")
## Variance of income level within each district. FUN is written in full
## instead of relying on partial argument matching of `FU`.
tapply(Data1$income_level, Data1$District, FUN = var)
## Kandreho Maevatanana Tsaratanana
## 1425855253 1299280192 1251476547
## Small bivariate sample (3 observations, 2 variables) and a hypothesised
## mean vector mu_0
multdata <- matrix(c(6, 9, 10, 6, 8, 3), nrow = 3, ncol = 2, byrow = TRUE)
mu_0 <- matrix(c(9, 5), nrow = 2, ncol = 1, byrow = TRUE)
n <- 3
p <- 2
## 2 x 2 matrix built from the column standard deviations and the covariance.
## `nrow = 2` belongs to matrix(); inside c() it became a fifth data value
## and the result was a 5 x 1 matrix.
standardde <- matrix(
  c(
    sd(multdata[, 1]),
    -1 * cov(multdata[, 1], multdata[, 2]),
    cov(multdata[, 1], multdata[, 2]),
    sd(multdata[, 2])
  ),
  nrow = 2
)
## Hand-entered matrices: standard deviations (2, 3) and variances (4, 9) on
## the diagonals, covariance -3 (sign flipped in standarddev2) off-diagonal
standarddev1 <- matrix(c(2, rep(-3, 2), 3), nrow = 2, ncol = 2, byrow = TRUE)
standarddev2 <- matrix(c(4, -1 * rep(standarddev1[2, 1], 2), 9), nrow = 2, ncol = 2, byrow = TRUE)
## Determinant of the sample covariance matrix: var1 * var2 - cov^2
d <- sd(multdata[, 1])^2 * sd(multdata[, 2])^2 - (cov(multdata[, 1], multdata[, 2])^2)
## NOTE(review): if this is meant to be the inverse of the covariance matrix,
## the diagonal of standarddev2 would have to be swapped (9 and 4);
## solve(cov(multdata)) gives the inverse directly. Left unchanged; confirm.
matrix_transponse <- 1 / d * (standarddev2)
## Sample mean vector and its difference from the hypothesised mean
mu_1 <- matrix(c(mean(multdata[, 1]), mean(multdata[, 2])))
diffe <- matrix(mu_1 - mu_0)
diffe
## [,1]
## [1,] -1
## [2,] 1