A M M A M M E M O A E E M A O E M A M A M A O A M E E M A M
Ans.
# Raw survey responses: the major code recorded for each of the 30 students.
Majors <- data.frame(
  Majors = c(
    "A", "M", "M", "A", "M", "M", "E", "M", "O", "A",
    "E", "E", "M", "A", "O", "E", "M", "A", "M", "A",
    "M", "A", "O", "A", "M", "E", "E", "M", "A", "M"
  )
)

# Number of students per major, and the same counts as a share of all rows.
frequency <- table(Majors)
relativefrequency <- frequency / nrow(Majors)

# One-column matrices (rows labelled by major) used for display.
frequencydistribution <- cbind(frequency)
relativefrequencydistribution <- cbind(relativefrequency)
# Divert console output to Results.txt and write both distributions to it.
sink("Results.txt")
# print() is called explicitly: top-level auto-printing is skipped when the
# script is run through source(), which would leave Results.txt empty.
print(frequencydistribution)
## frequency
## A 9
## E 6
## M 12
## O 3
print(relativefrequencydistribution)
## relativefrequency
## A 0.3
## E 0.2
## M 0.4
## O 0.1
# Restore normal console output.
sink()
# Save the raw responses to disk.
write.csv(Majors, "Majors.csv")

# Bar charts of the counts: a bare one first, then a labelled, coloured one.
barplot(frequency)
barplot(
  frequency,
  col = c("Red", "Blue", "Green", "Yellow"),
  xlab = "Majors",
  ylab = "Frequency"
)

# Pie chart whose slice labels show the major and, on a second line, its
# rounded percentage share.
pct <- round(frequency / sum(frequency) * 100)
lbls <- paste0(names(frequency), "\n ", pct, "%")
pie(frequency, labels = lbls, col = rainbow(length(frequency)))
3 12 4 7 14 6 2 9 11
Ans.
library(psych)
# Ages in the sample, held as a one-column data frame.
data_set <- data.frame(Age = c(3, 12, 4, 7, 14, 6, 2, 9, 11))

# Minimum, quartiles, median, mean and maximum.
summary(data_set[["Age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 7.000 7.556 11.000 14.000

# Five-number summary (minimum, lower hinge, median, upper hinge, maximum).
fivenum(data_set[["Age"]])
## [1] 2 4 7 11 14

# describe() comes from the psych package loaded above.
describe(data_set[["Age"]])
A B A B C
C C B B B
B A B B B
C B C B A
Ans.
# Letter grades awarded to the 20 students.
Grade <- c(
  "A", "C", "B", "C", "B", "C", "A", "B", "A", "B",
  "B", "C", "B", "B", "B", "B", "C", "B", "B", "A"
)
dataset <- data.frame(Grade)

# Count of students per grade, shown as a one-column data frame.
frequency <- table(dataset)
frequencydistribution <- data.frame(cbind(frequency))
frequencydistribution

# Legend entries: the grade letter immediately followed by its count.
lbls <- paste(row.names(frequencydistribution), frequency, "\n", sep = "")

# legend.text alone supplies the legend entries. args.legend is reserved for a
# named list of extra legend() arguments, so the label vector is not passed
# there (an unnamed character vector in that slot is silently ignored).
barplot(table(dataset), legend.text = lbls, col = rainbow(length(frequency)))
Ans.
# Letter grades awarded to the 20 students.
Grade <- c(
  "A", "C", "B", "C", "B", "C", "A", "B", "A", "B",
  "B", "C", "B", "B", "B", "B", "C", "B", "B", "A"
)
dataset <- data.frame(Grade)
frequency <- table(dataset)

# Slice labels: grade letter, then its rounded percentage on a second line.
pct <- round(frequency / sum(frequency) * 100)
lbls <- paste0(names(frequency), "\n ", pct, "%")

pie(frequency, labels = lbls, col = rainbow(length(frequency)))
Ans.
library(psych)
# Eight faculty members (numbered 1 to 8) and their ages.
Faculty <- seq_len(8)
Age <- c(42, 30, 73, 50, 51, 37, 42, 59)
dataset <- data.frame(Faculty, Age)

# Minimum, quartiles, median, mean and maximum of the ages.
summary(dataset[["Age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.00 40.75 46.00 48.00 53.00 73.00

# Five-number summary (minimum, lower hinge, median, upper hinge, maximum).
fivenum(dataset[["Age"]])
## [1] 30.0 39.5 46.0 55.0 73.0

# describe() comes from the psych package loaded above.
describe(dataset[["Age"]])
## Course Credithours Grade
## 1 Chemisry 5 C
## 2 Calculus 5 A
## 3 English 4 C
## 4 Music 3 F
## 5 PE 1 A
The local university uses a 4-point grading system, i.e., A = 4, B = 3, C = 2, D = 1, F = 0. Compute Marissa’s semester grade point average.
# Semester report: one row per course with its credit hours and letter grade.
# Course names are kept exactly as recorded.
Course <- c("Chemisry", "Calculus", "English", "Music", "PE")
Credithours <- c(5, 5, 4, 3, 1)
Grade <- c("C", "A", "C", "F", "A")

Report <- data.frame(
  Course,
  Credithours,
  Grade
)
# Convert letter grades to grade points on the 4-point scale
# (A = 4, B = 3, C = 2, D = 1, F = 0).
#
# grade: a character vector (or factor) of letter grades; any length,
#        including zero.
# Returns an unnamed numeric vector of the same length. NA input and
# unrecognised grades give NA; unrecognised grades also raise a warning
# instead of being passed through unchanged as text.
equi <- function(grade) {
  grade_points <- c(A = 4, B = 3, C = 2, D = 1, F = 0)
  grade <- as.character(grade)

  unknown <- !is.na(grade) & !(grade %in% names(grade_points))
  if (any(unknown)) {
    warning(
      "Unrecognised grade(s): ",
      paste(unique(grade[unknown]), collapse = ", "),
      call. = FALSE
    )
  }

  # Look the grades up by name; unmatched names index to NA.
  unname(grade_points[grade])
}
# Convert each letter grade to grade points. vapply() yields a plain numeric
# column; lapply() would create a list column, which breaks arithmetic on the
# column and cannot be written out with write.csv().
Report$gradenumeric <- vapply(
  Report$Grade,
  equi,
  numeric(1),
  USE.NAMES = FALSE
)
# Weighted score for a course: its credit hours multiplied by its grade points.
cgpa <- function(Credithours, Grade) {
  Credithours * Grade
}
# Weighted score per course (credit hours x grade points).
Report[["gpa"]] <- mapply(cgpa, Report[["Credithours"]], Report[["gradenumeric"]])

# Grade point average: total weighted score over total credit hours.
gpacalc <- sum(Report[["gpa"]]) / sum(Report[["Credithours"]])
cat(sprintf("\n Grade Point Average = %f\n", gpacalc))
##
## Grade Point Average = 2.333333
## x y
## 1 2 7
## 2 6 19
## 3 3 9
## 4 5 17
## 5 4 11
A positive relationship between x and y appears to exist as per the scatterplot below.
# Five paired observations.
x <- c(2, 6, 3, 5, 4)
y <- c(7, 19, 9, 17, 11)
dataset <- data.frame(x, y)

# Plot the two columns against each other.
plot(dataset, col = "blue")

# The same pair of variables as a scatterplot matrix.
pairs(~ x + y, data = dataset, main = "Simple Scatterplot Matrix of x and y")
## x y
## 1 8 4
## 2 5 5
## 3 3 9
## 4 2 12
## 5 1 14
A negative relationship between x and y appears to exist as per the scatterplot below.
# Five paired observations.
x <- c(8, 5, 3, 2, 1)
y <- c(4, 5, 9, 12, 14)
dataset <- data.frame(x, y)

# Plot the two columns against each other.
plot(dataset, col = "red")

# The same pair of variables as a scatterplot matrix.
pairs(~ x + y, data = dataset, main = "Simple Scatterplot Matrix of x and y")
library(datasets)
library(corrgram)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
# Load the iris data and keep a working copy.
data("iris")
ds <- iris
head(iris)

# One boxplot per measurement, split by species.
boxplot(
  Sepal.Length ~ Species, data = ds,
  col = "red", xlab = "Species", ylab = "Sepal Length"
)
boxplot(
  Sepal.Width ~ Species, data = ds,
  col = "blue", xlab = "Species", ylab = "Sepal Width"
)
boxplot(
  Petal.Length ~ Species, data = ds,
  col = "green", xlab = "Species", ylab = "Petal Length"
)
boxplot(
  Petal.Width ~ Species, data = ds,
  col = "orange", xlab = "Species", ylab = "Petal Width"
)

# Report the conclusion drawn from the boxplots.
cat(sprintf("\n From the boxplots, it can be concluded that Petal Width differentiates the different species of the flower.\n"))
##
## From the boxplots, it can be concluded that Petal Width differentiates the different species of the flower.
# Correlogram of the iris data: shaded cells below the diagonal, pie glyphs
# above it, with variables reordered by corrgram().
corrgram(
  ds,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Corrgram"
)

# Report the conclusion drawn from the correlogram.
cat(sprintf("\nFrom the corrgram , it can be concluded that Petal Width and Petal Length are highly positively correlated.\n"))
##
## From the corrgram , it can be concluded that Petal Width and Petal Length are highly positively correlated.
# Histogram of each iris measurement. The first argument is written as
# ds$<column> on purpose: hist() builds its default title from that expression.
hist(ds$Sepal.Length, col = "red", xlab = "Sepal Length")
hist(ds$Sepal.Width, col = "blue", xlab = "Sepal Width")
hist(ds$Petal.Length, col = "green", xlab = "Petal Length")
hist(ds$Petal.Width, col = "orange", xlab = "Petal Width")
##
## Cell Contents
## |-------------------------|
## | Count |
## |-------------------------|
##
## Total Observations in Table: 300
##
## | Smoking
## Disease | Non-Smokers | Smokers | Row Total |
## -------------|-------------|-------------|-------------|
## Emphysema | 20 | 60 | 80 |
## -------------|-------------|-------------|-------------|
## HeartProblem | 70 | 80 | 150 |
## -------------|-------------|-------------|-------------|
## Cancer | 30 | 40 | 70 |
## -------------|-------------|-------------|-------------|
## Column Total | 120 | 180 | 300 |
## -------------|-------------|-------------|-------------|
##
##
## NULL
We are interested in visualizing whether or not illness is independent of smoking.
# Counts of patients by disease (rows) and smoking status (columns);
# the values fill the matrix column by column.
datamatrix <- matrix(
  c(20, 70, 30, 60, 80, 40),
  nrow = 3,
  ncol = 2,
  dimnames = list(
    c("Emphysema", "HeartProblem", "Cancer"),
    c("Non-Smokers", "Smokers")
  )
)

# Each cell as a proportion of the grand total.
prop.table(datamatrix)
## Non-Smokers Smokers
## Emphysema 0.06666667 0.2000000
## HeartProblem 0.23333333 0.2666667
## Cancer 0.10000000 0.1333333

# Association plot of disease against smoking status.
assocplot(datamatrix, col = c("green", "red"))
From the association plot, it can be concluded that illness is not independent of smoking. Green indicates a positive association between the corresponding categories, whereas red indicates a negative association between them.
Develop a percent frequency distribution.
Ans.
# Enrolment by major.
Major <- c("Accounting", "Finance", "Management", "Marketing")
Number_of_Students <- c(240, 160, 320, 80)
dataset <- data.frame(Major, Number_of_Students)

# Each major's share of total enrolment, as a percentage.
dataset[["Percentage_Students"]] <- with(
  dataset,
  Number_of_Students / sum(Number_of_Students) * 100
)
dataset