## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
## [1] "numeric"
## [1] "numeric"
## [1] "numeric"
## [1] "numeric"
# Convert the categorical columns (cyl, am, vs, gear) to factors in one pass.
# Assigning back by column name leaves every other column untouched.
mtcars[c("cyl", "am", "vs", "gear")] <- lapply(
  mtcars[c("cyl", "am", "vs", "gear")],
  as.factor
)
# Confirm that the conversion took effect
class(mtcars[["cyl"]])
## [1] "factor"
## [1] "factor"
## [1] "factor"
## [1] "factor"
## [1] "4" "6" "8"
## [1] "0" "1"
## [1] "0" "1"
## [1] "3" "4" "5"
# Consider mtcars$vs. "vs" records the engine shape; per the mtcars
# documentation the coding is vs=0 for a V-shaped engine and vs=1 for a
# straight engine.
# 1) Inspect the class of column vs; convert it with as.factor() if it is not
#    already a factor
class(mtcars[["vs"]])
## [1] "factor"
##
## 0 1
## 18 14
## [1] "0" "1"
# 4) Reorder the levels of mtcars$vs so that "1" is listed before "0"
mtcars[["vs"]] <- factor(mtcars[["vs"]], levels = c("1", "0"))
# 5) Confirm that the level order has changed
levels(mtcars[["vs"]])
## [1] "1" "0"
##
## 1 0
## 14 18
##
## 0 1
## 19 13
##
## 0 1
## 0.59375 0.40625
##
## 0 1
## 59.4 40.6
## [1] "59.4%" "40.6%"
# 3b) Generate the Pie Chart
# One colour per slice, in the same order as the entries of tab2. The vector is
# reused for the legend so that the key always matches the chart.
# NOTE(review): tab2 and pielabels are created earlier in the script; this
# assumes tab2 is the named 1-D proportion table of mtcars$am -- confirm.
slice_cols <- c("lightblue", "red")
pie(tab2,
    col = slice_cols,
    labels = pielabels,
    main = '% of cars by Transmission (am)',
    cex = 1.1)
# 3c) Legend for the pie chart
# Labels are taken from the names of tab2 (rather than hard-coded) and the
# fill colours are the ones used for the slices.
legend("topright",
       legend = names(tab2),
       cex = 0.8,
       fill = slice_cols)
library(plotly)
# 1) Count the cars per transmission type (am) and turn the counts into
#    proportions; this is the table the pie chart is based on
tab1 <- table(mtcars$am)
tab2 <- prop.table(tab1)
# 2) Express the proportions as percentages (one decimal) and store them in a
#    data frame with one row per level of am
percent <- round(100 * tab2, digits = 1)
df <- data.frame(percent)
df
## Var1 Freq
## 1 0 59.4
## 2 1 40.6
# 1) Percentage of cars for each cylinder count, rounded to two decimals
tab1 <- table(mtcars$cyl)
tab2 <- prop.table(tab1)
tab3 <- round(100 * tab2, digits = 2)
tab3
##
## 4 6 8
## 34.38 21.88 43.75
# 2) Draw the bar plot; the value returned by barplot() is kept in bp so the
#    labels below can be positioned on the bars
bp <- barplot(
  height = tab3,
  main = "% Cars by number of cylinders (cyl=4,6,8)",
  xlab = "cyl",
  ylab = "Percent (%)",
  ylim = c(0, 60),
  col = "yellow",
  beside = TRUE
)
# 3) (Optional) Write each percentage just above the baseline of its bar
text(x = bp, y = 0, labels = tab3, pos = 3)
# 1) Create the Contingency Table
# Percentage of cars per cylinder count, as a data frame (columns Var1, Freq)
tab1 <- table(mtcars$cyl)
tab2 <- prop.table(tab1)
tab3 <- round(100 * tab2, digits = 2)
tab4 <- data.frame(tab3)
tab4
## Var1 Freq
## 1 4 34.38
## 2 6 21.88
## 3 8 43.75
# 2) Set the x axis and y axis for the Bar Chart
# cyl is converted to a factor first so that its levels can serve as x values
mtcars$cyl <- as.factor(mtcars$cyl)
x <- levels(mtcars$cyl)
y <- tab4$Freq
# 3) (Optional) Bar labels: each cylinder count followed by "cyl"
mytext <- paste(x, "cyl")
# 4) Assemble the data frame that feeds the bar chart
df <- data.frame(x = x, y = y, mytext = mytext)
#df
# 5) Generate the bar chart
# x, y and the bar labels are all read from the columns of df.
# The marker colour is a light blue; every RGB channel must lie in 0-255
# (the previous red value of 258 was out of range).
fig <- plot_ly(df,
               x = ~x,
               y = ~y,
               type = 'bar',
               text = ~mytext,
               textposition = 'auto',
               marker = list(color = 'rgb(158,202,225)')
)
fig # 6) Display the bar chart
# 1)
# Cross-tabulate transmission (rows, am) against cylinders (columns, cyl).
# prop.table() without a margin divides by the grand total, so the six cells
# of tab3 are percentages of all cars.
tab1 <- table(mtcars$am, mtcars$cyl)
tab2 <- prop.table(tab1)
tab3 <- round(tab2 * 100, 2)
# 2) Grouped bar-plot
# legend.text is spelled out in full instead of relying on partial matching
# of the abbreviated argument name `legend`.
bp <- barplot(tab3,
              xlab = "cyl", ylab = "Percent (%)",
              main = "% Cars by number of cylinders (cyl=4,6,8) and transmission (am=0,1)",
              col = c("lightblue", "orange"),
              beside = TRUE,
              ylim = c(0, 50),
              legend.text = rownames(tab3))
# 3) (Optional) Display percentage on the bars
text(bp, 0, tab3, pos = 3)
# 1) Create table of counts
# Counts of cars by transmission (rows, am) and cylinders (columns, cyl)
counts <- table(mtcars$am, mtcars$cyl)
# 2) Generate a Stacked Bar plot
# With the default beside = FALSE the rows of counts are stacked within each
# cyl bar. legend.text is spelled out in full instead of relying on partial
# matching of the abbreviated argument name `legend`.
barplot(counts,
        col = c("white", "black"),
        xlab = "cyl", ylab = "Counts",
        main = "Count Cars by cylinders (cyl=4,6,8) and transmission (am=0,1)",
        legend.text = rownames(counts))
## The following object is masked from package:ggplot2:
##
## mpg
## am
## 0 1 Sum
## 19 13 32
## am
## 0 1
## 59.375 40.625
##
## Chi-squared test for given probabilities
##
## data: table_am
## X-squared = 0.0052083, df = 1, p-value = 0.9425
Since the p-value > 0.05, we fail to reject the null hypothesis H0a.
##
## Chi-squared test for given probabilities
##
## data: table_am
## X-squared = 8.5078, df = 1, p-value = 0.003536
Since the p-value < 0.05, we reject the null hypothesis H0b.
##
## Chi-squared test for given probabilities
##
## data: table_am
## X-squared = 1.125, df = 1, p-value = 0.2888
Since the p-value > 0.05, we fail to reject the null hypothesis H0c.
## cyl
## 4 6 8 Sum
## 11 7 14 32
## cyl
## 4 6 8
## 34.375 21.875 43.750
##
## Chi-squared test for given probabilities
##
## data: tablecyl
## X-squared = 2.3125, df = 2, p-value = 0.3147
Since the p-value > 0.05, we fail to reject the null hypothesis H0d.