# With no arguments, data() lists the datasets available in the attached packages
data()

Introduction

In this assignment, I explore the Iris dataset (Edgar Anderson’s Iris Data), which contains sepal and petal measurements for flowers from three iris species.

Variables:

Sepal.Length: Sepal length in centimeters (Numeric)

Sepal.Width: Sepal width in centimeters (Numeric)

Petal.Length: Petal length in centimeters (Numeric)

Petal.Width: Petal width in centimeters (Numeric)

Species: Species of iris flower (setosa, versicolor, virginica) (Categorical)

# Install tidyverse only when it is missing. Calling install.packages() on a
# package that is already loaded makes RStudio stop with
# "Error in install.packages : Updating loaded packages", and an unconditional
# install would also re-download the package on every run of the notebook.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse) # Attach the tidyverse packages (dplyr, tidyr, ggplot2, ...)
library(ggplot2) # Already attached by tidyverse; kept as an explicit dependency

# Load the built-in iris dataset into the workspace
data(iris)

# Show the first six rows to check the column names and typical values
head(iris)

Scatter Plot

# Species already ships as a factor; the conversion is kept so the colour
# mapping below never depends on how `iris` was loaded.
iris$Species <- as.factor(iris$Species)
species_levels <- levels(iris$Species)
# One palette index per level, so the points and the legend always agree
# (previously the legend hard-coded 1:3 independently of the factor).
species_cols <- seq_along(species_levels)

# Scatter plot of Sepal.Length vs Sepal.Width, coloured by species
plot(iris$Sepal.Length, iris$Sepal.Width,
     xlab = "Sepal Length", ylab = "Sepal Width",
     main = "Scatter Plot of Sepal Length vs Sepal Width",
     col = species_cols[as.integer(iris$Species)], pch = 19)
legend("topright", legend = species_levels, col = species_cols, pch = 19)

Line Graph

library(dplyr)
library(ggplot2)

# Tag every observation with its row position so it can serve as the x-axis
iris_line <- mutate(iris, index = row_number())

# Draw Sepal.Length against the row position as a single connected line
ggplot(data = iris_line, mapping = aes(x = index, y = Sepal.Length)) +
  labs(title = "Sepal Length by Index", x = "Index", y = "Sepal Length") +
  geom_line()

Horizontal Bar Chart

# Mean Sepal.Length per species (one row per species)
avg_sepal_length_by_species <- summarize(
  group_by(iris, Species),
  avg_sepal_length = mean(Sepal.Length)
)

# Horizontal bars: species on the y-axis, the mean on the x-axis
ggplot(
  data = avg_sepal_length_by_species,
  mapping = aes(x = avg_sepal_length, y = Species)
) +
  geom_bar(stat = "identity", fill = "lightblue") +
  labs(
    title = "Average Sepal Length by Species",
    x = "Average Sepal Length",
    y = "Species"
  )

Stacked Bar Chart

library(tidyr)

# Columns holding the four flower measurements; listed once so the summary and
# the reshape below cannot drift apart.
measurement_cols <- c(
  "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
)

# Mean of every measurement per species, reshaped to long format:
# one row per (Species, Measurement) pair with the mean in `Average`
bar_data_iris <- iris %>%
  group_by(Species) %>%
  summarize(across(all_of(measurement_cols), mean)) %>%
  pivot_longer(
    cols = all_of(measurement_cols),
    names_to = "Measurement",
    values_to = "Average"
  )

# Stacked bars: one bar per species, one segment per measurement.
# geom_col() is the ggplot2 shorthand for geom_bar(stat = "identity").
ggplot(bar_data_iris, aes(x = Species, y = Average, fill = Measurement)) +
  geom_col() +
  labs(
    title = "Average Measurements by Species",
    x = "Species",
    y = "Average Measurement"
  )

Conclusion

This notebook demonstrates several data visualization techniques (a scatter plot, a line graph, a horizontal bar chart, and a stacked bar chart) using the Iris dataset, which is a public dataset built into R.

LS0tCnRpdGxlOiAiQ2xhc3MgQWN0aXZpdHkgMzogSXJpcyBEYXRhc2V0IFZpc3VhbGl6YXRpb24iCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KYGBge3J9CmRhdGEoKQpgYGAKSW50cm9kdWN0aW9uCkluIHRoaXMgYXNzaWdubWVudCwgSSBleHBsb3JlIHRoZSBJcmlzIGRhdGFzZXQgKEVkZ2FyIEFuZGVyc29uJ3MgSXJpcyBEYXRhKSwgd2hpY2ggY29udGFpbnMgaW5mb3JtYXRpb24gYWJvdXQgdGhlIG1lYXN1cmVtZW50cyBvZiBkaWZmZXJlbnQgSXJpcyBmbG93ZXJzLgoKVmFyaWFibGVzOgoKU2VwYWwuTGVuZ3RoOiBTZXBhbCBsZW5ndGggaW4gY2VudGltZXRlcnMgKE51bWVyaWMpCgpTZXBhbC5XaWR0aDogU2VwYWwgd2lkdGggaW4gY2VudGltZXRlcnMgKE51bWVyaWMpCgpQZXRhbC5MZW5ndGg6IFBldGFsIGxlbmd0aCBpbiBjZW50aW1ldGVycyAoTnVtZXJpYykKClBldGFsLldpZHRoOiBQZXRhbCB3aWR0aCBpbiBjZW50aW1ldGVycyAoTnVtZXJpYykKClNwZWNpZXM6IFNwZWNpZXMgb2YgaXJpcyBmbG93ZXIgKFNldG9zYSwgVmVyc2ljb2xvciwgVmlyZ2luaWNhKSAoQ2F0ZWdvcmljYWwpCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KHRpZHl2ZXJzZSkgIyBMb2FkIHRoZSB0aWR5dmVyc2UgcGFja2FnZSBmb3IgZGF0YSBtYW5pcHVsYXRpb24gYW5kIHZpc3VhbGl6YXRpb24KbGlicmFyeShnZ3Bsb3QyKSAjIExvYWQgZ2dwbG90MiBmb3IgY3JlYXRpbmcgcGxvdHMKCiMgTG9hZCB0aGUgZGF0YXNldCAoYnVpbHQgaW50byBSKQpkYXRhKGlyaXMpCgojIFZpZXcgdGhlIGZpcnN0IGZldyByb3dzIHRvIHVuZGVyc3RhbmQgdGhlIGRhdGEKaGVhZChpcmlzKQpgYGAKU2NhdHRlciBQbG90CmBgYHtyfQojIENvbnZlcnQgJ1NwZWNpZXMnIHRvIGEgZmFjdG9yIChpdCBzaG91bGQgYWxyZWFkeSBiZSBhIGZhY3RvciwgYnV0IEkgZGlkIGl0IGp1c3QgZm9yIGNsYXJpdHkpCmlyaXMkU3BlY2llcyA8LSBhcy5mYWN0b3IoaXJpcyRTcGVjaWVzKQojIFNjYXR0ZXIgcGxvdCBvZiBTZXBhbC5MZW5ndGggdnMgU2VwYWwuV2lkdGgKcGxvdChpcmlzJFNlcGFsLkxlbmd0aCwgaXJpcyRTZXBhbC5XaWR0aCwgCiAgICAgeGxhYiA9ICJTZXBhbCBMZW5ndGgiLCB5bGFiID0gIlNlcGFsIFdpZHRoIiwgCiAgICAgbWFpbiA9ICJTY2F0dGVyIFBsb3Qgb2YgU2VwYWwgTGVuZ3RoIHZzIFNlcGFsIFdpZHRoIiwKICAgICBjb2wgPSBpcmlzJFNwZWNpZXMsIHBjaCA9IDE5KQpsZWdlbmQoInRvcHJpZ2h0IiwgbGVnZW5kID0gbGV2ZWxzKGlyaXMkU3BlY2llcyksIGNvbCA9IDE6MywgcGNoID0gMTkpCmBgYApMaW5lIEdyYXBoCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGdncGxvdDIpCgojIEFkZCBhbiBpbmRleCBjb2x1bW4gdG8gdGhlIGlyaXMgZGF0YXNldCB0byByZXByZXNlbnQgcm93IG51bWJlcnMKaXJpc19saW5lIDwtIGlyaXMgJT4lIG11dGF0ZShpbmRleCA9IHJvd19udW1iZXIoKSkKCiMgQ3JlYXRlIHRo
ZSBsaW5lIGdyYXBoIG9mIFNlcGFsLkxlbmd0aCBieSBpbmRleCAocm93IG51bWJlcikKZ2dwbG90KGlyaXNfbGluZSwgYWVzKHggPSBpbmRleCwgeSA9IFNlcGFsLkxlbmd0aCkpICsKICBnZW9tX2xpbmUoKSArICAjIEFkZCBhIGxpbmUKICBsYWJzKHRpdGxlID0gIlNlcGFsIExlbmd0aCBieSBJbmRleCIsIHggPSAiSW5kZXgiLCB5ID0gIlNlcGFsIExlbmd0aCIpICAjIEFkZCBwbG90IGxhYmVscwpgYGAKSG9yaXpvbnRhbCBCYXIgQ2hhcnQKYGBge3J9CiMgQ2FsY3VsYXRlIHRoZSBhdmVyYWdlIFNlcGFsLkxlbmd0aCBmb3IgZWFjaCBzcGVjaWVzCmF2Z19zZXBhbF9sZW5ndGhfYnlfc3BlY2llcyA8LSBpcmlzICU+JSAKICBncm91cF9ieShTcGVjaWVzKSAlPiUgCiAgc3VtbWFyaXplKGF2Z19zZXBhbF9sZW5ndGggPSBtZWFuKFNlcGFsLkxlbmd0aCkpCgojIENyZWF0ZSBhIGhvcml6b250YWwgYmFyIGNoYXJ0IG9mIGF2ZXJhZ2UgU2VwYWwgTGVuZ3RoIGJ5IFNwZWNpZXMKZ2dwbG90KGF2Z19zZXBhbF9sZW5ndGhfYnlfc3BlY2llcywgYWVzKHkgPSBTcGVjaWVzLCB4ID0gYXZnX3NlcGFsX2xlbmd0aCkpICsKICBnZW9tX2JhcihzdGF0ID0gJ2lkZW50aXR5JywgZmlsbCA9ICJsaWdodGJsdWUiKSArICAjIENyZWF0ZSBiYXJzIGJhc2VkIG9uIGF2ZXJhZ2UgU2VwYWwgTGVuZ3RoCiAgbGFicyh0aXRsZSA9ICJBdmVyYWdlIFNlcGFsIExlbmd0aCBieSBTcGVjaWVzIiwgeSA9ICJTcGVjaWVzIiwgeCA9ICJBdmVyYWdlIFNlcGFsIExlbmd0aCIpICAjIEFkZCBwbG90IGxhYmVscwpgYGAKU3RhY2tlZCBCYXIgQ2hhcnQKYGBge3J9CmxpYnJhcnkodGlkeXIpCgojIENhbGN1bGF0ZSBhdmVyYWdlIHZhbHVlcyBmb3IgZWFjaCBtZWFzdXJlbWVudCBhbmQgcGl2b3QgdGhlIGRhdGEgaW50byBhIGxvbmcgZm9ybWF0CmJhcl9kYXRhX2lyaXMgPC0gaXJpcyAlPiUgCiAgZ3JvdXBfYnkoU3BlY2llcykgJT4lIAogIHN1bW1hcml6ZSgKICAgIFNlcGFsLkxlbmd0aCA9IG1lYW4oU2VwYWwuTGVuZ3RoKSwgCiAgICBTZXBhbC5XaWR0aCA9IG1lYW4oU2VwYWwuV2lkdGgpLAogICAgUGV0YWwuTGVuZ3RoID0gbWVhbihQZXRhbC5MZW5ndGgpLAogICAgUGV0YWwuV2lkdGggPSBtZWFuKFBldGFsLldpZHRoKQogICkgJT4lIAogIHBpdm90X2xvbmdlcihjb2xzID0gYygiU2VwYWwuTGVuZ3RoIiwgIlNlcGFsLldpZHRoIiwgIlBldGFsLkxlbmd0aCIsICJQZXRhbC5XaWR0aCIpLCAKICAgICAgICAgICAgICAgbmFtZXNfdG8gPSAiTWVhc3VyZW1lbnQiLCB2YWx1ZXNfdG8gPSAiQXZlcmFnZSIpCgojIENyZWF0ZSBhIHN0YWNrZWQgYmFyIGNoYXJ0IG9mIGF2ZXJhZ2UgU2VwYWwgYW5kIFBldGFsIG1lYXN1cmVtZW50cyBieSBTcGVjaWVzCmdncGxvdChiYXJfZGF0YV9pcmlzLCBhZXMoeCA9IFNwZWNpZXMsIGZpbGwgPSBNZWFzdXJlbWVudCwgeSA9IEF2ZXJhZ2UpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsgICMgQ3JlYXRlIGJhcnMgYmFz
ZWQgb24gdGhlIGNhbGN1bGF0ZWQgYXZlcmFnZXMKICBsYWJzKHRpdGxlID0gIkF2ZXJhZ2UgTWVhc3VyZW1lbnRzIGJ5IFNwZWNpZXMiLCB4ID0gIlNwZWNpZXMiLCB5ID0gIkF2ZXJhZ2UgTWVhc3VyZW1lbnQiKSAgIyBBZGQgcGxvdCBsYWJlbHMKYGBgCkNvbmNsdXNpb24KClRoaXMgbm90ZWJvb2sgZGVtb25zdHJhdGVzIHZhcmlvdXMgZGF0YSB2aXN1YWxpemF0aW9uIHRlY2huaXF1ZXMgdXNpbmcgdGhlIElyaXMgZGF0YXNldCB3aGljaCBpcyBhIHB1YmxpYyBkYXRhc2V0IGJ1aWx0IGludG8gUi4=