Load packages

library(ggplot2)

Load swiss dataset

# Load the built-in `swiss` data frame (from the datasets package) into the
# workspace. Straight ASCII quotes are required: typographic quotes are not
# valid string delimiters in R.
data("swiss")

Exploratory data analysis

summary statistics

# Each call sits on its own line so that the trailing comment of the first
# statement does not swallow the second one.
summary(swiss) # providing a summary of each variable in the dataset
head(swiss) # view the first rows of the dataset

summary statistics on one variable (e.g., fertility)

# Mean and standard deviation of the Fertility column.
mean(swiss$Fertility)
sd(swiss$Fertility)

Exploratory data analysis

summary statistics

# Each call sits on its own line so that the trailing comment of the first
# statement does not swallow the second one.
summary(swiss) # providing a summary of each variable in the dataset
head(swiss) # view the first rows of the dataset

summary statistics on one variable (e.g., fertility)

# Mean and standard deviation of the Fertility column.
mean(swiss$Fertility)
sd(swiss$Fertility)

summary statistics on two variables (e.g., fertility and education)

# Mean and standard deviation of Education, then of Fertility.
mean(swiss$Education)
sd(swiss$Education)
mean(swiss$Fertility)
sd(swiss$Fertility)

# plot the correlation between fertility and education
# cor() prints the Pearson correlation coefficient; plot() draws a base-R
# scatterplot with Fertility on the x-axis and Education on the y-axis.
cor(swiss$Fertility, swiss$Education)
plot(swiss$Fertility, swiss$Education)

create a scatterplot between Fertility and Agriculture using ggplot2

# Basic scatterplot: Fertility on the x-axis, Agriculture on the y-axis.
ggplot(swiss, aes(Fertility, Agriculture)) +
  geom_point()

improve the visualization: add a title, apply the minimal theme, and change the color of the points to a more neutral one

# Scatterplot with grey points, the minimal theme and a title.
# String arguments use straight ASCII quotes so the call parses.
ggplot(data = swiss, aes(x = Fertility, y = Agriculture)) +
  geom_point(color = "grey") +
  theme_minimal() +
  labs(title = "Fertility and Agriculture in Switzerland")

I want the dots to be blue for better visibility

# Same scatterplot with blue points for better visibility.
# String arguments use straight ASCII quotes so the call parses.
ggplot(data = swiss, aes(x = Fertility, y = Agriculture)) +
  geom_point(color = "blue") +
  theme_minimal() +
  labs(title = "Fertility and Agriculture in Switzerland")

add the regression line to the plot to visualize a trend in the data

# Scatterplot with a fitted linear-regression line (no confidence band).
# String arguments use straight ASCII quotes so the call parses.
ggplot(data = swiss, aes(x = Fertility, y = Agriculture)) +
  geom_point(color = "blue") +
  theme_minimal() +
  labs(title = "Fertility and Agriculture in Switzerland") +
  geom_smooth(method = "lm", se = FALSE)

make the line dashed

# Scatterplot with a dashed linear-regression line (no confidence band).
# String arguments use straight ASCII quotes so the call parses.
ggplot(data = swiss, aes(x = Fertility, y = Agriculture)) +
  geom_point(color = "blue") +
  theme_minimal() +
  labs(title = "Fertility and Agriculture in Switzerland") +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed")

# create a density plot of the variable fertility
# The plotting code lives on its own lines (below the comment) so it actually
# runs, and all string arguments use straight ASCII quotes.
ggplot(swiss, aes(x = Fertility)) +
  geom_bar(stat = "density", fill = "skyblue") +
  labs(
    title = "Fertility Rates in Swiss Provinces",
    y = "Density",
    x = "Fertility Rate"
  ) +
  theme_minimal() +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Density plot of Fertility with a dashed red line at the mean and a label.
# The explanatory comment is placed on its own line so that it no longer
# comments out the geom_vline() and label layers that follow it.
ggplot(swiss, aes(x = Fertility)) +
  geom_bar(stat = "density", fill = "skyblue") +
  labs(
    title = "Fertility Rates in Swiss Provinces",
    y = "Density",
    x = "Fertility Rate"
  ) +
  theme_minimal() +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept here because the installed version is unknown.
  geom_vline(
    aes(xintercept = mean(Fertility)),
    color = "red", linetype = "dashed", size = 1
  ) +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would draw one overlapping copy per row of the data.
  annotate(
    "text",
    x = 80, y = 0.02, label = "Mean Fertility",
    color = "red", size = 4
  )

I want to remove the index and add it as a column in the dataset. Suggest a way to do it.

# Copy the row names (the canton names) into a regular column, then drop the
# row-name index so it is replaced by the default 1..n numbering.
swiss$Cantons <- rownames(swiss)
rownames(swiss) <- NULL
head(swiss)

# Reload the original data and convert it to a tibble, moving the row names
# into a column called "Cantons".
data("swiss")
library(tibble)
swiss <- as_tibble(swiss, rownames = "Cantons")

# i want to remove all objects from my environment
# NOTE(review): this wipes every object in the calling environment; it is
# fine for an interactive walkthrough but should not be kept in a script
# that others will source.
rm(list = ls())

# load dplyr package (and ggplot2 for the plots below)
library(dplyr)
library(ggplot2)

# load swiss data
data(swiss)

# I want to change the column Catholic to a factor of 1 if the value is above
# 50 and 0 if the value is below 50
# Note: ifelse() yields a numeric 0/1 indicator here, not an R factor; a value
# of exactly 50 is coded 0.
swiss$Catholic <- ifelse(swiss$Catholic > 50, 1, 0)
swiss

# Explore the mean education level of the Catholic and non-Catholic cantons
swiss %>%
  group_by(Catholic) %>%
  summarise(mean(Education))

# Filter the cantons with education level above 10
high_education <- swiss %>%
  filter(Education > 10)

# Explore the correlation between high education cantons and fertility
# cor.test() is called with its defaults on the Fertility and Education
# columns of the filtered data; the result is stored and then printed.
cor_test_result <- cor.test(high_education$Fertility, high_education$Education)
cor_test_result

# Visualize the correlation between high education cantons and fertility
# Scatterplot of the filtered cantons with a linear-regression line and no
# confidence band. String arguments use straight ASCII quotes.
ggplot(high_education, aes(x = Fertility, y = Education)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Correlation between high education cantons and fertility",
    x = "Fertility",
    y = "Education"
  )

You can also ask Copilot questions.

q: What is the definition of mean of a variable?

a: The mean is the average of the numbers. It is easy to calculate: add up all the numbers, then divide by how many numbers there are. In other words, it is the sum divided by the count.