# Load the student score table from the remote CSV file
df <- read.csv(file = "https://pastebin.com/raw/cSZ8pYWh")
# Preview the first six rows
head(df, n = 6L)
## id english japanese nationality department classes gender
## 1 1 17.8 75.6 japan literature 2 male
## 2 2 64.4 53.3 nepal literature 2 male
## 3 3 86.7 31.1 nepal literature 1 male
## 4 4 60.0 62.2 indonesia literature 2 male
## 5 5 42.2 80.0 japan literature 1 male
## 6 6 33.3 75.6 japan literature 1 male
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean English score per nationality (one row per nationality).
# summarise() keeps only the grouping key and the new summary column,
# so no explicit column selection is needed beforehand.
df_by_nationality <- df %>%
  group_by(nationality) %>%
  summarise(mean_english = mean(english), .groups = "drop")
# Display the summary table
df_by_nationality
## # A tibble: 5 × 2
## nationality mean_english
## <chr> <dbl>
## 1 china 43.2
## 2 indonesia 51.1
## 3 japan 37.1
## 4 nepal 69.3
## 5 vietnam 45.8
library(ggplot2)
# Draw one bar per nationality; bar height is the precomputed mean score
ggplot(
  data = df_by_nationality,
  mapping = aes(x = nationality, y = mean_english)
) +
  geom_col() +
  theme_minimal() +
  labs(x = "Nationality", y = "Mean English Value")
You can learn how to use R Markdown for Python from the following tutorials:
Installing and Configuring Python with RStudio https://support.posit.co/hc/en-us/articles/360023654474-Installing-and-Configuring-Python-with-RStudio
How to Seamlessly Integrate Python into R/RMarkdown Codes https://medium.com/data-and-beyond/how-to-seamlessly-integrate-python-into-r-rmarkdown-codes-2fe09cfdd0ee
Create a project using RStudio
Navigate into the RStudio project directory
Create a directory called “my_env”
Create a virtual environment in the my_env directory
If you use pipenv and install Python 3.11, run the following command in the terminal:
pipenv --python 3.11 && pipenv shell
You can install libraries using pipenv:
pipenv install pandas matplotlib
Run the following code in the R console:
install.packages("reticulate")
Run the following commands to create a file called “.Renviron” in the my_env directory:
touch .Renviron
echo "RETICULATE_PYTHON=path/to/your/python" > .Renviron
You can show the path to your Python by running the following command in the my_env directory:
which python
import pandas as pd
# Fetch the student score table from the remote CSV file
df = pd.read_csv(filepath_or_buffer='https://pastebin.com/raw/cSZ8pYWh')
# Show the loaded frame
df
## id english japanese nationality department classes gender
## 0 1 17.8 75.6 japan literature 2 male
## 1 2 64.4 53.3 nepal literature 2 male
## 2 3 86.7 31.1 nepal literature 1 male
## 3 4 60.0 62.2 indonesia literature 2 male
## 4 5 42.2 80.0 japan literature 1 male
## .. ... ... ... ... ... ... ...
## 195 196 66.7 55.6 nepal economics 2 male
## 196 197 44.4 80.0 china economics 2 male
## 197 198 57.8 48.9 vietnam economics 1 female
## 198 199 86.7 26.7 vietnam economics 2 male
## 199 200 24.4 68.9 japan economics 2 male
##
## [200 rows x 7 columns]
# Mean English score per nationality: keep the two relevant columns,
# group on nationality, and average what remains.
df_by_nationality = (
    df.filter(items=['english', 'nationality'])
    .groupby('nationality')
    .mean()
)
# Show the grouped result
df_by_nationality
## english
## nationality
## china 43.150000
## indonesia 51.100000
## japan 37.133333
## nepal 69.311538
## vietnam 45.833333
# Plot the per-nationality means as a vertical bar chart
df_by_nationality.plot(kind="bar")