You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
# Derive a major occupation group label from the first character of
# `occupation_full`: the digits 1-9 are mapped to the labels below.
all_data$occupation <- mapvalues(
  substring(all_data$occupation_full, 1, 1),
  from = 1:9,
  to = c(
    "LEGISLATORS, SENIOR OFFICIALS AND MANAGERS",
    "PROFESSIONALS",
    "TECHNICIANS AND ASSOCIATE PROFESSIONALS",
    "CLERKS",
    "SERVICE WORKERS AND SHOP AND MARKET SALES WORKERS",
    "SKILLED AGRICULTURAL AND FISHERY WORKERS",
    "CRAFT AND RELATED TRADES WORKERS",
    "PLANT AND MACHINE OPERATORS AND ASSEMBLERS",
    "ELEMENTARY OCCUPATIONS"
  )
)
# Heatmap of household-composition (`hcomp`) counts for the selected countries.
library(ggplot2)
library(reshape2)

selected_countries <- c("JP", "FR", "FI")

# Keep selected countries with hcomp <= 20. The explicit is.na() guard matters:
# a bare `hcomp <= 20` evaluates to NA for missing hcomp, and an NA in a logical
# row index makes `[` return an all-NA row instead of dropping the row.
# NOTE(review): the 20 cutoff presumably excludes special/missing codes - confirm.
filtered_data <- all_data[
  all_data$country_code %in% selected_countries &
    !is.na(all_data$hcomp) &
    all_data$hcomp <= 20,
]

# Create the table and melt it
heatmap_data <- table(filtered_data$country_code, filtered_data$hcomp)
heatmap_data_melted <- melt(heatmap_data)

# Plot the heatmap
ggplot(heatmap_data_melted, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  labs(
    title = "Household Composition Heatmap for Selected Countries",
    x = "Country Code",
    y = "Household Composition",
    fill = "Count"
  ) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
text
# Proportional (position = "fill") bars of `union` values within each of the
# three selected countries.
all_data %>%
  filter(country_code %in% c("JP", "FR", "FI")) %>%
  ggplot(aes(x = country_code, fill = factor(union))) +
  geom_bar(position = "fill") +
  labs(
    title = "Union Membership Proportion by Country",
    x = "Country",
    y = "Proportion"
  ) +
  theme_minimal()
# Jittered points of working hours against household composition, one facet
# per selected country.
ggplot(
  data = filter(all_data, country_code %in% c("JP", "FR", "FI")),
  mapping = aes(x = factor(hcomp), y = working_hours, color = country_code)
) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  facet_wrap(~ country_code) +
  labs(
    title = "Working Hours by Household Composition",
    x = "Household Composition",
    y = "Working Hours"
  ) +
  theme_minimal() +
  # Small, rotated tick labels so the many hcomp categories stay legible.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
Warning: Removed 1526 rows containing missing values or values outside the scale range
(`geom_point()`).
# Age histograms for the three selected countries, one facet per country.
all_data %>%
  filter(country_code %in% c("JP", "FR", "FI")) %>%
  ggplot(aes(x = age, fill = country_code)) +
  geom_histogram(alpha = 0.7) +
  facet_wrap(~ country_code) +
  labs(
    title = "Distribution of Age (Selected Countries)",
    x = "Age",
    y = "Count"
  ) +
  theme_minimal()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histograms of years of education (values above 30 excluded) for the three
# selected countries, one facet per country.
all_data %>%
  filter(
    country_code %in% c("JP", "FR", "FI"),
    educ_years <= 30
  ) %>%
  ggplot(aes(x = educ_years, fill = country_code)) +
  geom_histogram(alpha = 0.7) +
  facet_wrap(~ country_code) +
  labs(
    title = "Distribution of Education Years (Selected Countries)",
    x = "Years of Education",
    y = "Count"
  ) +
  theme_minimal()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(dplyr)
library(ggplot2)

# Restrict the data to the three selected countries; `filtered_data` is
# overwritten here.
filtered_data <- filter(all_data, country_code %in% c("JP", "FR", "FI"))

# Working-hours histograms, one facet per country.
ggplot(
  data = filtered_data,
  mapping = aes(x = working_hours, fill = country_code)
) +
  geom_histogram(alpha = 0.6) +
  facet_wrap(~ country_code) +
  labs(
    title = "Working Hours by Country",
    x = "Working Hours",
    y = "Count"
  ) +
  theme_minimal()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1526 rows containing non-finite outside the scale range
(`stat_bin()`).
gender
# Drop rows with missing gender, then recode the codes 1/2 as a labelled
# factor. `filtered_data` is overwritten in place.
filtered_data <- mutate(
  filter(filtered_data, !is.na(gender)),
  gender = factor(gender, levels = c(1, 2), labels = c("Male", "Female"))
)

# Count of respondents by gender, one facet per country.
ggplot(data = filtered_data, mapping = aes(x = gender, fill = gender)) +
  geom_bar(color = "black", alpha = 0.8) +
  facet_wrap(~ country_code) +
  # Fill colors are keyed by factor label.
  scale_fill_manual(values = c("Male" = "#FF9999", "Female" = "#9999FF")) +
  labs(
    title = "Gender Distribution by Country",
    x = "Gender",
    y = "Count"
  ) +
  theme_minimal()
gender and working hours
# Rebuild `filtered_data` from `all_data`: selected countries only, the three
# columns needed for the plot, complete cases on gender and working hours,
# and gender recoded from 1/2 to a labelled factor.
filtered_data <- all_data %>%
  filter(country_code %in% c("JP", "FR", "FI")) %>%
  select(country_code, gender, working_hours) %>%
  filter(!is.na(gender), !is.na(working_hours)) %>%
  mutate(
    gender = factor(gender, levels = c(1, 2), labels = c("Male", "Female"))
  )

library(ggplot2)

# Working-hours boxplots split by gender, one facet per country.
ggplot(
  data = filtered_data,
  mapping = aes(x = gender, y = working_hours, fill = gender)
) +
  geom_boxplot(color = "black", alpha = 0.7) +
  facet_wrap(~ country_code) +
  labs(
    title = "Gender Distribution and Working Hours by Country",
    x = "Gender",
    y = "Working Hours"
  ) +
  theme_minimal() +
  # Unnamed values are assigned in factor-level order (Male, then Female).
  scale_fill_manual(values = c("lightblue", "pink"))
library(ggplot2)

# Filter for Japan, France, and Finland; `filtered_data` is overwritten with
# every column of the matching rows.
selected_countries <- c("JP", "FR", "FI")
filtered_data <- all_data[all_data[["country_code"]] %in% selected_countries, ]

# Side-by-side bars of household-composition counts per country.
ggplot(
  data = filtered_data,
  mapping = aes(x = factor(hcomp), fill = country_code)
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Distribution of Household Composition by Country",
    x = "Household Composition",
    y = "Count",
    fill = "Country Code"
  ) +
  theme_minimal()
library(reshape2)

# Keep the country code plus the three country-specific income columns.
income_data <- filtered_data[c("country_code", "FI_RINC", "FR_RINC", "JP_RINC")]

# Reshape to long format for ggplot: one row per (respondent, income column),
# with the source column name in `country` and its value in `income`.
income_data_melted <- melt(
  income_data,
  id.vars = "country_code",
  variable.name = "country",
  value.name = "income"
)
Warning: attributes are not identical across measure variables; they will be
dropped
# Boxplot of income per income column (`country` holds the melted column
# names).
ggplot(
  data = income_data_melted,
  mapping = aes(x = country, y = income, fill = country)
) +
  geom_boxplot() +
  labs(
    title = "Distribution of Income by Country",
    x = "Country",
    y = "Income"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))
Warning: Removed 8592 rows containing non-finite outside the scale range
(`stat_boxplot()`).
updated
# Filter data for the selected countries
selected_countries <- c("FI", "FR", "JP")
filtered_data <- all_data[all_data$country_code %in% selected_countries, ]

# Remove rows where income for Finland (FI_RINC) is greater than 5000.
# The mask must be NA-safe: for a Finnish row with missing FI_RINC,
# `country_code == "FI" & FI_RINC > 5000` is NA, and an NA in a logical row
# index makes `[` return an all-NA row (losing country_code and every other
# column) instead of keeping the original row.
fi_income_outlier <- filtered_data$country_code %in% "FI" &
  !is.na(filtered_data$FI_RINC) &
  filtered_data$FI_RINC > 5000
filtered_data <- filtered_data[!fi_income_outlier, ]

# Select relevant columns for income data
income_data <- filtered_data[, c("country_code", "FI_RINC", "FR_RINC", "JP_RINC")]

# Melt the data for ggplot: one row per (respondent, income column), with the
# source column name in `country` and its value in `income`.
library(reshape2)
income_data_melted <- melt(
  income_data,
  id.vars = "country_code",
  variable.name = "country",
  value.name = "income"
)
Warning: attributes are not identical across measure variables; they will be
dropped
# Boxplot of income per income column (`country` holds the melted column
# names), drawn from the current `income_data_melted`.
ggplot(
  data = income_data_melted,
  mapping = aes(x = country, y = income, fill = country)
) +
  geom_boxplot() +
  labs(
    title = "Distribution of Income by Country",
    x = "Country",
    y = "Income"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))
Warning: Removed 8522 rows containing non-finite outside the scale range
(`stat_boxplot()`).
income for countries
# Scatter of the Finnish income column against working hours, colored by
# country code. Every row of `filtered_data` is passed in.
ggplot(
  data = filtered_data,
  mapping = aes(x = working_hours, y = FI_RINC)
) +
  geom_point(mapping = aes(color = country_code)) +
  labs(
    title = "Income vs. Working Hours (Finland)",
    x = "Working Hours",
    y = "Income"
  ) +
  theme_minimal()
Warning: Removed 3183 rows containing missing values or values outside the scale range
(`geom_point()`).
# Scatter of the French income column against working hours, colored by
# country code. Every row of `filtered_data` is passed in.
ggplot(
  data = filtered_data,
  mapping = aes(x = working_hours, y = FR_RINC, color = country_code)
) +
  geom_point() +
  labs(
    title = "Income vs. Working Hours (France)",
    x = "Working Hours",
    y = "Income"
  ) +
  theme_minimal()
Warning: Removed 2797 rows containing missing values or values outside the scale range
(`geom_point()`).
# Scatter of the Japanese income column against working hours, colored by
# country code. Every row of `filtered_data` is passed in.
ggplot(
  data = filtered_data,
  mapping = aes(x = working_hours, y = JP_RINC, color = country_code)
) +
  geom_point() +
  labs(
    title = "Income vs. Working Hours (Japan)",
    x = "Working Hours",
    y = "Income"
  ) +
  theme_minimal()
Warning: Removed 3452 rows containing missing values or values outside the scale range
(`geom_point()`).