# This report provides an analysis of an agricultural dataset, exploring relationships between temperature, humidity, moisture, soil type, crop type, and nutrient levels.
# Import Library
library(ggplot2)
library(dplyr)
library(reshape2)
library(plotly)
library(ggcorrplot)# Data Overview
# Read the agricultural dataset from its CSV file
data_core <- read.csv(
  file = "~/R-Visualization/MINI_RESEARCH/Agriculture/data_core.csv"
)
# 1. Summary Statistics
## Per-column overview: quartiles and mean for numeric columns,
## length/class/mode for character columns
summary(data_core)
## Temparature Humidity Moisture Soil.Type
## Min. :20.00 Min. :40.02 Min. :20.00 Length:8000
## 1st Qu.:27.05 1st Qu.:53.28 1st Qu.:33.97 Class :character
## Median :30.24 Median :59.11 Median :42.25 Mode :character
## Mean :30.34 Mean :59.21 Mean :43.58
## 3rd Qu.:33.46 3rd Qu.:65.08 3rd Qu.:52.95
## Max. :40.00 Max. :80.00 Max. :70.00
## Crop.Type Nitrogen Potassium Phosphorous
## Length:8000 Min. : 0.00 Min. : 0.000 Min. : 0.00
## Class :character 1st Qu.: 9.00 1st Qu.: 0.000 1st Qu.: 8.00
## Mode :character Median :14.00 Median : 1.000 Median :18.00
## Mean :18.43 Mean : 3.916 Mean :18.51
## 3rd Qu.:26.00 3rd Qu.: 5.000 3rd Qu.:30.00
## Max. :46.00 Max. :23.000 Max. :46.00
## Fertilizer.Name
## Length:8000
## Class :character
## Mode :character
##
##
##
## Temparature Humidity Moisture Soil.Type Crop.Type
## 0 0 0 0 0
## Nitrogen Potassium Phosphorous Fertilizer.Name
## 0 0 0 0
##
## Black Clayey Loamy Red Sandy
## 1613 1623 1590 1594 1580
##
## Barley Cotton Ground Nuts Maize Millets Oil seeds
## 703 722 732 753 718 711
## Paddy Pulses Sugarcane Tobacco Wheat
## 706 728 763 717 747
##
## 10-26-26 14-35-14 17-17-17 20-20 28-28 DAP Urea
## 1128 1188 1124 1103 1120 1167 1170
## Histogram for Each Numerical Variable
# Names of the numeric columns to plot. They are kept as strings so the same
# plotting code can loop over them ("Temparature" matches the dataset's own
# column spelling).
num_vars <- c(
  "Temparature", "Humidity", "Moisture",
  "Nitrogen", "Potassium", "Phosphorous"
)
for (var in num_vars) {
  # print() is needed because ggplot objects are not auto-printed in a loop
  print(
    # .data[[var]] maps a column by its string name; it replaces the
    # deprecated aes_string()
    ggplot(data_core, aes(x = .data[[var]])) +
      geom_histogram(
        bins = 30, fill = "steelblue", color = "black", alpha = 0.7
      ) +
      ggtitle(paste("Histogram of", var)) +
      xlab(var) + # keep the plain column name as the axis label
      theme_minimal()
  )
}
# Density Plots for Continuous Variables
# Draw one kernel density plot per numeric column listed in num_vars
for (var in num_vars) {
  # print() is needed because ggplot objects are not auto-printed in a loop
  print(
    # .data[[var]] maps a column by its string name; it replaces the
    # deprecated aes_string()
    ggplot(data_core, aes(x = .data[[var]])) +
      geom_density(fill = "skyblue", alpha = 0.6) +
      ggtitle(paste("Density Plot of", var)) +
      xlab(var) + # keep the plain column name as the axis label
      theme_minimal()
  )
}
# Categorical Features - Bar Plots
# Horizontal bar chart: number of rows per soil type
ggplot(data = data_core, mapping = aes(x = Soil.Type)) +
  geom_bar(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Distribution of Soil Types",
    x = "Soil Type",
    y = "Count"
  ) +
  theme_minimal()
# Bar Plot for Crop Type
# Horizontal bar chart: number of rows per crop type
ggplot(data = data_core, mapping = aes(x = Crop.Type)) +
  geom_bar(fill = "darkgreen") +
  coord_flip() +
  labs(
    title = "Distribution of Crop Types",
    x = "Crop Type",
    y = "Count"
  ) +
  theme_minimal()
# Bar Plot for Fertilizer Name
# Horizontal bar chart: number of rows per fertilizer
ggplot(data_core, aes(x = Fertilizer.Name)) +
  geom_bar(fill = "purple") +
  theme_minimal() +
  ggtitle("Distribution of Fertilizer Types") +
  xlab("Fertilizer Name") +
  ylab("Count") +
  coord_flip()
# Now, we'll explore relationships between variables using scatter plots,
# box plots, and correlation heatmaps.
# Scatter Plots for Key Relationships
# Temperature against humidity; semi-transparent points reduce overplotting
ggplot(data_core, aes(x = Temparature, y = Humidity)) +
  geom_point(alpha = 0.5, color = "blue") +
  ggtitle("Temperature vs Humidity") +
  theme_minimal()

# Moisture against nitrogen, drawn as a separate plot
ggplot(data_core, aes(x = Moisture, y = Nitrogen)) +
  geom_point(alpha = 0.5, color = "red") +
  ggtitle("Moisture vs Nitrogen") +
  theme_minimal()
# Correlation Heatmap
# Keep only the six numeric measurement columns
num_data <- data_core %>%
  dplyr::select(
    Temparature, Humidity, Moisture, Nitrogen, Potassium, Phosphorous
  )
# Pairwise correlations, computed on rows that have no missing values
cor_matrix <- cor(num_data, use = "complete.obs")
# Lower-triangle circle heatmap labelled with the correlation coefficients
ggcorrplot(
  cor_matrix,
  method = "circle",
  type = "lower",
  lab = TRUE,
  lab_size = 3
)
## Temperature Distribution by Soil Type
# Horizontal box plots of temperature, one box (and fill colour) per soil type
ggplot(
  data = data_core,
  mapping = aes(x = Soil.Type, y = Temparature, fill = Soil.Type)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Temperature Distribution Across Soil Types",
    x = "Soil Type",
    y = "Temperature"
  ) +
  theme_minimal()
## Moisture Levels by Crop Type
# Horizontal box plots of moisture, one box (and fill colour) per crop type
ggplot(
  data = data_core,
  mapping = aes(x = Crop.Type, y = Moisture, fill = Crop.Type)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Moisture Levels by Crop Type",
    x = "Crop Type",
    y = "Moisture"
  ) +
  theme_minimal()
## Categorical-Categorical Relationships (Stacked Bar Charts & Heatmaps)
# Soil Type vs Crop Type (Stacked Bar Chart)
# position = "fill" scales every bar to height 1, so the segments show each
# crop's share within a soil type rather than raw counts
ggplot(data_core, aes(x = Soil.Type, fill = Crop.Type)) +
  geom_bar(position = "fill") +
  theme_minimal() +
  ggtitle("Proportion of Crops Grown in Each Soil Type") +
  xlab("Soil Type") +
  ylab("Proportion") +
  coord_flip()
# We'll compare Nitrogen, Phosphorous, and Potassium levels for each
# Fertilizer Type.
# Box Plot: Nitrogen Levels by Fertilizer Type
# Horizontal box plots of nitrogen, one box (and fill colour) per fertilizer
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Nitrogen, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Nitrogen Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Nitrogen"
  ) +
  theme_minimal()
# Box Plot: Phosphorous Levels by Fertilizer Type
# Horizontal box plots of phosphorous, one box (and fill colour) per fertilizer
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Phosphorous, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Phosphorous Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Phosphorous"
  ) +
  theme_minimal()
# Box Plot: Potassium Levels by Fertilizer Type
# Horizontal box plots of potassium, one box (and fill colour) per fertilizer
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Potassium, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Potassium Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Potassium"
  ) +
  theme_minimal()
# Relationship Between Crop Type and Fertilizer Used
# Stacked bars scaled to height 1: share of each fertilizer within a crop type
ggplot(
  data = data_core,
  mapping = aes(x = Crop.Type, fill = Fertilizer.Name)
) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(
    title = "Fertilizer Usage Across Crop Types",
    x = "Crop Type",
    y = "Proportion"
  ) +
  theme_minimal()
# Soil Type and Fertilizer Usage
# Stacked bars scaled to height 1: share of each fertilizer within a soil type
ggplot(
  data = data_core,
  mapping = aes(x = Soil.Type, fill = Fertilizer.Name)
) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(
    title = "Fertilizer Usage Across Soil Types",
    x = "Soil Type",
    y = "Proportion"
  ) +
  theme_minimal()
# Select only nutrient-related columns
nutrient_data <- data_core[c("Nitrogen", "Phosphorous", "Potassium")]
# Summary statistics for the three nutrient columns
summary(nutrient_data)
## Nitrogen Phosphorous Potassium
## Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 9.00 1st Qu.: 8.00 1st Qu.: 0.000
## Median :14.00 Median :18.00 Median : 1.000
## Mean :18.43 Mean :18.51 Mean : 3.916
## 3rd Qu.:26.00 3rd Qu.:30.00 3rd Qu.: 5.000
## Max. :46.00 Max. :46.00 Max. :23.000
# Draw a random subset of 500 rows for plotting; the fixed seed makes the
# draw repeatable
set.seed(123)
nutrient_sample <- sample_n(nutrient_data, 500)
# Running row counter, used as the y-axis position in the heatmap
nutrient_sample[["Sample_ID"]] <- seq_len(nrow(nutrient_sample))
# Reshape to long format (one row per sample/nutrient pair), then drop rows
# whose value is missing
melted_data <- nutrient_sample %>%
  melt(id.vars = "Sample_ID") %>%
  filter(!is.na(value))
# Heatmap: nutrient on x, sample on y, fill colour encodes the nutrient level
ggplot(
  data = melted_data,
  mapping = aes(x = variable, y = Sample_ID, fill = value)
) +
  geom_raster() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Heatmap of Nutrient Levels (Sampled 500 Observations)",
    x = "Nutrient Type",
    y = "Sample Index"
  ) +
  theme_minimal()