1 Introduction

This report provides an analysis of an agricultural dataset, exploring the relationships between temperature, humidity, moisture, soil type, crop type, and nutrient levels.

1.1 R Code

# Import Library

library(ggplot2)
library(dplyr)
library(reshape2)
library(plotly)
library(ggcorrplot)
# Data Overview

# Read the agricultural CSV into the data frame used by every later step.

data_core <- read.csv(
  file = "~/R-Visualization/MINI_RESEARCH/Agriculture/data_core.csv"
)
# 1. Summary Statistics
## Per-column summary of data_core (printed below).

summary(object = data_core)
##   Temparature       Humidity        Moisture      Soil.Type        
##  Min.   :20.00   Min.   :40.02   Min.   :20.00   Length:8000       
##  1st Qu.:27.05   1st Qu.:53.28   1st Qu.:33.97   Class :character  
##  Median :30.24   Median :59.11   Median :42.25   Mode  :character  
##  Mean   :30.34   Mean   :59.21   Mean   :43.58                     
##  3rd Qu.:33.46   3rd Qu.:65.08   3rd Qu.:52.95                     
##  Max.   :40.00   Max.   :80.00   Max.   :70.00                     
##   Crop.Type            Nitrogen       Potassium       Phosphorous   
##  Length:8000        Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
##  Class :character   1st Qu.: 9.00   1st Qu.: 0.000   1st Qu.: 8.00  
##  Mode  :character   Median :14.00   Median : 1.000   Median :18.00  
##                     Mean   :18.43   Mean   : 3.916   Mean   :18.51  
##                     3rd Qu.:26.00   3rd Qu.: 5.000   3rd Qu.:30.00  
##                     Max.   :46.00   Max.   :23.000   Max.   :46.00  
##  Fertilizer.Name   
##  Length:8000       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# 2. Check for Missing Values
# Count the NA entries in each column of data_core.

data_core %>%
  is.na() %>%
  colSums()
##     Temparature        Humidity        Moisture       Soil.Type       Crop.Type 
##               0               0               0               0               0 
##        Nitrogen       Potassium     Phosphorous Fertilizer.Name 
##               0               0               0               0
# 3. Unique Values in Categorical Variables
# Frequency table of each categorical column.

table(data_core[["Soil.Type"]])
## 
##  Black Clayey  Loamy    Red  Sandy 
##   1613   1623   1590   1594   1580
table(data_core[["Crop.Type"]])
## 
##      Barley      Cotton Ground Nuts       Maize     Millets   Oil seeds 
##         703         722         732         753         718         711 
##       Paddy      Pulses   Sugarcane     Tobacco       Wheat 
##         706         728         763         717         747
table(data_core[["Fertilizer.Name"]])
## 
## 10-26-26 14-35-14 17-17-17    20-20    28-28      DAP     Urea 
##     1128     1188     1124     1103     1120     1167     1170

2 Univariate Analysis

## Histogram for Each Numerical Variable

# Names of the numeric columns of data_core plotted one by one below.
num_vars <- c(
  "Temparature", "Humidity", "Moisture",
  "Nitrogen", "Potassium", "Phosphorous"
)

# Draw one 30-bin histogram per numeric column. The explicit print() is
# needed because ggplot objects are not auto-printed inside a for loop.
for (var in num_vars) {
  print(
    # aes_string() is deprecated in ggplot2; look the column up by its name
    # through the .data pronoun instead.
    ggplot(data_core, aes(x = .data[[var]])) +
      geom_histogram(bins = 30, fill = "steelblue", color = "black", alpha = 0.7) +
      ggtitle(paste("Histogram of", var)) +
      # Keep the axis label equal to the column name, as aes_string() produced.
      xlab(var) +
      theme_minimal()
  )
}

# Density Plots for Continuous Variables

# Draw one kernel-density plot per column named in num_vars. The explicit
# print() is needed because ggplot objects are not auto-printed inside a
# for loop.
for (var in num_vars) {
  print(
    # aes_string() is deprecated in ggplot2; look the column up by its name
    # through the .data pronoun instead.
    ggplot(data_core, aes(x = .data[[var]])) +
      geom_density(fill = "skyblue", alpha = 0.6) +
      ggtitle(paste("Density Plot of", var)) +
      # Keep the axis label equal to the column name, as aes_string() produced.
      xlab(var) +
      theme_minimal()
  )
}

# Categorical Features - Bar Plots
# Each plot counts the rows per category and flips the axes so the category
# names are drawn along the vertical axis.

# Soil type counts
ggplot(data = data_core, mapping = aes(x = Soil.Type)) +
  geom_bar(fill = "steelblue") +
  coord_flip() +
  labs(title = "Distribution of Soil Types", x = "Soil Type", y = "Count") +
  theme_minimal()

# Crop type counts
ggplot(data = data_core, mapping = aes(x = Crop.Type)) +
  geom_bar(fill = "darkgreen") +
  coord_flip() +
  labs(title = "Distribution of Crop Types", x = "Crop Type", y = "Count") +
  theme_minimal()

# Fertilizer name counts
ggplot(data = data_core, mapping = aes(x = Fertilizer.Name)) +
  geom_bar(fill = "purple") +
  coord_flip() +
  labs(
    title = "Distribution of Fertilizer Types",
    x = "Fertilizer Name",
    y = "Count"
  ) +
  theme_minimal()

3 Bivariate Analysis

Now, we’ll explore relationships between variables using scatter plots, box plots, and correlation heatmaps.

3.1 Numerical-Numerical Relationships (Scatter Plots & Correlation Heatmap)

Scatter Plots for Key Relationships

# Temperature against humidity, one semi-transparent point per row.
ggplot(data = data_core, mapping = aes(x = Temparature, y = Humidity)) +
  geom_point(colour = "blue", alpha = 0.5) +
  labs(title = "Temperature vs Humidity") +
  theme_minimal()

# Moisture against nitrogen, one semi-transparent point per row.
ggplot(data = data_core, mapping = aes(x = Moisture, y = Nitrogen)) +
  geom_point(colour = "red", alpha = 0.5) +
  labs(title = "Moisture vs Nitrogen") +
  theme_minimal()

# Correlation Heatmap

# Keep only the six numeric columns.
num_data <- data_core[
  ,
  c("Temparature", "Humidity", "Moisture", "Nitrogen", "Potassium", "Phosphorous")
]

# Pairwise correlations, computed on rows with no missing values.
cor_matrix <- cor(x = num_data, use = "complete.obs")

# Lower-triangle circle plot with the coefficients printed as labels.
ggcorrplot(
  corr = cor_matrix,
  method = "circle",
  type = "lower",
  lab = TRUE,
  lab_size = 3
)

3.2 Categorical-Numerical Relationships (Box Plots)

## Temperature Distribution by Soil Type
# One box per soil type, filled by soil type, with flipped axes.
ggplot(
  data = data_core,
  mapping = aes(x = Soil.Type, y = Temparature, fill = Soil.Type)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Temperature Distribution Across Soil Types",
    x = "Soil Type",
    y = "Temperature"
  ) +
  theme_minimal()

## Moisture Levels by Crop Type
# One box per crop type, filled by crop type, with flipped axes.
ggplot(
  data = data_core,
  mapping = aes(x = Crop.Type, y = Moisture, fill = Crop.Type)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Moisture Levels by Crop Type",
    x = "Crop Type",
    y = "Moisture"
  ) +
  theme_minimal()

## Categorical-Categorical Relationships (Stacked Bar Charts & Heatmaps)
## Soil Type vs Crop Type (Stacked Bar Chart)

# position = "fill" scales every bar to height 1, so the segments show each
# crop type's share within a soil type.
ggplot(data = data_core, mapping = aes(x = Soil.Type, fill = Crop.Type)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(
    title = "Proportion of Crops Grown in Each Soil Type",
    x = "Soil Type",
    y = "Proportion"
  ) +
  theme_minimal()

3.2.1 Additional Plots for Deeper Insights

3.2.1.1 Impact of Fertilizer on Soil Nutrients

We’ll compare nitrogen, phosphorous, and potassium levels across fertilizer types, starting with a box plot of nitrogen levels by fertilizer type.

# Box Plot: Nitrogen Levels by Fertilizer Type
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Nitrogen, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Nitrogen Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Nitrogen"
  ) +
  theme_minimal()

# Box Plot: Phosphorous Levels by Fertilizer Type
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Phosphorous, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Phosphorous Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Phosphorous"
  ) +
  theme_minimal()

# Box Plot: Potassium Levels by Fertilizer Type
ggplot(
  data = data_core,
  mapping = aes(x = Fertilizer.Name, y = Potassium, fill = Fertilizer.Name)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Potassium Levels Across Different Fertilizers",
    x = "Fertilizer Name",
    y = "Potassium"
  ) +
  theme_minimal()

# Relationship Between Crop Type and Fertilizer Used
# Bars are scaled to height 1, so the segments show each fertilizer's share
# within a crop type.

ggplot(data = data_core, mapping = aes(x = Crop.Type, fill = Fertilizer.Name)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(
    title = "Fertilizer Usage Across Crop Types",
    x = "Crop Type",
    y = "Proportion"
  ) +
  theme_minimal()

# Soil Type and Fertilizer Usage
# Same layout, with the bars grouped by soil type instead.

ggplot(data = data_core, mapping = aes(x = Soil.Type, fill = Fertilizer.Name)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(
    title = "Fertilizer Usage Across Soil Types",
    x = "Soil Type",
    y = "Proportion"
  ) +
  theme_minimal()

4 Advanced Visualizations

4.1 Interactive Scatter Plot (Temperature vs Humidity by Crop Type)

# Static scatter plot of temperature against humidity, coloured by crop type.
p <- ggplot(
  data = data_core,
  mapping = aes(x = Temparature, y = Humidity, color = Crop.Type)
) +
  geom_point(alpha = 0.7) +
  labs(title = "Temperature vs Humidity (by Crop Type)") +
  theme_minimal()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p = p)

4.2 Heatmap of Nutrient Levels (Nitrogen, Phosphorous, Potassium)

# Keep the three nutrient columns only
nutrient_data <- data_core[, c("Nitrogen", "Phosphorous", "Potassium")]

# Summary statistics of the nutrient columns
summary(object = nutrient_data)
##     Nitrogen      Phosphorous      Potassium     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.000  
##  1st Qu.: 9.00   1st Qu.: 8.00   1st Qu.: 0.000  
##  Median :14.00   Median :18.00   Median : 1.000  
##  Mean   :18.43   Mean   :18.51   Mean   : 3.916  
##  3rd Qu.:26.00   3rd Qu.:30.00   3rd Qu.: 5.000  
##  Max.   :46.00   Max.   :46.00   Max.   :23.000
# Draw 500 random rows; the fixed seed makes the draw repeatable
set.seed(123)
nutrient_sample <- sample_n(nutrient_data, 500)

# Running row index, used as the y-axis of the heatmap
nutrient_sample[["Sample_ID"]] <- seq_len(nrow(nutrient_sample))

# Reshape to long format: one row per (Sample_ID, nutrient) pair
melted_data <- melt(data = nutrient_sample, id.vars = "Sample_ID")

# Drop any rows whose value is missing
melted_data <- filter(melted_data, !is.na(value))

# Raster heatmap: nutrients on x, sampled rows on y, value as fill colour
ggplot(
  data = melted_data,
  mapping = aes(x = variable, y = Sample_ID, fill = value)
) +
  geom_raster() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Heatmap of Nutrient Levels (Sampled 500 Observations)",
    x = "Nutrient Type",
    y = "Sample Index"
  ) +
  theme_minimal()