Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 58 additions & 17 deletions Crop Yield Prediction/crop_yield_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,49 @@
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# --- 1. CONFIGURATION & LOAD ---
DATA_PATH = "data/crop_yield_dataset.csv"
MODEL_DIR = "models"
# Use absolute or relative paths from the script's directory for safety
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE_DIR, "data", "crop_yield_dataset.csv")
MODEL_DIR = os.path.join(BASE_DIR, "models")
ASSETS_DIR = os.path.join(BASE_DIR, "assets")

os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(ASSETS_DIR, exist_ok=True)

try:
df = pd.read_csv(DATA_PATH)
# Clean column names: strip spaces, lowercase, replace spaces with underscores
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
print(f"Dataset loaded. Shape: {df.shape}")
print(f"Dataset loaded. Shape: {df.shape}")
except FileNotFoundError:
print(f"Error: {DATA_PATH} not found. Please check the file path.")
print(f"Error: {DATA_PATH} not found. Please check the file path.")
exit()

# --- 2. ENCODING & PREPROCESSING ---
# We use LabelEncoders but add a step to handle 'unseen' categories during inference
encoders = {}
categorical_cols = ['area', 'item']

for col in categorical_cols:
le = LabelEncoder()
# Adding a placeholder for unknown values if your dataset is small/evolving
df[f'{col}_encoded'] = le.fit_transform(df[col])
encoders[col] = le

# Define Features and Target based on your dataset columns
# Define Features and Target
features = ['area_encoded', 'item_encoded', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
target = 'hg/ha_yield'

# Ensure all feature columns exist before training
if not all(col in df.columns for col in features + [target]):
missing = [col for col in features + [target] if col not in df.columns]
print(f"Missing columns in CSV: {missing}")
print(f"Missing columns in CSV: {missing}")
exit()

X = df[features]
Expand All @@ -51,7 +56,7 @@
)

# --- 4. MODEL TRAINING ---
print("🚀 Training Random Forest Regressor...")
print("Training Random Forest Regressor...")
model = RandomForestRegressor(
n_estimators=100,
max_depth=15, # Prevents extreme overfitting
Expand All @@ -66,17 +71,53 @@
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("-" * 30)
print(f"📊 Model Performance:")
print(f"✅ R² Score (Accuracy): {r2 * 100:.2f}%")
print(f"Mean Absolute Error: {mae:.2f} hg/ha")
print(f"Root Mean Squared Error: {rmse:.2f} hg/ha")
print("-" * 30)
print("-" * 40)
print(f"Model Performance:")
print(f"R2 Score (Accuracy): {r2 * 100:.2f}%")
print(f"Mean Absolute Error: {mae:.2f} hg/ha")
print(f"Root Mean Squared Error: {rmse:.2f} hg/ha")
print("-" * 40)

# --- 6. SAVE ARTIFACTS ---
# Saving everything into a 'models' folder for better organization
joblib.dump(model, os.path.join(MODEL_DIR, 'yield_predictor_model.pkl'))
joblib.dump(encoders['area'], os.path.join(MODEL_DIR, 'area_encoder.pkl'))
joblib.dump(encoders['item'], os.path.join(MODEL_DIR, 'item_encoder.pkl'))
print(f"Model and encoders saved successfully in '{MODEL_DIR}/'")

# --- 7. VISUALIZATIONS ---
# Both plots below are written as PNG files under ASSETS_DIR; nothing is shown interactively.
print("Generating plots and saving to assets...")

# Feature Importance Plot
# Read the per-feature importance scores from the fitted forest and build an
# index order that sorts them from most to least important.
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
# Human-readable labels, written in the same order as the `features` list
# (area, item, rainfall, pesticides, temperature) so position i labels feature i.
clean_feature_names = ['Region/Area', 'Crop/Item', 'Rainfall (mm/year)', 'Pesticides (tonnes)', 'Avg Temperature (C)']
sorted_feature_names = [clean_feature_names[i] for i in indices]

plt.figure(figsize=(10, 6))
sns.set_theme(style="whitegrid")
# `hue` is given the same values as `y` with the legend switched off; presumably this
# is only there to give each bar its own palette colour -- confirm against the seaborn version in use.
sns.barplot(x=importances[indices], y=sorted_feature_names, palette="viridis", hue=sorted_feature_names, legend=False)
plt.title("Feature Importance in Crop Yield Prediction Model", fontsize=14, pad=15)
plt.xlabel("Relative Importance Score", fontsize=12)
plt.ylabel("Features", fontsize=12)
plt.tight_layout()
feat_importance_path = os.path.join(ASSETS_DIR, "feature_importance.png")
plt.savefig(feat_importance_path, dpi=300)
# Close the figure so the next plot starts from a clean pyplot state.
plt.close()
print(f"Feature Importance plot saved as '{feat_importance_path}'")

print(f"💾 Model and encoders saved successfully in '{MODEL_DIR}/'")
# Actual vs Predicted Plot
# Scatter of test-set targets against model predictions, with a y = x reference line.
plt.figure(figsize=(10, 6))
# Sample data for scatter plot to avoid sluggish rendering with huge datasets
# At most 2000 distinct positions are drawn (replace=False). No seed is set in the
# visible code, so the plotted subset may differ from run to run.
sample_indices = np.random.choice(len(y_test), min(len(y_test), 2000), replace=False)
# The same positions index both series: `.iloc` for y_test, plain indexing for y_pred
# (assumes y_pred is a positional array aligned with y_test -- TODO confirm).
plt.scatter(y_test.iloc[sample_indices], y_pred[sample_indices], alpha=0.4, color="#2e8b57", label="Predicted vs Actual")
# Diagonal spanning the full test-set range (not just the sampled points).
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=2, label="Perfect Fit Line")
plt.xlabel("Actual Yield (hg/ha)", fontsize=12)
plt.ylabel("Predicted Yield (hg/ha)", fontsize=12)
plt.title("Actual vs Predicted Crop Yield (Sampled Test Set)", fontsize=14, pad=15)
plt.legend()
plt.tight_layout()
act_vs_pred_path = os.path.join(ASSETS_DIR, "actual_vs_predicted.png")
plt.savefig(act_vs_pred_path, dpi=300)
# Release the figure now that it has been written to disk.
plt.close()
print(f"Performance plot saved as '{act_vs_pred_path}'")
print("Training pipeline complete!")
172 changes: 172 additions & 0 deletions Crop Yield Prediction/predict_yield_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import os
import sys
import joblib
import numpy as np

# Set up paths relative to the script directory
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(MODEL_DIR, "yield_predictor_model.pkl")
AREA_ENC_PATH = os.path.join(MODEL_DIR, "area_encoder.pkl")
ITEM_ENC_PATH = os.path.join(MODEL_DIR, "item_encoder.pkl")

def clear_screen():
    """Clear the terminal using the platform's native shell command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.
    The command's exit status is ignored.
    """
    if os.name == 'nt':
        command = 'cls'
    else:
        command = 'clear'
    os.system(command)

def get_closest_matches(query, choices, limit=5):
    """Return up to ``limit`` entries of ``choices`` that resemble ``query``.

    Matching is case-insensitive and happens in two stages:

    1. Substring match: every choice that contains ``query``, in the order
       the choices were supplied.
    2. Fuzzy fallback: used only when stage 1 finds nothing. Choices are
       ranked by ``difflib.get_close_matches`` (best first), so a typo such
       as "Indai" still suggests "India".

    Args:
        query: Text typed by the user. Surrounding whitespace is ignored.
        choices: Iterable of candidate strings (e.g. a LabelEncoder's
            ``classes_`` array).
        limit: Maximum number of suggestions to return.

    Returns:
        A list of the original ``choices`` elements (not lower-cased copies).
        Empty when the query is blank, ``limit`` is not positive, or nothing
        is similar enough.
    """
    # Function-scope import keeps this block self-contained.
    import difflib

    query_key = query.strip().casefold()
    # A blank query is a substring of everything; suggesting the first few
    # choices for it would be noise rather than a "closest" match.
    if not query_key or limit <= 0:
        return []

    keyed = [(str(choice).casefold(), choice) for choice in choices]

    substring_hits = [choice for key, choice in keyed if query_key in key]
    if substring_hits:
        return substring_hits[:limit]

    # Map each folded key back to the first original spelling that produced it.
    original_by_key = {}
    for key, choice in keyed:
        original_by_key.setdefault(key, choice)

    close_keys = difflib.get_close_matches(
        query_key, list(original_by_key), n=limit, cutoff=0.6
    )
    return [original_by_key[key] for key in close_keys]

def _prompt_category(prompt, classes, *, empty_msg, kind, unknown_msg, hint):
    """Ask repeatedly until the user supplies a value present in ``classes``.

    Args:
        prompt: Text shown when asking for the value.
        classes: Categories known to the fitted encoder.
        empty_msg: Warning printed when the user enters nothing.
        kind: Noun used in the "not found" warning (e.g. "Region", "Crop").
        unknown_msg: Warning printed when no suggestion can be offered.
        hint: Follow-up line listing example valid values.

    Returns:
        The accepted category, exactly as it appears in ``classes``.
    """
    while True:
        raw = input(prompt).strip()
        if not raw:
            print(empty_msg)
            continue

        if raw in classes:
            return raw

        matches = get_closest_matches(raw, classes)
        if not matches:
            print(unknown_msg)
            print(hint)
            continue

        print(f"Warning: {kind} '{raw}' not found. Did you mean one of these?")
        for i, match in enumerate(matches, 1):
            print(f" [{i}] {match}")
        choice = input("Enter option number or press Enter to type again: ").strip()
        # isdecimal() (not isdigit()) so that characters such as "²" cannot
        # pass the check and then make int() raise ValueError.
        if choice.isdecimal() and 1 <= int(choice) <= len(matches):
            selected = matches[int(choice) - 1]
            print(f"Selected: {selected}")
            return selected
        # Anything else falls through and asks for the value again.


def _prompt_float(prompt, *, range_msg, minimum=None, maximum=None):
    """Ask repeatedly until the user enters a finite number within bounds.

    Args:
        prompt: Text shown when asking for the value.
        range_msg: Warning printed when the value is outside the bounds.
        minimum: Inclusive lower bound, or ``None`` for no lower bound.
        maximum: Inclusive upper bound, or ``None`` for no upper bound.

    Returns:
        The accepted value as a ``float``.
    """
    invalid_msg = "Warning: Invalid input. Please enter a valid number."
    while True:
        try:
            value = float(input(prompt).strip())
        except ValueError:
            print(invalid_msg)
            continue

        # float() happily parses "nan" and "inf"; NaN also slips past a plain
        # `value < 0` test, so reject non-finite input explicitly.
        if not np.isfinite(value):
            print(invalid_msg)
            continue

        too_low = minimum is not None and value < minimum
        too_high = maximum is not None and value > maximum
        if too_low or too_high:
            print(range_msg)
            continue
        return value


def main():
    """Run the interactive crop-yield prediction session.

    Loads the trained model and the two label encoders from ``MODEL_DIR``,
    prompts for region, crop, rainfall, pesticide usage and temperature,
    then prints the predicted yield.

    Exits with status 1 when the model files are missing, cannot be loaded,
    or the prediction itself fails.
    """
    clear_screen()
    print("=" * 60)
    print(" AgriPredict: ML-Based Crop Yield Predictor CLI ")
    print("=" * 60)

    # Check if model files exist
    required_files = (MODEL_PATH, AREA_ENC_PATH, ITEM_ENC_PATH)
    if not all(os.path.exists(path) for path in required_files):
        print("\nError: Trained model files not found in the 'models/' directory.")
        print("Please train the model first by running:")
        print(" python crop_yield_predictor.py")
        sys.exit(1)

    print("\nLoading machine learning assets...")
    try:
        # NOTE: joblib.load unpickles the files; only load artefacts produced
        # by the project's own training script.
        model = joblib.load(MODEL_PATH)
        le_area = joblib.load(AREA_ENC_PATH)
        le_item = joblib.load(ITEM_ENC_PATH)
        print("Models and Encoders loaded successfully.")
    except Exception as e:
        print(f"Critical error loading model assets: {e}")
        sys.exit(1)

    print("\nEnter the following environmental and location parameters:")

    # 1. Location / Area
    area = _prompt_category(
        "\nEnter Country/Region Name (e.g. India, Albania): ",
        le_area.classes_,
        empty_msg="Warning: Country name cannot be empty.",
        kind="Region",
        unknown_msg="Warning: Country/Region not recognized by the model training set.",
        hint="Valid examples: Albania, India, Algeria, Argentina, Brazil, Canada, France, etc.",
    )

    # 2. Crop Type / Item
    crop = _prompt_category(
        "\nEnter Crop Type (e.g. Maize, Wheat, Potatoes): ",
        le_item.classes_,
        empty_msg="Warning: Crop type cannot be empty.",
        kind="Crop",
        unknown_msg="Warning: Crop type not recognized by the model training set.",
        hint=f"Valid classes: {', '.join(le_item.classes_[:10])}...",
    )

    # 3. Average Rainfall
    rainfall = _prompt_float(
        "\nEnter Average Rainfall (mm/year) [e.g. 1200]: ",
        range_msg="Warning: Rainfall cannot be negative.",
        minimum=0,
    )

    # 4. Pesticides
    pesticide = _prompt_float(
        "\nEnter Pesticide Usage (tonnes) [e.g. 50]: ",
        range_msg="Warning: Pesticide usage cannot be negative.",
        minimum=0,
    )

    # 5. Temperature
    temperature = _prompt_float(
        "\nEnter Average Temperature (C) [e.g. 25]: ",
        range_msg="Warning: Temperature must be in range -30C to 60C.",
        minimum=-30,
        maximum=60,
    )

    # Inference logic
    print("\nRunning Random Forest Regressor Prediction...")
    try:
        # Imported lazily so the prompts appear without waiting for pandas.
        import pandas as pd

        # Encode inputs
        area_encoded = le_area.transform([area])[0]
        item_encoded = le_item.transform([crop])[0]

        # Feature DataFrame with matching column names
        input_data = pd.DataFrame(
            [[area_encoded, item_encoded, rainfall, pesticide, temperature]],
            columns=['area_encoded', 'item_encoded', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
        )

        # Predict
        prediction = model.predict(input_data)[0]

        print("\n" + "=" * 50)
        print(" PREDICTION RESULT ")
        print("=" * 50)
        print(f"Location : {area}")
        print(f"Crop Type : {crop}")
        print(f"Rainfall : {rainfall:,.1f} mm/year")
        print(f"Pesticides : {pesticide:,.1f} tonnes")
        print(f"Temperature : {temperature:.1f} C")
        print("-" * 50)
        print(f"Predicted Yield: {prediction:,.2f} hg/ha")
        print("=" * 50)

    except Exception as e:
        print(f"\nError during prediction calculation: {e}")
        # Match the other fatal paths: a failed prediction is not a success.
        sys.exit(1)

if __name__ == "__main__":
    # Script entry point: run the interactive session and turn the two ways
    # the user can abandon it into clean exits instead of tracebacks.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is a deliberate cancel, so leave with a success status.
        print("\n\nGoodbye!")
        sys.exit(0)
    except EOFError:
        # input() raises EOFError when stdin is closed or runs out (for
        # example piped input with too few lines); report it and fail.
        print("\n\nError: input ended before all parameters were provided.")
        sys.exit(1)
36 changes: 36 additions & 0 deletions Disease prediction/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import torch
import torch.nn as nn
import os
import logging

logger = logging.getLogger(__name__)

class PlantDiseaseNet(nn.Module):
def __init__(self, num_classes=38): # original model had 38 classes
Expand Down Expand Up @@ -77,3 +82,34 @@ def forward(self, x):
x = self.pool(x)
x = self.classifier(x)
return x

def load_pytorch_model(model_path, device='cpu'):
    """Load a plant-disease model from disk and prepare it for inference.

    The file may contain a fully pickled model, a bare state dict, or a
    checkpoint dict that wraps the state dict under ``"state_dict"`` or
    ``"model_state_dict"``. State dicts are loaded into a default
    ``PlantDiseaseNet()``.

    Args:
        model_path: Path to the saved model or checkpoint file.
        device: Device the returned model should live on (string or
            ``torch.device``).

    Returns:
        The model on ``device`` in eval mode, or ``None`` when the file is
        missing or cannot be loaded. The failure is logged, not raised.
    """
    try:
        if not os.path.exists(model_path):
            logger.error("PyTorch model file missing: %s", model_path)
            return None

        # NOTE(security): torch.load unpickles the file, so it is only safe
        # for trusted checkpoints. Whether arbitrary pickled objects (such as
        # a whole nn.Module) are accepted depends on the installed torch
        # version's `weights_only` default -- review before loading files
        # that come from outside the project.
        loaded = torch.load(model_path, map_location=device)

        if isinstance(loaded, dict):
            # Either a bare state dict or a checkpoint wrapping one.
            state_dict = loaded
            for key in ("state_dict", "model_state_dict"):
                if isinstance(loaded.get(key), dict):
                    state_dict = loaded[key]
                    break
            model = PlantDiseaseNet()
            model.load_state_dict(state_dict)
        else:
            model = loaded

        # map_location only places the tensors read from disk; a freshly
        # constructed network still has its parameters on the CPU, so move
        # the module explicitly.
        model.to(device)
        model.eval()
        return model
    except Exception as e:
        logger.error("Error loading PyTorch model from %s: %s", model_path, e)
        return None

def predict_pytorch(model, input_tensor):
    """Run a forward pass of ``model`` on ``input_tensor`` without gradients.

    Args:
        model: Callable model, or ``None`` if loading failed earlier.
        input_tensor: Input passed straight to ``model``.

    Returns:
        Whatever ``model(input_tensor)`` returns, or ``None`` when the model
        is ``None`` or the forward pass raises. The failure is logged.
    """
    if model is not None:
        try:
            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                return model(input_tensor)
        except Exception as exc:
            logger.error(f"Error predicting with PyTorch model: {str(exc)}")
            return None

    logger.error("Prediction failed: PyTorch model is not loaded (None)")
    return None

Loading
Loading