omroy07 · omroy07 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/Crop Yield Prediction/crop_yield_predictor.py b/Crop Yield Prediction/crop_yield_predictor.py
@@ -2,44 +2,49 @@
 import pandas as pd
 import numpy as np
 import joblib
+import matplotlib.pyplot as plt
+import seaborn as sns
 from sklearn.model_selection import train_test_split 
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
 
 # --- 1. CONFIGURATION & LOAD ---
-DATA_PATH = "data/crop_yield_dataset.csv"
-MODEL_DIR = "models"
+# Anchor every path to this script's directory so the script works from any working directory
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+DATA_PATH = os.path.join(BASE_DIR, "data", "crop_yield_dataset.csv")
+MODEL_DIR = os.path.join(BASE_DIR, "models")
+ASSETS_DIR = os.path.join(BASE_DIR, "assets")
+
 os.makedirs(MODEL_DIR, exist_ok=True)
+os.makedirs(ASSETS_DIR, exist_ok=True)
 
 try:
     df = pd.read_csv(DATA_PATH)
     # Clean column names: strip spaces, lowercase, replace spaces with underscores
     df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
-    print(f"✅ Dataset loaded. Shape: {df.shape}")
+    print(f"Dataset loaded. Shape: {df.shape}")
 except FileNotFoundError:
-    print(f"❌ Error: {DATA_PATH} not found. Please check the file path.")
+    print(f"Error: {DATA_PATH} not found. Please check the file path.")
     exit()
 
 # --- 2. ENCODING & PREPROCESSING ---
-# We use LabelEncoders but add a step to handle 'unseen' categories during inference
 encoders = {}
 categorical_cols = ['area', 'item']
 
 for col in categorical_cols:
     le = LabelEncoder()
-    # Adding a placeholder for unknown values if your dataset is small/evolving
     df[f'{col}_encoded'] = le.fit_transform(df[col])
     encoders[col] = le
 
-# Define Features and Target based on your dataset columns
+# Define Features and Target
 features = ['area_encoded', 'item_encoded', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
 target = 'hg/ha_yield'
 
 # Ensure all feature columns exist before training
 if not all(col in df.columns for col in features + [target]):
     missing = [col for col in features + [target] if col not in df.columns]
-    print(f"❌ Missing columns in CSV: {missing}")
+    print(f"Missing columns in CSV: {missing}")
     exit()
 
 X = df[features]
@@ -51,7 +56,7 @@
 )
 
 # --- 4. MODEL TRAINING ---
-print("🚀 Training Random Forest Regressor...")
+print("Training Random Forest Regressor...")
 model = RandomForestRegressor(
     n_estimators=100, 
     max_depth=15,       # Prevents extreme overfitting
@@ -66,17 +71,53 @@
 mae = mean_absolute_error(y_test, y_pred)
 rmse = np.sqrt(mean_squared_error(y_test, y_pred))
 
-print("-" * 30)
-print(f"📊 Model Performance:")
-print(f"✅ R² Score (Accuracy): {r2 * 100:.2f}%")
-print(f"✅ Mean Absolute Error: {mae:.2f} hg/ha")
-print(f"✅ Root Mean Squared Error: {rmse:.2f} hg/ha")
-print("-" * 30)
+print("-" * 40)
+print(f"Model Performance:")
+print(f"R2 Score (Accuracy): {r2 * 100:.2f}%")  # NOTE(review): R2 is a goodness-of-fit score, not an accuracy; it can be negative
+print(f"Mean Absolute Error: {mae:.2f} hg/ha")
+print(f"Root Mean Squared Error: {rmse:.2f} hg/ha")  # rmse is the square root of the test-set mean squared error
+print("-" * 40)
 
 # --- 6. SAVE ARTIFACTS ---
-# Saving everything into a 'models' folder for better organization
 joblib.dump(model, os.path.join(MODEL_DIR, 'yield_predictor_model.pkl'))
 joblib.dump(encoders['area'], os.path.join(MODEL_DIR, 'area_encoder.pkl'))
 joblib.dump(encoders['item'], os.path.join(MODEL_DIR, 'item_encoder.pkl'))
+print(f"Model and encoders saved successfully in '{MODEL_DIR}/'")
+
+# --- 7. VISUALIZATIONS ---
+print("Generating plots and saving to assets...")
+
+# Feature Importance Plot: one bar per model feature, sorted from most to least important
+importances = model.feature_importances_
+indices = np.argsort(importances)[::-1]  # feature positions ordered by descending importance
+clean_feature_names = ['Region/Area', 'Crop/Item', 'Rainfall (mm/year)', 'Pesticides (tonnes)', 'Avg Temperature (C)']  # display labels; must stay in the same order as `features`
+sorted_feature_names = [clean_feature_names[i] for i in indices]
+
+plt.figure(figsize=(10, 6))
+sns.set_theme(style="whitegrid")
+sns.barplot(x=importances[indices], y=sorted_feature_names, palette="viridis", hue=sorted_feature_names, legend=False)  # hue repeats the y labels; presumably so `palette` gives each bar its own color
+plt.title("Feature Importance in Crop Yield Prediction Model", fontsize=14, pad=15)
+plt.xlabel("Relative Importance Score", fontsize=12)
+plt.ylabel("Features", fontsize=12)
+plt.tight_layout()
+feat_importance_path = os.path.join(ASSETS_DIR, "feature_importance.png")
+plt.savefig(feat_importance_path, dpi=300)
+plt.close()  # close the current figure before the next plot is built
+print(f"Feature Importance plot saved as '{feat_importance_path}'")
 
-print(f"💾 Model and encoders saved successfully in '{MODEL_DIR}/'")
+# Actual vs Predicted Plot (scatter of a test-set sample against the y = x reference line)
+plt.figure(figsize=(10, 6))
+# Plot at most 2000 test points; the fixed seed keeps the saved figure reproducible across runs
+sample_indices = np.random.default_rng(42).choice(len(y_test), min(len(y_test), 2000), replace=False)
+plt.scatter(y_test.iloc[sample_indices], y_pred[sample_indices], alpha=0.4, color="#2e8b57", label="Predicted vs Actual")  # same positions index both series
+plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=2, label="Perfect Fit Line")  # points on this line are exact predictions
+plt.xlabel("Actual Yield (hg/ha)", fontsize=12)
+plt.ylabel("Predicted Yield (hg/ha)", fontsize=12)
+plt.title("Actual vs Predicted Crop Yield (Sampled Test Set)", fontsize=14, pad=15)
+plt.legend()
+plt.tight_layout()
+act_vs_pred_path = os.path.join(ASSETS_DIR, "actual_vs_predicted.png")
+plt.savefig(act_vs_pred_path, dpi=300)
+plt.close()
+print(f"Performance plot saved as '{act_vs_pred_path}'")
+print("Training pipeline complete!")
diff --git a/Crop Yield Prediction/predict_yield_cli.py b/Crop Yield Prediction/predict_yield_cli.py
@@ -0,0 +1,172 @@
+import os
+import sys
+import joblib
+import numpy as np
+
+# Set up paths relative to the script directory
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+MODEL_DIR = os.path.join(BASE_DIR, "models")
+MODEL_PATH = os.path.join(MODEL_DIR, "yield_predictor_model.pkl")
+AREA_ENC_PATH = os.path.join(MODEL_DIR, "area_encoder.pkl")
+ITEM_ENC_PATH = os.path.join(MODEL_DIR, "item_encoder.pkl")
+
+def clear_screen():
+    os.system('clear' if os.name != 'nt' else 'cls')  # 'cls' on Windows, 'clear' everywhere else
+
+def get_closest_matches(query, choices, limit=5):
+    """Return the first `limit` entries of `choices` that contain `query`, ignoring case."""
+    needle = query.lower()
+    hits = list(filter(lambda option: needle in option.lower(), choices))
+    return hits[:limit]
+
+def main():
+    """Interactively collect inputs and print the model's predicted crop yield.
+
+    Exits with status 1 if the model files are missing, cannot be loaded, or prediction fails.
+    """
+    clear_screen()
+    print("=" * 60)
+    print("      AgriPredict: ML-Based Crop Yield Predictor CLI      ")
+    print("=" * 60)
+
+    # Check if model files exist
+    if not (os.path.exists(MODEL_PATH) and os.path.exists(AREA_ENC_PATH) and os.path.exists(ITEM_ENC_PATH)):
+        print("\nError: Trained model files not found in the 'models/' directory.")
+        print("Please train the model first by running:")
+        print("   python crop_yield_predictor.py")
+        sys.exit(1)
+
+    print("\nLoading machine learning assets...")
+    try:
+        model = joblib.load(MODEL_PATH)
+        le_area = joblib.load(AREA_ENC_PATH)
+        le_item = joblib.load(ITEM_ENC_PATH)
+        print("Models and Encoders loaded successfully.")
+    except Exception as e:
+        print(f"Critical error loading model assets: {e}")
+        sys.exit(1)
+
+    print("\nEnter the following environmental and location parameters:")
+    # 1. Location / Area (must be a class the area encoder was fitted on)
+    while True:
+        area_input = input("\nEnter Country/Region Name (e.g. India, Albania): ").strip()
+        if not area_input:
+            print("Warning: Country name cannot be empty.")
+            continue
+
+        if area_input in le_area.classes_:
+            area = area_input
+            break
+
+        # Try finding closest match
+        matches = get_closest_matches(area_input, le_area.classes_)
+        if matches:
+            print(f"Warning: Region '{area_input}' not found. Did you mean one of these?")
+            for i, match in enumerate(matches, 1):
+                print(f"   [{i}] {match}")
+            choice = input("Enter option number or press Enter to type again: ").strip()
+            if choice.isdigit() and 1 <= int(choice) <= len(matches):
+                area = matches[int(choice) - 1]
+                print(f"Selected: {area}")
+                break
+        else:
+            print("Warning: Country/Region not recognized by the model training set.")
+            print(f"Valid examples: {', '.join(le_area.classes_[:10])}...")  # taken from the fitted encoder
+
+    # 2. Crop Type / Item (must be a class the item encoder was fitted on)
+    while True:
+        crop_input = input("\nEnter Crop Type (e.g. Maize, Wheat, Potatoes): ").strip()
+        if not crop_input:
+            print("Warning: Crop type cannot be empty.")
+            continue
+
+        if crop_input in le_item.classes_:
+            crop = crop_input
+            break
+
+        # Try finding closest match
+        matches = get_closest_matches(crop_input, le_item.classes_)
+        if matches:
+            print(f"Warning: Crop '{crop_input}' not found. Did you mean one of these?")
+            for i, match in enumerate(matches, 1):
+                print(f"   [{i}] {match}")
+            choice = input("Enter option number or press Enter to type again: ").strip()
+            if choice.isdigit() and 1 <= int(choice) <= len(matches):
+                crop = matches[int(choice) - 1]
+                print(f"Selected: {crop}")
+                break
+        else:
+            print("Warning: Crop type not recognized by the model training set.")
+            print(f"Valid classes: {', '.join(le_item.classes_[:10])}...")
+
+    # 3. Average Rainfall
+    while True:
+        try:
+            rainfall_str = input("\nEnter Average Rainfall (mm/year) [e.g. 1200]: ").strip()
+            rainfall = float(rainfall_str)
+            if not 0 <= rainfall < float("inf"):  # float() parses "nan"/"inf"; reject those too
+                print("Warning: Rainfall must be a finite, non-negative number.")
+                continue
+            break
+        except ValueError:
+            print("Warning: Invalid input. Please enter a valid number.")
+
+    # 4. Pesticides
+    while True:
+        try:
+            pesticide_str = input("\nEnter Pesticide Usage (tonnes) [e.g. 50]: ").strip()
+            pesticide = float(pesticide_str)
+            if not 0 <= pesticide < float("inf"):  # float() parses "nan"/"inf"; reject those too
+                print("Warning: Pesticide usage must be a finite, non-negative number.")
+                continue
+            break
+        except ValueError:
+            print("Warning: Invalid input. Please enter a valid number.")
+
+    # 5. Temperature (the range check also rejects NaN and infinity)
+    while True:
+        try:
+            temp_str = input("\nEnter Average Temperature (C) [e.g. 25]: ").strip()
+            temperature = float(temp_str)
+            if not (-30 <= temperature <= 60):
+                print("Warning: Temperature must be in range -30C to 60C.")
+                continue
+            break
+        except ValueError:
+            print("Warning: Invalid input. Please enter a valid number.")
+
+    # Inference logic
+    print("\nRunning Random Forest Regressor Prediction...")
+    try:
+        import pandas as pd  # imported here; only needed to build the one-row feature frame
+        # Encode inputs with the same encoders that were saved at training time
+        area_encoded = le_area.transform([area])[0]
+        item_encoded = le_item.transform([crop])[0]
+        # Feature DataFrame with matching column names
+        input_data = pd.DataFrame(
+            [[area_encoded, item_encoded, rainfall, pesticide, temperature]],
+            columns=['area_encoded', 'item_encoded', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
+        )
+        # Predict
+        prediction = model.predict(input_data)[0]
+        print("\n" + "=" * 50)
+        print("          PREDICTION RESULT          ")
+        print("=" * 50)
+        print(f"Location      : {area}")
+        print(f"Crop Type     : {crop}")
+        print(f"Rainfall      : {rainfall:,.1f} mm/year")
+        print(f"Pesticides    : {pesticide:,.1f} tonnes")
+        print(f"Temperature   : {temperature:.1f} C")
+        print("-" * 50)
+        print(f"Predicted Yield: {prediction:,.2f} hg/ha")
+        print("=" * 50)
+    except Exception as e:
+        print(f"\nError during prediction calculation: {e}")
+        sys.exit(1)  # report failure to the shell, like the other error paths above
+
+if __name__ == "__main__":
+    try:
+        main()
+    except (KeyboardInterrupt, EOFError):  # Ctrl+C, or stdin closed (Ctrl+D / exhausted pipe) during a prompt
+        print("\n\nGoodbye!")
+        sys.exit(0)
diff --git a/Disease prediction/model.py b/Disease prediction/model.py
@@ -1,4 +1,9 @@
+import torch
 import torch.nn as nn
+import os
+import logging
+
+logger = logging.getLogger(__name__)
 
 class PlantDiseaseNet(nn.Module):
     def __init__(self, num_classes=38):  # original model had 38 classes
@@ -77,3 +82,34 @@ def forward(self, x):
         x = self.pool(x)
         x = self.classifier(x)
         return x
+
+def load_pytorch_model(model_path, device='cpu'):
+    """Load a full model or a PlantDiseaseNet state dict in eval mode; return None on failure."""
+    try:
+        if not os.path.exists(model_path):
+            logger.error(f"PyTorch model file missing: {model_path}")
+            return None
+        # NOTE(security): torch.load unpickles the file, so only load checkpoints from trusted sources
+        model = torch.load(model_path, map_location=device)
+        if isinstance(model, dict):  # saved as a state dict rather than a full model
+            net = PlantDiseaseNet()
+            net.load_state_dict(model)
+            model = net.to(device)  # a fresh module starts on CPU; move it to the requested device
+        model.eval()
+        return model
+    except Exception as e:
+        logger.error(f"Error loading PyTorch model from {model_path}: {str(e)}")
+        return None
+
+def predict_pytorch(model, input_tensor):
+    """Run a forward pass with gradients disabled; return the raw output, or None on failure."""
+    if model is None:
+        logger.error("Prediction failed: PyTorch model is not loaded (None)")
+        return None
+    try:
+        with torch.no_grad():
+            return model(input_tensor)
+    except Exception as err:
+        logger.error(f"Error predicting with PyTorch model: {str(err)}")
+        return None
+