DI504-Raman-project/augment.py at main · iremtopsakal/DI504-Raman-project · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
"""
This script provides data augmentation tools for Raman spectra.

1. Augmentation functions:
    - shift, scale, and noise to simulate spectral variation.
2. apply_augmentation:
    - Combines all augmentations with optional randomness.
3. generate_augmented_files:
    - Saves new augmented .txt files when run directly.
4. AugmentedWrapper:
    - Dataset wrapper for augmentation during training.
"""

def shift_spectrum(intensities, max_shift=3):
    """Shift a 1-D spectrum left or right by a random number of samples.

    The offset is drawn uniformly from the integers in
    ``[-max_shift, max_shift]``. Samples pushed past one end are dropped and
    the vacated positions at the other end are filled by repeating the nearest
    edge value, so the output always has the same length as the input.

    Args:
        intensities: 1-D sequence of intensity values (typically a NumPy
            array).
        max_shift: Largest shift magnitude in samples; expected to be
            non-negative.

    Returns:
        A new array holding the shifted spectrum, or ``intensities`` itself
        when the effective shift is zero or the input is empty.
    """
    # Draw first so the random stream is consumed exactly once per call,
    # whatever the input length.
    shift = np.random.randint(-max_shift, max_shift + 1)

    n_points = len(intensities)
    if n_points == 0:
        return intensities

    # A shift of n_points or more would leave an empty slice, which
    # np.pad(mode='edge') refuses to extend. Clamping to n_points - 1 yields
    # the same result a larger shift would: every sample equals the edge value.
    limit = n_points - 1
    shift = max(-limit, min(limit, shift))

    if shift == 0:
        return intensities
    if shift > 0:
        # Move right: drop the tail, repeat the first value on the left.
        return np.pad(intensities[:-shift], (shift, 0), mode='edge')
    # Move left: drop the head, repeat the last value on the right.
    return np.pad(intensities[-shift:], (0, -shift), mode='edge')

def scale_intensity(intensities, scale_range=(0.95, 1.05)):
    """Multiply the whole spectrum by one random gain factor.

    Args:
        intensities: Intensity values to rescale.
        scale_range: Arguments forwarded to ``np.random.uniform``; by default
            the ``(low, high)`` bounds of the gain factor.

    Returns:
        ``intensities`` multiplied by the drawn factor.
    """
    factor = np.random.uniform(*scale_range)
    scaled = intensities * factor
    return scaled

def add_noise(intensities, noise_level=0.005):
    """Add zero-mean Gaussian noise to every sample of the spectrum.

    Args:
        intensities: Array of intensity values; its ``shape`` determines how
            many noise samples are drawn.
        noise_level: Standard deviation of the noise.

    Returns:
        A new array equal to ``intensities`` plus the drawn noise.
    """
    return intensities + np.random.normal(
        loc=0, scale=noise_level, size=intensities.shape
    )

def apply_augmentation(intensities, force=False, augment_prob=0.5):
    """Apply shift, scale and noise augmentation, either always or at random.

    Args:
        intensities: Spectrum intensity values to augment.
        force: When true, always augment and draw no random number for the
            decision.
        augment_prob: Probability of augmenting when ``force`` is false.

    Returns:
        The spectrum after ``shift_spectrum``, ``scale_intensity`` and
        ``add_noise`` (in that order, each with its default parameters), or
        the input object unchanged when augmentation is skipped.
    """
    # Short-circuit keeps the random draw from happening when forced.
    skip = not force and not (np.random.rand() < augment_prob)
    if skip:
        return intensities

    for transform in (shift_spectrum, scale_intensity, add_noise):
        intensities = transform(intensities)
    return intensities

# === Offline Augmentation: Save New Files ===

def generate_augmented_files(input_root="Data/ASL baseline corrected merged", num_augmentations=2):
    """Write augmented copies of every spectrum file found under ``input_root``.

    Each immediate sub-directory of ``input_root`` is scanned for ``.txt``
    files whose name does not contain ``"_aug"``. Every such file is loaded
    with ``np.loadtxt``; column 0 is kept as-is, column 1 is augmented, and
    the result is saved next to the original as ``<name>_aug<k>.txt`` for
    ``k = 1 .. num_augmentations``.

    Augmentation is forced for every output file, so no ``_aug`` file is a
    plain copy of its source.

    Args:
        input_root: Directory whose sub-directories hold the spectrum files.
        num_augmentations: Number of augmented files to write per original.

    Returns:
        None. Progress and per-file failures are reported with ``print``.
    """
    suffix = ".txt"
    for folder in os.listdir(input_root):
        folder_path = os.path.join(input_root, folder)
        if not os.path.isdir(folder_path):
            continue

        for fname in os.listdir(folder_path):
            # Skip non-spectrum files and outputs of a previous run so that
            # augmented files are never augmented again.
            if not fname.endswith(suffix) or "_aug" in fname:
                continue

            original_path = os.path.join(folder_path, fname)
            # Strip only the trailing extension; str.replace would also
            # rewrite any ".txt" occurring earlier in the file name.
            stem = fname[:-len(suffix)]
            try:
                data = np.loadtxt(original_path)
                x_values = data[:, 0]
                y_values = data[:, 1]

                for i in range(num_augmentations):
                    # force=True: the default augment_prob would leave about
                    # half of the saved files identical to the original.
                    augmented_y = apply_augmentation(y_values, force=True)
                    augmented_data = np.column_stack((x_values, augmented_y))

                    out_name = f"{stem}_aug{i + 1}{suffix}"
                    out_path = os.path.join(folder_path, out_name)
                    np.savetxt(out_path, augmented_data, fmt="%.6f")
                    print(f"Saved: {out_path}")
            except Exception as e:
                # Best-effort batch job: report the bad file and carry on
                # with the remaining ones.
                print(f"Failed to process {original_path}: {e}")

# === Run offline augmentation only when executed directly ===
# Importing this module does not run the call below; it runs, with its default
# arguments, only when the file is executed as a script.

if __name__ == "__main__":
    generate_augmented_files()


class AugmentedWrapper(torch.utils.data.Dataset):
    """Dataset view that appends on-the-fly augmented copies to a base dataset.

    Index layout, with ``n = len(base_dataset)``:

    * ``0 .. n - 1``: the base samples, returned untouched.
    * ``n .. n * (num_aug + 1) - 1``: augmented versions of base sample
      ``idx % n``. The augmentation is drawn anew on every access, so the
      same index yields a different sample each time.

    Negative indices count from the end, and out-of-range indices raise
    ``IndexError`` so that plain ``for sample in dataset`` iteration stops.

    Args:
        base_dataset: Indexable dataset yielding ``(x, y)`` pairs. ``x`` is
            assumed to be a CPU tensor that squeezes to 1-D (e.g. shape
            ``(1, L)``) -- confirm against the base dataset.
        num_aug: Number of augmented copies exposed per base sample.
    """

    def __init__(self, base_dataset, num_aug=1):
        self.base_dataset = base_dataset
        self.num_aug = num_aug

    def __len__(self):
        """Return the number of original plus augmented samples."""
        return len(self.base_dataset) * (self.num_aug + 1)

    def __getitem__(self, idx):
        """Return the original or a freshly augmented ``(x, y)`` pair.

        Raises:
            IndexError: If ``idx`` lies outside ``[-len(self), len(self))``.
        """
        base_len = len(self.base_dataset)
        total = base_len * (self.num_aug + 1)

        # Normalise negative indices without mutating the caller's object.
        position = idx + total if idx < 0 else idx
        if not 0 <= position < total:
            # Without this the modulo below would silently wrap and legacy
            # iteration over the dataset would never terminate.
            raise IndexError(
                f"index {idx} is out of range for dataset of length {total}"
            )

        x, y = self.base_dataset[position % base_len]

        if position < base_len:
            return x, y  # Return original data

        x_aug = apply_augmentation(x.squeeze().numpy(), force=True)
        # Re-standardise: shifting, scaling and noise change mean and variance;
        # the epsilon guards against a zero standard deviation.
        x_aug = (x_aug - x_aug.mean()) / (x_aug.std() + 1e-8)
        # Restore the leading axis removed by squeeze().
        x = torch.tensor(x_aug[np.newaxis, :], dtype=torch.float32)
        return x, y