diff --git a/CHANGELOG.md b/CHANGELOG.md index 1adbbf2..90fa8bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Change log of DVH Analytics +v0.9.1 (TBD) +-------------------- + - [Options] Clicking 'Cancel' in user options properly reloads options from file now + - [Import] High resolution DVH calculation with interpolation for small volume ROIs [Issue 119](https://github.com/cutright/DVH-Analytics/issues/119) + - [Import] DVHs in the SQL DB now store 5 decimals instead of 2 [Issue 119](https://github.com/cutright/DVH-Analytics/issues/119) + - [Import] Use dicompyler-core's memory mapping on dvh calculation MemoryError [Issue 119](https://github.com/cutright/DVH-Analytics/issues/119) + - [ROI Map] Allow commas in roi names [Issue 121](https://github.com/cutright/DVH-Analytics/issues/121) + v0.9.0 (2020.12.4) -------------------- - [Database] New feature to apply edits to database by uploading a CSV for SQL commands diff --git a/dvha/db/dicom_parser.py b/dvha/db/dicom_parser.py index 74fdf6d..51f0dfa 100644 --- a/dvha/db/dicom_parser.py +++ b/dvha/db/dicom_parser.py @@ -63,6 +63,10 @@ def __init__(self, plan_file=None, structure_file=None, dose_file=None, dose_sum self.dvh_bin_max_dose = options.dvh_bin_max_dose self.dvh_bin_max_dose_units = options.dvh_bin_max_dose_units self.mlca_options = options.MLC_ANALYZER_OPTIONS + self.get_dvh_kwargs = options.GET_DVH_KWARGS + self.dvh_small_volume_threshold = options.DVH_SMALL_VOLUME_THRESHOLD + self.dvh_high_resolution_factor = options.DVH_HIGH_RESOLUTION_FACTOR + self.dvh_high_resolution_segments_between = options.DVH_HIGH_RESOLUTION_SEGMENTS_BETWEEN self.plan_file = plan_file self.structure_file = structure_file @@ -437,14 +441,36 @@ def get_dvh_row(self, dvh_index): pass if dvh is None: + kwargs = {key: value for key, value in self.get_dvh_kwargs.items()} # make copy + kwargs['structure'] = self.rt_data['structure'] + kwargs['dose'] = self.rt_data['dose'] + kwargs['roi'] = dvh_index + kwargs['limit'] = 
limit + kwargs['callback'] = self.send_dvh_progress + try: - dvh = dvhcalc.get_dvh(self.rt_data['structure'], self.rt_data['dose'], dvh_index, - callback=self.send_dvh_progress, limit=limit) - except AttributeError: - dose = validate_transfer_syntax_uid(self.rt_data['dose']) - structure = validate_transfer_syntax_uid(self.rt_data['structure']) - dvh = dvhcalc.get_dvh(structure, dose, dvh_index, - callback=self.send_dvh_progress, limit=limit) + try: + dvh = dvhcalc.get_dvh(**kwargs) + except AttributeError: + kwargs['dose'] = validate_transfer_syntax_uid(self.rt_data['dose']) + kwargs['structure'] = validate_transfer_syntax_uid(self.rt_data['structure']) + dvh = dvhcalc.get_dvh(**kwargs) + except MemoryError: + kwargs['memmap_rtdose'] = True + dvh = dvhcalc.get_dvh(**kwargs) + + # If small volume, increase resolution + if dvh.volume < self.dvh_small_volume_threshold: + kwargs['interpolation_resolution'] = (self.rt_data['dose'].PixelSpacing[0] / self.dvh_high_resolution_factor, + self.rt_data['dose'].PixelSpacing[1] / self.dvh_high_resolution_factor) + kwargs['interpolation_segments_between_planes'] = self.dvh_high_resolution_segments_between + + try: + dvh = dvhcalc.get_dvh(**kwargs) + except MemoryError: + kwargs['memmap_rtdose'] = True + # dicompyler-core needs to re-parse the dose file + dvh = dvhcalc.get_dvh(**kwargs) if dvh and dvh.volume > 0: # ignore points and empty ROIs geometries = self.get_dvh_geometries(dvh_index) @@ -459,7 +485,7 @@ def get_dvh_row(self, dvh_index): 'min_dose': [dvh.min, 'real'], 'mean_dose': [dvh.mean, 'real'], 'max_dose': [dvh.max, 'real'], - 'dvh_string': [','.join(['%.2f' % num for num in dvh.counts]), 'text'], + 'dvh_string': [','.join(['%.5f' % num for num in dvh.counts]), 'text'], 'roi_coord_string': [geometries['roi_coord_str'], 'text'], 'dist_to_ptv_min': [None, 'real'], 'dist_to_ptv_mean': [None, 'real'], @@ -1681,7 +1707,7 @@ def validation(self): 'message': "MRN is empty."}, 'study_instance_uid': {'status': 
self.is_study_instance_uid_to_be_imported_valid, 'value': self.study_instance_uid_to_be_imported, - 'message': "Study Instance UID already exists in the database."}, + 'message': "Study Instance UID used in the database."}, 'ptv': {'status': self.ptv_exists, 'value': self.ptv_names, 'message': "No PTV found."}, diff --git a/dvha/db/sql_columns.py b/dvha/db/sql_columns.py index d4fc7ba..4602066 100644 --- a/dvha/db/sql_columns.py +++ b/dvha/db/sql_columns.py @@ -92,6 +92,7 @@ 'Beam Perimeter (Mean)': {'var_name': 'perim_mean', 'table': 'Beams', 'units': 'cm'}, 'Beam Perimeter (Median)': {'var_name': 'perim_median', 'table': 'Beams', 'units': 'cm'}, 'Beam Perimeter (Max)': {'var_name': 'perim_max', 'table': 'Beams', 'units': 'cm'}, + 'Fx Group Beam Count': {'var_name': 'fx_grp_beam_count', 'table': 'Beams', 'units': ''}, 'Control Point MU (Min)': {'var_name': 'cp_mu_min', 'table': 'Beams', 'units': ''}, 'Control Point MU (Mean)': {'var_name': 'cp_mu_mean', 'table': 'Beams', 'units': ''}, 'Control Point MU (Median)': {'var_name': 'cp_mu_median', 'table': 'Beams', 'units': ''}, diff --git a/dvha/dialogs/main.py b/dvha/dialogs/main.py index 9186033..fc2f3e1 100644 --- a/dvha/dialogs/main.py +++ b/dvha/dialogs/main.py @@ -673,6 +673,10 @@ def __init__(self, parent): self.dvh_bin_max_dose = wx.TextCtrl(self, wx.ID_ANY, "") self.dvh_bin_max_dose_units = wx.ComboBox(self, wx.ID_ANY, choices=self.options.dvh_bin_max_dose_options, style=wx.CB_DROPDOWN | wx.CB_READONLY) + self.dvh_small_volume_threshold = wx.SpinCtrl(self, wx.ID_ANY, "10", min=1, max=50, style=wx.SP_ARROW_KEYS) + self.dvh_segments_between = wx.SpinCtrl(self, wx.ID_ANY, "10", min=0, max=20, style=wx.SP_ARROW_KEYS) + self.dvh_high_resolution = wx.ComboBox(self, wx.ID_ANY, choices=self.options.DVH_HIGH_RESOLUTION_FACTOR_OPTIONS, + style=wx.CB_DROPDOWN | wx.CB_READONLY) self.combo_box_colors_category = wx.ComboBox(self, wx.ID_ANY, choices=color_variables, style=wx.CB_DROPDOWN | wx.CB_READONLY) 
self.combo_box_colors_selection = wx.ComboBox(self, wx.ID_ANY, choices=colors, @@ -722,6 +726,15 @@ def __set_properties(self): self.dvh_bin_max_dose.SetToolTip("Prevent memory issues if dose grid contains very large, unrealistic doses") self.dvh_bin_max_dose.SetValue(str(self.options.dvh_bin_max_dose[self.options.dvh_bin_max_dose_units])) self.dvh_bin_max_dose_units.SetValue(self.options.dvh_bin_max_dose_units) + self.dvh_small_volume_threshold.SetToolTip("If ROI volume is less than this value, it will be recalculated with a " + "resolution of the dose grid spacing divided by 16") + self.dvh_small_volume_threshold.SetValue(str(self.options.DVH_SMALL_VOLUME_THRESHOLD)) + self.dvh_segments_between.SetToolTip("If ROI volume is less than threshold, it will be recalculated with a this many " + "segments interpolated between slices") + self.dvh_segments_between.SetValue(str(self.options.DVH_HIGH_RESOLUTION_SEGMENTS_BETWEEN)) + self.dvh_high_resolution.SetToolTip("If ROI volume is less than the volume threshold, the in-plane resolution will be " + "increased by this factor (e.g., interpolate in-between the dose grid)") + self.dvh_high_resolution.SetValue(str(self.options.DVH_HIGH_RESOLUTION_FACTOR)) # self.dvh_bin_width_input.SetMinSize((50, 22)) # self.dvh_bin_max_dose_units.SetMinSize((50, 22)) self.combo_box_colors_category.SetMinSize((250, self.combo_box_colors_category.GetSize()[1])) @@ -751,6 +764,9 @@ def __do_layout(self): sizer_dvh_options = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, "DVH Options"), wx.VERTICAL) sizer_dvh_bin_width = wx.BoxSizer(wx.HORIZONTAL) sizer_dvh_bin_max = wx.BoxSizer(wx.HORIZONTAL) + sizer_dvh_small_vol = wx.BoxSizer(wx.HORIZONTAL) + sizer_dvh_segments_between = wx.BoxSizer(wx.HORIZONTAL) + sizer_dvh_high_resolution = wx.BoxSizer(wx.HORIZONTAL) sizer_alpha = wx.BoxSizer(wx.VERTICAL) sizer_alpha_input = wx.BoxSizer(wx.HORIZONTAL) sizer_line_styles = wx.BoxSizer(wx.VERTICAL) @@ -788,21 +804,42 @@ def __do_layout(self): 
sizer_dicom_directories.Add(sizer_imported_wrapper, 1, wx.EXPAND, 0) sizer_wrapper.Add(sizer_dicom_directories, 0, wx.ALL | wx.EXPAND, 10) - sizer_dvh_options.Add(self.checkbox_dicom_dvh, 0, wx.ALL, 5) + sizer_dvh_options.Add(self.checkbox_dicom_dvh, 0, wx.LEFT, 5) sizer_dvh_options.Add((20, 10), 0, 0, 0) label_dvh_bin_width = wx.StaticText(self, wx.ID_ANY, "DVH Bin Width (cGy):") label_dvh_bin_width.SetToolTip("Value must be an integer") - sizer_dvh_bin_width.Add(label_dvh_bin_width, 0, wx.EXPAND | wx.TOP | wx.LEFT, 5) - sizer_dvh_bin_width.Add(self.dvh_bin_width_input, 1, wx.ALL, 5) + sizer_dvh_bin_width.Add(label_dvh_bin_width, 1, wx.EXPAND | wx.TOP | wx.LEFT, 5) + sizer_dvh_bin_width.Add(self.dvh_bin_width_input, 0, wx.ALL, 5) label_max_dose_bin = wx.StaticText(self, wx.ID_ANY, "Max Dose Bin Limit:") - label_max_dose_bin.SetToolTip("Prevent memory issues if dose grid contains very large, unrealistic doses") - sizer_dvh_bin_max.Add(label_max_dose_bin, 0, wx.EXPAND | wx.TOP | wx.LEFT, 5) + label_max_dose_bin.SetToolTip("Prevent memory issues if dose grid contains very large doses") + sizer_dvh_bin_max.Add(label_max_dose_bin, 1, wx.EXPAND | wx.TOP | wx.LEFT, 5) sizer_dvh_bin_max.Add((20, 20), 0, 0, 0) - sizer_dvh_bin_max.Add(self.dvh_bin_max_dose, 0, wx.EXPAND, 0) + sizer_dvh_bin_max.Add(self.dvh_bin_max_dose, 0, 0, 0) sizer_dvh_bin_max.Add((20, 20), 0, 0, 0) - sizer_dvh_bin_max.Add(self.dvh_bin_max_dose_units, 0, wx.EXPAND, 0) - sizer_dvh_options.Add(sizer_dvh_bin_width, 0, wx.BOTTOM, 10) - sizer_dvh_options.Add(sizer_dvh_bin_max, 0, 0, 0) + sizer_dvh_bin_max.Add(self.dvh_bin_max_dose_units, 0, 0, 0) + sizer_dvh_options.Add(sizer_dvh_bin_width, 0, wx.EXPAND | wx.BOTTOM, 0) + sizer_dvh_options.Add(sizer_dvh_bin_max, 0, wx.EXPAND, 0) + + label_dvh_small_volume = wx.StaticText(self, wx.ID_ANY, "Small volume threshold (cc):") + label_dvh_small_volume.SetToolTip("If ROI volume is less than this value, it will be recalculated with " + "a higher resolution using 
interpolation") + sizer_dvh_small_vol.Add(label_dvh_small_volume, 1, wx.EXPAND | wx.TOP, 5) + sizer_dvh_small_vol.Add(self.dvh_small_volume_threshold, 0, wx.TOP, 5) + sizer_dvh_options.Add(sizer_dvh_small_vol, 0, wx.EXPAND | wx.TOP | wx.LEFT, 5) + + label_dvh_segments_between = wx.StaticText(self, wx.ID_ANY, "Interpolated segments between planes:") + label_dvh_segments_between.SetToolTip("If ROI volume is less than threshold, it will be recalculated with a this many " + "segments interpolated between slices") + sizer_dvh_segments_between.Add(label_dvh_segments_between, 1, wx.EXPAND | wx.TOP, 5) + sizer_dvh_segments_between.Add(self.dvh_segments_between, 0, wx.TOP, 5) + sizer_dvh_options.Add(sizer_dvh_segments_between, 0, wx.EXPAND | wx.TOP | wx.LEFT, 5) + + label_dvh_high_resolution = wx.StaticText(self, wx.ID_ANY, "High resolution interpolation factor :") + label_dvh_high_resolution.SetToolTip("If ROI volume is less than the volume threshold, the in-plane resolution will be " + "increased by this factor (e.g., interpolate in-between the dose grid)") + sizer_dvh_high_resolution.Add(label_dvh_high_resolution, 1, wx.EXPAND | wx.TOP, 5) + sizer_dvh_high_resolution.Add(self.dvh_high_resolution, 0, wx.TOP, 5) + sizer_dvh_options.Add(sizer_dvh_high_resolution, 0, wx.EXPAND | wx.TOP | wx.LEFT, 5) sizer_wrapper.Add(sizer_dvh_options, 0, wx.ALL | wx.EXPAND, 10) @@ -868,6 +905,9 @@ def __do_bind(self): self.Bind(wx.EVT_TEXT, self.update_dvh_bin_width_val, id=self.dvh_bin_width_input.GetId()) self.Bind(wx.EVT_TEXT, self.update_dvh_bin_max_dose_val, id=self.dvh_bin_max_dose.GetId()) self.Bind(wx.EVT_COMBOBOX, self.update_dvh_bin_max_dose_units_val, id=self.dvh_bin_max_dose_units.GetId()) + self.Bind(wx.EVT_TEXT, self.update_dvh_small_volume_val, id=self.dvh_small_volume_threshold.GetId()) + self.Bind(wx.EVT_TEXT, self.update_dvh_segments_between, id=self.dvh_segments_between.GetId()) + self.Bind(wx.EVT_COMBOBOX, self.update_dvh_high_resolution_factor, 
id=self.dvh_high_resolution.GetId()) self.Bind(wx.EVT_COMBOBOX, self.update_input_colors_var, id=self.combo_box_colors_category.GetId()) self.Bind(wx.EVT_COMBOBOX, self.update_size_var, id=self.combo_box_sizes_category.GetId()) self.Bind(wx.EVT_COMBOBOX, self.update_line_width_var, id=self.combo_box_line_widths_category.GetId()) @@ -893,8 +933,8 @@ def on_ok(self, *evt): self.close() def on_cancel(self, *evt): + self.options.load() if self.is_edited: # Tracks edits since last options save - self.options.load() self.apply_and_redraw_plots() self.close() @@ -1074,6 +1114,24 @@ def update_dvh_bin_max_dose_units_val(self, *args): def update_dvh_bin_max_dose_units_var(self, *args): self.dvh_bin_max_dose_units.SetValue(self.options.dvh_bin_max_dose_units) + def update_dvh_small_volume_val(self, *args): + try: + new = int(float(self.dvh_small_volume_threshold.GetValue())) + self.options.set_option('DVH_SMALL_VOLUME_THRESHOLD', new) + except ValueError: + self.dvh_small_volume_threshold.SetValue(str(self.options.DVH_SMALL_VOLUME_THRESHOLD)) + + def update_dvh_segments_between(self, *args): + try: + new = int(float(self.dvh_segments_between.GetValue())) + self.options.set_option('DVH_HIGH_RESOLUTION_SEGMENTS_BETWEEN', new) + except ValueError: + self.dvh_small_volume_threshold.SetValue(str(self.options.DVH_HIGH_RESOLUTION_SEGMENTS_BETWEEN)) + + def update_dvh_high_resolution_factor(self, *args): + new = int(float(self.dvh_high_resolution.GetValue())) + self.options.set_option('DVH_HIGH_RESOLUTION_FACTOR', new) + def refresh_options(self): self.update_dvh_bin_width_var() self.update_dvh_bin_max_dose_var() diff --git a/dvha/models/dvh.py b/dvha/models/dvh.py index c455d8a..1fd0bd1 100644 --- a/dvha/models/dvh.py +++ b/dvha/models/dvh.py @@ -85,7 +85,7 @@ def __init__(self, uid=None, dvh_condition=None, dvh_bin_width=5, group=1): for i in range(self.count): # Process dvh_string to numpy array, and pad with zeros at the end # so that all dvhs are the same length - current_dvh = 
np.array(dvh_split[i], dtype='|S4').astype(np.float) + current_dvh = np.array(dvh_split[i], dtype=np.float) current_dvh_max = np.max(current_dvh) if current_dvh_max > 0: current_dvh = np.divide(current_dvh, current_dvh_max) @@ -96,7 +96,7 @@ def __init__(self, uid=None, dvh_condition=None, dvh_bin_width=5, group=1): for i in range(self.count): # Process dth_string to numpy array try: - self.dth.append(np.array(self.dth_string[i].split(','), dtype='|S4').astype(np.float)) + self.dth.append(np.array(self.dth_string[i].split(','), dtype=np.float)) except Exception: self.dth.append(np.array([0])) diff --git a/dvha/options.py b/dvha/options.py index 0474754..16b5e45 100644 --- a/dvha/options.py +++ b/dvha/options.py @@ -219,6 +219,16 @@ def __init__(self): self.SHOW_NEW_PTV_CALC_WARNING = True + self.GET_DVH_KWARGS = {'calculate_full_volume': True, + 'use_structure_extents': False, + 'interpolation_resolution': None, + 'interpolation_segments_between_planes': 0, + 'memmap_rtdose': False} + self.DVH_SMALL_VOLUME_THRESHOLD = 10 # compute high resolution DVH if volume less than this (cc) + self.DVH_HIGH_RESOLUTION_FACTOR = 8 # Must be a factor of 2 + self.DVH_HIGH_RESOLUTION_FACTOR_OPTIONS = ['2', '4', '8', '16', '32'] + self.DVH_HIGH_RESOLUTION_SEGMENTS_BETWEEN = 3 # Must be int + class Options(DefaultOptions): def __init__(self): diff --git a/dvha/tools/roi_name_manager.py b/dvha/tools/roi_name_manager.py index 1c5ef1f..dd4fc18 100644 --- a/dvha/tools/roi_name_manager.py +++ b/dvha/tools/roi_name_manager.py @@ -19,7 +19,7 @@ from dvha.paths import PREF_DIR from dvha.tools.roi_map_generator import ROIMapGenerator from dvha.tools.errors import push_to_log -from dvha.tools.utilities import flatten_list_of_lists, initialize_directories +from dvha.tools.utilities import flatten_list_of_lists, initialize_directories, csv_to_list class PhysicianROI: @@ -38,6 +38,7 @@ def __contains__(self, variation): def add_variations(self, variations): if type(variations) is not list: 
variations = [variations] + variations = [v.replace('"', "`") for v in variations] clean_variations = self.clean_variations for variation in variations: if clean_name(variation) not in clean_variations and variation.lower() not in {'uncategorized'}: @@ -99,6 +100,7 @@ def __contains__(self, variation): return clean_name(variation) in self.all_clean_variations def add_physician_roi(self, institutional_roi, physician_roi, variations=None, roi_type='NONE'): + physician_roi = physician_roi.replace(':', '^').replace('"', "'") self.rois[physician_roi] = PhysicianROI(physician_roi, institutional_roi, roi_type) if variations is not None: self.add_variations(physician_roi, variations) @@ -273,7 +275,8 @@ def import_physician_roi_map(self, abs_file_path, physician=None): for line in document: if not line: continue - line = str(line).strip().replace(':', ',').split(',') + line = str(line).strip().replace(':', ',', 1).replace(':', ',', 1) + line = csv_to_list(line) institutional_roi = line.pop(0) if institutional_mode: self.add_institutional_roi(institutional_roi) @@ -332,6 +335,7 @@ def get_institutional_roi(self, physician, physician_roi): return self.physicians[physician].get_institutional_roi(physician_roi) def add_institutional_roi(self, roi): + roi = roi.replace(':', '^').replace('"', "'") if clean_name(roi) not in self.clean_institutional_rois: self.institutional_rois.append(roi) self.add_physician_roi('DEFAULT', roi, roi) @@ -510,9 +514,9 @@ def physician_roi_file_data(self): for physician in list(self.physicians) + ['DEFAULT']: lines = [] for physician_roi in self.get_physician_rois(physician): - institutional_roi = self.get_institutional_roi(physician, physician_roi) - variations = ', '.join(self.get_variations(physician, physician_roi)) - lines.append(': '.join([institutional_roi, physician_roi, variations])) + institutional_roi = '"%s"' % self.get_institutional_roi(physician, physician_roi) + variations = '"%s"' % '","'.join(self.get_variations(physician, 
physician_roi)) + lines.append(':'.join([institutional_roi, '"%s"' % physician_roi, variations])) lines.sort() physicians_file_data[physician] = lines @@ -573,7 +577,10 @@ def get_roi_map_changes(self): for line in difflib.unified_diff(old_lines, new_data[physician]): if include: if line[0] in {'+', '-'}: - i_roi, p_roi, variations = tuple(i for i in line.split(': ')) + line_split = line.split(':') + i_roi, p_roi = line_split[0].strip(), line_split[1].strip() + variations = ':'.join(line_split[2:]) + if p_roi not in diff[physician]: diff[physician][p_roi] = {'-': {'institutional': '', 'variations': []}, '+': {'institutional': '', 'variations': []}} diff --git a/dvha/tools/utilities.py b/dvha/tools/utilities.py index d37a07c..10a3fde 100644 --- a/dvha/tools/utilities.py +++ b/dvha/tools/utilities.py @@ -932,3 +932,79 @@ def edit_study_uid(abs_file_path, study_uid): except Exception as e: push_to_log(e, abs_file_path) + +def get_csv_row(data, columns, delimiter=","): + """Convert a dictionary of data into a row for a csv file + + Parameters + ---------- + data : dict + a dictionary with values with str representations + columns : list + a list of keys dictating the order of the csv + delimiter : str + Optionally use the provided delimiter rather than a comma + + Returns + ---------- + str + a csv string delimited by delimiter + """ + str_data = [str(data[c]) for c in columns] + clean_str_data = ['"%s"' % s if delimiter in s else s for s in str_data] + clean_str_data = [s.replace("\n", "<>") for s in clean_str_data] + return delimiter.join(clean_str_data) + + +def csv_to_list(csv_str, delimiter=","): + """Split a CSV into a list + + Parameters + ---------- + csv_str : str + A comma-separated value string (with double quotes around values + containing the delimiter) + delimiter : str + The str separator between values + + Returns + ---------- + list + csv_str split by the delimiter + """ + if '"' not in csv_str: + return csv_str.split(delimiter) + + # add an empty 
value with another ",", but ignore it + # ensures next_csv_element always finds a "," + next_value, csv_str = next_csv_element(csv_str + ",", delimiter) + ans = [next_value.replace("<>", "\n")] + while csv_str: + next_value, csv_str = next_csv_element(csv_str, delimiter) + ans.append(next_value.replace("<>", "\n")) + + return ans + + +def next_csv_element(csv_str, delimiter=","): + """Helper function for csv_to_list + + Parameters + ---------- + csv_str : str + A comma-separated value string (with double quotes around values + containing the delimiter) + delimiter : str + The str separator between values + + Returns + ---------- + str, str + Return a tuple, the next value and remainder of csv_str + """ + if csv_str.startswith('"'): + split = csv_str[1:].find('"') + 1 + return csv_str[1:split], csv_str[split + 2 :] + + next_delimiter = csv_str.find(delimiter) + return csv_str[:next_delimiter], csv_str[next_delimiter + 1 :] diff --git a/requirements.txt b/requirements.txt index ff0568c..72bc0db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ regressors rapidfuzz selenium pandas <= 0.25.3 +scikit-image