recovery hot fixes

Benoît Sierro
2021-05-31 13:54:00 +02:00
parent 656815015a
commit f0b20b90c7
7 changed files with 305 additions and 316 deletions

View File

@@ -32,25 +32,16 @@ def create_parser():
         action="store_true",
         help="force not to use ray",
     )
+    parser.add_argument("--output-name", "-o", help="path to the final output folder", default=None)

     run_parser = subparsers.add_parser("run", help="run a simulation from a config file")
     run_parser.add_argument("configs", help="path(s) to the toml configuration file(s)", nargs="+")
-    run_parser.add_argument(
-        "--append-to",
-        "-a",
-        help="optional directory where a compatible simulation has already been ran",
-        default=None,
-    )
-    run_parser.add_argument(
-        "--output-name", "-o", help="path to the final output folder", default=None
-    )
     run_parser.set_defaults(func=run_sim)

     resume_parser = subparsers.add_parser("resume", help="resume a simulation")
     resume_parser.add_argument(
-        "data_dir",
-        help="path to the directory where the initial_config.toml and the data is stored",
+        "sim_dir",
+        help="path to the directory where the initial_config.toml and the partial data is stored",
     )
     resume_parser.add_argument(
         "configs",
@@ -64,9 +55,6 @@ def create_parser():
     merge_parser.add_argument(
         "path", help="path to the final simulation folder containing 'initial_config.toml'"
     )
-    merge_parser.add_argument(
-        "--output-name", "-o", help="path to the final output folder", default=None
-    )
     merge_parser.set_defaults(func=merge)
     return parser
@@ -107,9 +95,11 @@ def prep_ray(args):
 def resume_sim(args):
     method = prep_ray(args)
-    sim = resume_simulations(args.data_dir, method=method)
+    sim = resume_simulations(args.sim_dir, method=method)
     sim.run()
-    run_simulation_sequence(*args.configs, method=method, prev_data_folder=sim.data_folder)
+    run_simulation_sequence(
+        *args.configs, method=method, prev_sim_dir=sim.data_folder, final_name=args.output_name
+    )


 if __name__ == "__main__":
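With `--output-name` moved from the `run` and `merge` subparsers onto the top-level parser, the flag now has to appear before the subcommand. A minimal, self-contained argparse sketch of that ordering rule (hypothetical prog name, not part of the commit):

import argparse

# top-level options must precede the subcommand; subparser arguments follow it
parser = argparse.ArgumentParser(prog="scgenerator")
parser.add_argument("--output-name", "-o", default=None)
subparsers = parser.add_subparsers(dest="command")
run_parser = subparsers.add_parser("run")
run_parser.add_argument("configs", nargs="+")

# equivalent of: scgenerator -o out run config.toml
args = parser.parse_args(["-o", "out", "run", "config.toml"])
print(args.output_name, args.configs)  # out ['config.toml']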

View File

@@ -1,4 +1,5 @@
 import numpy as np
+from numpy.lib.arraysetops import isin


 def pbar_format(worker_id: int):
@@ -98,11 +99,20 @@ def fit_parameters(param):
     return True


-def string(l):
-    def _string(s):
-        return isinstance(s, str) and s.lower() in l
+def string(l=None):
+    if l is None:
+
+        def _string(s):
+            return isinstance(s, str)
+
+        _string.__doc__ = f"must be a str"
+    else:
+
+        def _string(s):
+            return isinstance(s, str) and s.lower() in l

-    _string.__doc__ = f"must be a str matching one of {l}"
+        _string.__doc__ = f"must be a str matching one of {l}"
     return _string
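A quick sketch of how the reworked validator factory behaves; the function is re-stated here so the example runs on its own:

def string(l=None):
    if l is None:
        def _string(s):
            return isinstance(s, str)
        _string.__doc__ = "must be a str"
    else:
        def _string(s):
            return isinstance(s, str) and s.lower() in l
        _string.__doc__ = f"must be a str matching one of {l}"
    return _string

any_str = string()
model = string(["pcf", "marcatili"])
assert any_str("hello") and not any_str(42)
assert model("PCF") and not model("hasan")  # matching is case-insensitive
print(model.__doc__)  # must be a str matching one of ['pcf', 'marcatili']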
@@ -123,8 +133,8 @@ def capillary_nested(n):
 valid_param_types = dict(
     root=dict(
-        name=lambda s: isinstance(s, str),
-        prev_data_dir=lambda s: isinstance(s, str),
+        name=string(),
+        prev_data_dir=string(),
     ),
     fiber=dict(
         input_transmission=in_range_incl(num, (0, 1)),
@@ -138,7 +148,7 @@ valid_param_types = dict(
         he_mode=he_mode,
         fit_parameters=fit_parameters,
         beta=beta,
-        dispersion_file=lambda s: isinstance(s, str),
+        dispersion_file=string(),
         model=string(["pcf", "marcatili", "marcatili_adjusted", "hasan", "custom"]),
         length=in_range_excl(num, (0, 1e9)),
         capillary_num=integer,
@@ -156,7 +166,7 @@ valid_param_types = dict(
     ),
     pulse=dict(
         field_0=field_0,
-        field_file=lambda s: isinstance(s, str),
+        field_file=string(),
         repetition_rate=num,
         peak_power=num,
         mean_power=num,
@@ -184,6 +194,7 @@ valid_param_types = dict(
         lower_wavelength_interp_limit=in_range_excl(num, (100e-9, 3000e-9)),
         upper_wavelength_interp_limit=in_range_excl(num, (100e-9, 5000e-9)),
         frep=num,
+        prev_sim_dir=string(),
     ),
 )

View File

@@ -32,7 +32,7 @@ class DuplicateParameterError(Exception):
     pass


-class IncompleteDataFolderError(Exception):
+class IncompleteDataFolderError(FileNotFoundError):
     pass
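Re-parenting the exception means any existing `except FileNotFoundError` handler (such as the one in `recover_params` further down this commit) now also catches an incomplete data folder. A short sketch of the effect:

class IncompleteDataFolderError(FileNotFoundError):
    pass

try:
    raise IncompleteDataFolderError("data folder is missing spectra")
except FileNotFoundError as exc:
    print("caught:", exc)  # caught: data folder is missing spectra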

View File

@@ -46,24 +46,26 @@ class ParamSequence(Mapping):
 class ContinuationParamSequence(ParamSequence):
-    def __init__(self, prev_data_folder: str, new_config: Dict[str, Any]):
+    def __init__(self, prev_sim_dir: str, new_config: Dict[str, Any]):
         """Parameter sequence that builds on a previous simulation but with a new configuration

         It is recommended that only the fiber and the number of points stored may be changed and
         changing other parameters could results in unexpected behaviors. The new config doesn't have to
-        be a full configuration (specify only the new parameters).
+        be a full configuration (i.e. you can specify only the parameters that change).

         Parameters
         ----------
-        prev_data_folder : str
+        prev_sim_dir : str
             path to the folder of the previous simulation containing 'initial_config.toml'
         new_config : Dict[str, Any]
             new config
         """
-        self.path = Path(prev_data_folder)
-        init_config = io.load_previous_parameters(os.path.join(self.path, "initial_config.toml"))
+        self.prev_sim_dir = Path(prev_sim_dir)
+        init_config = io.load_previous_parameters(
+            os.path.join(self.prev_sim_dir, "initial_config.toml")
+        )
         self.prev_variable_lists = [
-            (set(variable_list[1:]), self.path / utils.format_variable_list(variable_list))
+            (set(variable_list[1:]), self.prev_sim_dir / utils.format_variable_list(variable_list))
             for variable_list, _ in required_simulations(init_config)
         ]
@@ -74,11 +76,11 @@ class ContinuationParamSequence(ParamSequence):
         """iterates through all possible parameters, yielding a config as well as a flattened
         computed parameters set each time"""
         for variable_list, full_config in required_simulations(self.config):
-            prev_sim_folder = self.find_prev_data_folder(variable_list)
-            full_config["prev_data_dir"] = str(prev_sim_folder.resolve())
-            yield variable_list, compute_subsequent_paramters(prev_sim_folder, full_config)
+            prev_data_dir = self.find_prev_data_dir(variable_list)
+            full_config["prev_data_dir"] = str(prev_data_dir.resolve())
+            yield variable_list, compute_init_parameters(full_config)

-    def find_prev_data_folder(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
+    def find_prev_data_dir(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
         """finds the previous simulation data that this new config should start from

         Parameters
@@ -102,7 +104,7 @@ class ContinuationParamSequence(ParamSequence):
                 return path

         raise ValueError(
-            f"cannot find a previous data folder for {new_variable_list} in {self.path}"
+            f"cannot find a previous data folder for {new_variable_list} in {self.prev_sim_dir}"
         )
@@ -114,7 +116,7 @@ class RecoveryParamSequence(ParamSequence):
         z_num = config["simulation"]["z_num"]
         started = self.num_sim

-        sub_folders = io.get_data_subfolders(io.get_data_folder(self.id))
+        sub_folders = io.get_data_subfolders(self.id)

         pbar_store = utils.PBars(
             tqdm(
@@ -138,20 +140,63 @@ class RecoveryParamSequence(ParamSequence):
         self.num_steps += started * z_num
         self.single_sim = self.num_sim == 1

-    def __iter__(self) -> Iterator[Tuple[List[Tuple[str, Any]], dict]]:
-        for variable_list, full_config in required_simulations(self.config):
-            data_dir = os.path.join(
-                io.get_data_folder(self.id), utils.format_variable_list(variable_list)
-            )
-            if not io.propagation_initiated(data_dir):
-                yield variable_list, compute_init_parameters(full_config)
+        self.prev_sim_dir = None
+        if "prev_sim_dir" in self.config.get("simulation", {}):
+            self.prev_sim_dir = Path(self.config["simulation"]["prev_sim_dir"])
+            init_config = io.load_previous_parameters(
+                os.path.join(self.prev_sim_dir, "initial_config.toml")
+            )
+            self.prev_variable_lists = [
+                (
+                    set(variable_list[1:]),
+                    self.prev_sim_dir / utils.format_variable_list(variable_list),
+                )
+                for variable_list, _ in required_simulations(init_config)
+            ]
+
+    def __iter__(self) -> Iterator[Tuple[List[Tuple[str, Any]], dict]]:
+        for variable_list, params in required_simulations(self.config):
+            data_dir = io.get_data_folder(self.id) / utils.format_variable_list(variable_list)
+            if not data_dir.is_dir() or io.find_last_spectrum_num(data_dir) == 0:
+                if (prev_data_dir := self.find_prev_data_dir(variable_list)) is not None:
+                    params["prev_data_dir"] = str(prev_data_dir)
+                yield variable_list, compute_init_parameters(params)
             elif io.num_left_to_propagate(data_dir, self.config["simulation"]["z_num"]) != 0:
-                yield variable_list, recover_params(full_config, data_dir)
+                yield variable_list, recover_params(params, data_dir)
             else:
                 continue

+    def find_prev_data_dir(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
+        """finds the previous simulation data that this new config should start from
+
+        Parameters
+        ----------
+        new_variable_list : List[Tuple[str, Any]]
+            as yielded by required_simulations
+
+        Returns
+        -------
+        Path
+            path to the data folder
+
+        Raises
+        ------
+        ValueError
+            no data folder found
+        """
+        if self.prev_sim_dir is None:
+            return None
+        to_test = set(new_variable_list[1:])
+        for old_v_list, path in self.prev_variable_lists:
+            if to_test.issuperset(old_v_list):
+                return path
+        raise ValueError(
+            f"cannot find a previous data folder for {new_variable_list} in {self.prev_sim_dir}"
+        )
+

 def validate(config: dict) -> dict:
     """validates a configuration dictionary and attempts to fill in defaults
@@ -517,20 +562,19 @@ def _ensure_consistency(config):
     return config


-def recover_params(config: Dict[str, Any], data_folder: os.PathLike) -> Dict[str, Any]:
-    path = Path(data_folder)
+def recover_params(config: Dict[str, Any], data_folder: Path) -> Dict[str, Any]:
     params = compute_init_parameters(config)
     try:
-        prev_params = io.load_toml(path / "params.toml")
+        prev_params = io.load_toml(data_folder / "params.toml")
     except FileNotFoundError:
         prev_params = {}
     for k, v in prev_params.items():
         params.setdefault(k, v)
-    num, last_spectrum = io.load_last_spectrum(str(path))
+    num, last_spectrum = io.load_last_spectrum(data_folder)
     params["spec_0"] = last_spectrum
     params["field_0"] = np.fft.ifft(last_spectrum)
     params["recovery_last_stored"] = num
-    params["cons_qty"] = np.load(os.path.join(data_folder, "cons_qty.npy"))
+    params["cons_qty"] = np.load(data_folder / "cons_qty.npy")
     return params
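`recover_params` restarts a propagation from the last spectrum that made it to disk; the initial field is simply the inverse FFT of that spectrum. A sketch of the round trip with stand-in data:

import numpy as np

last_spectrum = np.fft.fft(np.exp(-np.linspace(-5, 5, 256) ** 2))  # stand-in data
spec_0 = last_spectrum
field_0 = np.fft.ifft(last_spectrum)
print(np.allclose(np.fft.fft(field_0), spec_0))  # True: the pair stays consistent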
@@ -561,26 +605,7 @@ def compute_init_parameters(config: Dict[str, Any]) -> Dict[str, Any]:
     params = _generate_sim_grid(params)

     # Initial field may influence the grid
-    custom_field = False
-    if "field_file" in params:
-        custom_field = True
-        field_data = np.load(params["field_file"])
-        field_interp = interp1d(
-            field_data["time"], field_data["field"], bounds_error=False, fill_value=(0, 0)
-        )
-        params["field_0"] = field_interp(params["t"])
-        params = _comform_custom_field(params)
-    elif "field_0" in params:
-        custom_field = True
-        params = _evalutate_custom_field_equation(params)
-        params = _comform_custom_field(params)
-
-    # central wavelength may be off with custom fields
-    if custom_field:
-        delta_w = params["w_c"][np.argmax(abs2(np.fft.fft(params["field_0"])))]
-        logger.debug(f"had to adjust w by {delta_w}")
-        params["wavelength"] = units.m.inv(units.m(params["wavelength"]) - delta_w)
-        _update_frequency_domain(params)
+    custom_field = setup_custom_field(params)

     if "step_size" in params:
         params["error_ok"] = params["step_size"]
@@ -650,28 +675,39 @@ def compute_init_parameters(config: Dict[str, Any]) -> Dict[str, Any]:
     return params


-def compute_subsequent_paramters(sim_folder: str, config: Dict[str, Any]) -> Dict[str, Any]:
-    params = compute_init_parameters(config)
-    spec = io.load_last_spectrum(sim_folder)[1]
-    params["field_0"] = np.fft.ifft(spec) * params["input_transmission"]
-    params["spec_0"] = np.fft.fft(params["field_0"])
-
-    return params
-
-
-def _comform_custom_field(params):
-    params["field_0"] = params["field_0"] * pulse.modify_field_ratio(
-        params["t"],
-        params["field_0"],
-        params.get("peak_power"),
-        params.get("energy"),
-        params.get("intensity_noise"),
-    )
-    params["width"], params["peak_power"], params["energy"] = pulse.measure_field(
-        params["t"], params["field_0"]
-    )
-    return params
+def setup_custom_field(params: Dict[str, Any]) -> bool:
+    logger = get_logger(__name__)
+    custom_field = True
+    if "prev_data_dir" in params:
+        spec = io.load_last_spectrum(Path(params["prev_data_dir"]))[1]
+        params["field_0"] = np.fft.ifft(spec) * params["input_transmission"]
+    elif "field_file" in params:
+        field_data = np.load(params["field_file"])
+        field_interp = interp1d(
+            field_data["time"], field_data["field"], bounds_error=False, fill_value=(0, 0)
+        )
+        params["field_0"] = field_interp(params["t"])
+    elif "field_0" in params:
+        params = _evalutate_custom_field_equation(params)
+    else:
+        custom_field = False
+
+    if custom_field:
+        params["field_0"] = params["field_0"] * pulse.modify_field_ratio(
+            params["t"],
+            params["field_0"],
+            params.get("peak_power"),
+            params.get("energy"),
+            params.get("intensity_noise"),
+        )
+        params["width"], params["peak_power"], params["energy"] = pulse.measure_field(
+            params["t"], params["field_0"]
+        )
+        delta_w = params["w_c"][np.argmax(abs2(np.fft.fft(params["field_0"])))]
+        logger.debug(f"had to adjust w by {delta_w}")
+        params["wavelength"] = units.m.inv(units.m(params["wavelength"]) - delta_w)
+        _update_frequency_domain(params)
+    return custom_field


 def _update_pulse_parameters(params):
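The recentering step at the end of `setup_custom_field` estimates how far the supplied field's spectral peak sits from the grid center and shifts the nominal wavelength accordingly. A self-contained sketch of the `delta_w` computation (the grid and pulse values below are made up):

import numpy as np

t = np.linspace(-1e-12, 1e-12, 1024)
w_c = 2 * np.pi * np.fft.fftfreq(len(t), d=t[1] - t[0])  # angular frequency offsets
field_0 = np.exp(-((t / 100e-15) ** 2)) * np.exp(1j * 2e13 * t)  # peak shifted by 2e13 rad/s

delta_w = w_c[np.argmax(np.abs(np.fft.fft(field_0)) ** 2)]
print(f"{delta_w:.2e}")  # close to the imposed 2e13 rad/s offset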

View File

@@ -126,21 +126,24 @@ class DataBuffer:
     # return os.path.normpath(p)


+def conform_toml_path(path: os.PathLike) -> Path:
+    path = Path(path)
+    if not path.name.lower().endswith(".toml"):
+        path = path.parent / (path.name + ".toml")
+    return path
+
+
 def load_toml(path: os.PathLike):
     """returns a dictionary parsed from the specified toml file"""
-    path = str(path)
-    if not path.lower().endswith(".toml"):
-        path += ".toml"
+    path = conform_toml_path(path)
     with open(path, mode="r") as file:
         dico = toml.load(file)
     return dico


-def save_toml(path, dico):
+def save_toml(path: os.PathLike, dico):
     """saves a dictionary into a toml file"""
-    path = str(path)
-    if not path.lower().endswith(".toml"):
-        path += ".toml"
+    path = conform_toml_path(path)
     with open(path, mode="w") as file:
         toml.dump(dico, file)
     return dico
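The duplicated extension check in `load_toml` and `save_toml` is factored into the new `pathlib`-based `conform_toml_path`. Behavior sketch (the helper is re-stated so this runs standalone):

import os
from pathlib import Path

def conform_toml_path(path: os.PathLike) -> Path:
    path = Path(path)
    if not path.name.lower().endswith(".toml"):
        path = path.parent / (path.name + ".toml")
    return path

print(conform_toml_path("configs/sim"))       # configs/sim.toml
print(conform_toml_path("configs/sim.TOML"))  # configs/sim.TOML (left alone)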
@@ -156,7 +159,7 @@ def serializable(val):
     return out


-def _prepare_for_serialization(dico):
+def _prepare_for_serialization(dico: Dict[str, Any]):
     """prepares a dictionary for serialization. Some keys may not be preserved
     (dropped due to no conversion available)
@@ -183,40 +186,56 @@ def _prepare_for_serialization(dico: Dict[str, Any]):
     return out


-def save_parameters(param_dict, file_name="param"):
-    """Writes the flattened parameters dictionary specific to a single simulation into a toml file
-
-    Parameters
-    ----------
-    param_dict : dictionary of parameters. Only floats, int and arrays of
-        non complex values are stored in the json
-    folder_name : folder where to save the files (relative to cwd)
-    file_name : name of the readable file.
-    """
+def save_parameters(param_dict: Dict[str, Any], task_id: int, data_dir_name: str):
     param = param_dict.copy()
-
-    folder_name, file_name = os.path.split(file_name)
-    folder_name = "tmp" if folder_name == "" else folder_name
-    file_name = os.path.splitext(file_name)[0]
-
-    if not os.path.exists(folder_name):
-        os.makedirs(folder_name)
+    file_path = generate_file_path("params.toml", task_id, data_dir_name)

     param = _prepare_for_serialization(param)
     param["datetime"] = datetime.now()
+    file_path.parent.mkdir(exist_ok=True)

     # save toml of the simulation
-    with open(os.path.join(folder_name, file_name + ".toml"), "w") as file:
+    with open(file_path, "w") as file:
         toml.dump(param, file, encoder=toml.TomlNumpyEncoder())

-    return os.path.join(folder_name, file_name)
+    return file_path
+
+
+# def save_parameters_old(param_dict, file_name="param"):
+#     """Writes the flattened parameters dictionary specific to a single simulation into a toml file
+#
+#     Parameters
+#     ----------
+#     param_dict : dictionary of parameters. Only floats, int and arrays of
+#         non complex values are stored in the json
+#     folder_name : folder where to save the files (relative to cwd)
+#     file_name : name of the readable file.
+#     """
+#     param = param_dict.copy()
+#
+#     folder_name, file_name = os.path.split(file_name)
+#     folder_name = "tmp" if folder_name == "" else folder_name
+#     file_name = os.path.splitext(file_name)[0]
+#
+#     if not os.path.exists(folder_name):
+#         os.makedirs(folder_name)
+#
+#     param = _prepare_for_serialization(param)
+#     param["datetime"] = datetime.now()
+#
+#     # save toml of the simulation
+#     with open(os.path.join(folder_name, file_name + ".toml"), "w") as file:
+#         toml.dump(param, file, encoder=toml.TomlNumpyEncoder())
+#
+#     return os.path.join(folder_name, file_name)


-def load_previous_parameters(path: str):
+def load_previous_parameters(path: os.PathLike):
     """loads a parameters toml files and converts data to appropriate type

     Parameters
     ----------
-    path : str
+    path : PathLike
         path to the toml

     Returns
@@ -248,31 +267,17 @@ def load_material_dico(name):
     return toml.loads(Paths.gets("gas"))[name]


-# def set_environ(config: dict):
-#     """sets environment variables specified in the config
-
-#     Parameters
-#     ----------
-#     config : dict
-#         whole simulation config file
-#     """
-#     environ = config.get("environment", {})
-#     for k, v in environ.get("path_prefixes", {}).items():
-#         os.environ[(PREFIX_KEY_BASE + k).upper()] = v
-
-
 def get_all_environ() -> Dict[str, str]:
     """returns a dictionary of all environment variables set by any instance of scgenerator"""
     d = dict(filter(lambda el: el[0].startswith(ENVIRON_KEY_BASE), os.environ.items()))
-    print(d)
     return d


-def load_single_spectrum(folder, index) -> np.ndarray:
-    return np.load(os.path.join(folder, f"spectra_{index}.npy"))
+def load_single_spectrum(folder: Path, index) -> np.ndarray:
+    return np.load(folder / f"spectra_{index}.npy")


-def get_data_subfolders(path: str) -> List[str]:
+def get_data_subfolders(task_id: int) -> List[Path]:
     """returns a list of relative path/subfolders in the specified directory

     Parameters
@@ -285,12 +290,11 @@ def get_data_subfolders(task_id: int) -> List[Path]:
     List[str]
         paths to sub folders
     """
-    sub_folders = glob(os.path.join(path, "*"))
-    sub_folders = list(filter(os.path.isdir, sub_folders))
-    return sub_folders
+    return [p.resolve() for p in get_data_folder(task_id).glob("*") if p.is_dir()]


-def check_data_integrity(sub_folders: List[str], init_z_num: int):
+def check_data_integrity(sub_folders: List[Path], init_z_num: int):
     """checks the integrity and completeness of a simulation data folder

     Parameters
@@ -312,18 +316,12 @@ def check_data_integrity(sub_folders: List[Path], init_z_num: int):
     )


-def propagation_initiated(sub_folder) -> bool:
-    if os.path.isdir(sub_folder):
-        return find_last_spectrum_num(sub_folder) > 0
-    return False
-
-
-def num_left_to_propagate(sub_folder: str, init_z_num: int) -> int:
+def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
     """checks if a propagation has completed

     Parameters
     ----------
-    sub_folder : str
+    sub_folder : Path
         path to the sub folder containing the spectra
     init_z_num : int
         number of z position to store as specified in the master config file
@@ -338,7 +336,7 @@ def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
     IncompleteDataFolderError
         raised if init_z_num doesn't match that specified in the individual parameter file
     """
-    params = load_toml(os.path.join(sub_folder, "params.toml"))
+    params = load_toml(sub_folder / "params.toml")
     z_num = params["z_num"]
     num_spectra = find_last_spectrum_num(sub_folder) + 1  # because of zero-indexing
@@ -352,8 +350,9 @@ def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
 def find_last_spectrum_num(data_dir: Path):
-    for num in itertools.count():
-        if not (data_dir / f"spectrum_{num}.npy").is_file():
+    for num in itertools.count(1):
+        p_to_test = data_dir / f"spectrum_{num}.npy"
+        if not p_to_test.is_file() or len(p_to_test.read_bytes()) == 0:
             return num - 1
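Two behavior changes here: the scan now starts at spectrum_1.npy, so an untouched folder yields 0 (the value RecoveryParamSequence checks for) rather than -1, and a zero-byte file, e.g. one truncated by a crash mid-write, counts as missing. A standalone check:

import itertools
import tempfile
from pathlib import Path

def find_last_spectrum_num(data_dir: Path):  # re-stated from the diff
    for num in itertools.count(1):
        p_to_test = data_dir / f"spectrum_{num}.npy"
        if not p_to_test.is_file() or len(p_to_test.read_bytes()) == 0:
            return num - 1

with tempfile.TemporaryDirectory() as tmp:
    d = Path(tmp)
    (d / "spectrum_1.npy").write_bytes(b"\x93NUMPY")  # fake but non-empty
    (d / "spectrum_2.npy").write_bytes(b"")           # interrupted write
    print(find_last_spectrum_num(d))                  # 1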
@@ -363,18 +362,6 @@ def load_last_spectrum(data_dir: Path) -> Tuple[int, np.ndarray]:
     return num, np.load(data_dir / f"spectrum_{num}.npy")


-def last_spectrum_path(path: Path) -> Path:
-    num = find_last_spectrum_num(path)
-    return path / f"spectrum_{num}.npy"
-
-
-def merge(paths: Union[str, List[str]], delete=False):
-    if isinstance(paths, (str, Path)):
-        paths = [paths]
-    for path in paths:
-        merge_same_simulations(path, delete=delete)
-
-
 def append_and_merge(final_sim_path: os.PathLike, new_name=None):
     final_sim_path = Path(final_sim_path).resolve()
     if new_name is None:
@@ -419,7 +406,7 @@ def append_and_merge(final_sim_path: os.PathLike, new_name=None):
     merge(destination_path, delete=True)


-def update_appended_params(param_path, new_path, z):
+def update_appended_params(param_path: Path, new_path: Path, z):
     z_num = len(z)
     params = load_toml(param_path)
     if "simulation" in params:
@@ -431,24 +418,24 @@ def update_appended_params(param_path: Path, new_path: Path, z):
     save_toml(new_path, params)


-def merge_same_simulations(path: str, delete=True):
+def merge(paths: Union[Path, List[Path]], delete=False):
+    if isinstance(paths, Path):
+        paths = [paths]
+    for path in paths:
+        merge_same_simulations(path, delete=delete)
+
+
+def merge_same_simulations(path: Path, delete=True):
     logger = get_logger(__name__)
     num_separator = PARAM_SEPARATOR + "num" + PARAM_SEPARATOR
-    sub_folders = get_data_subfolders(path)
-    config = load_toml(os.path.join(path, "initial_config.toml"))
+    sub_folders = [p for p in path.glob("*") if p.is_dir()]
+    config = load_toml(path / "initial_config.toml")
     repeat = config["simulation"].get("repeat", 1)
     max_repeat_id = repeat - 1
     z_num = config["simulation"]["z_num"]

     check_data_integrity(sub_folders, z_num)

-    base_folders = set()
-    for sub_folder in sub_folders:
-        splitted_base_path = sub_folder.split(num_separator)[:-1]
-        base_folder = num_separator.join(splitted_base_path)
-        if len(base_folder) > 0:
-            base_folders.add(base_folder)
-
     sim_num, param_num = utils.count_variations(config)
     pbar = utils.PBars(tqdm(total=sim_num * z_num, desc="Merging data", ncols=100))
@@ -461,51 +448,49 @@ def merge_same_simulations(path: Path, delete=True):
             if repeat_id == 0:
                 spectra = []

-            in_path = os.path.join(path, utils.format_variable_list(variable_and_ind))
-            spectra.append(np.load(os.path.join(in_path, f"spectrum_{z_id}.npy")))
+            in_path = path / utils.format_variable_list(variable_and_ind)
+            spectra.append(np.load(in_path / f"spectrum_{z_id}.npy"))
             pbar.update()

             # write new files only once all those from one parameter set are collected
             if repeat_id == max_repeat_id:
-                out_path = os.path.join(
-                    path,
-                    utils.format_variable_list(variable_and_ind[1:-1]) + PARAM_SEPARATOR + "merged",
+                out_path = path / (
+                    utils.format_variable_list(variable_and_ind[1:-1]) + PARAM_SEPARATOR + "merged"
                 )
                 out_path = ensure_folder(out_path, prevent_overwrite=False)
                 spectra = np.array(spectra).reshape(repeat, len(spectra[0]))
-                np.save(os.path.join(out_path, f"spectra_{z_id}.npy"), spectra.squeeze())
+                np.save(out_path / f"spectra_{z_id}.npy", spectra.squeeze())

                 # copy other files only once
                 if z_id == 0:
                     for file_name in ["z.npy", "params.toml"]:
-                        shutil.copy(
-                            os.path.join(in_path, file_name),
-                            os.path.join(out_path, ""),
-                        )
+                        shutil.copy(in_path / file_name, out_path)

     pbar.close()
     if delete:
-        try:
-            for sub_folder in sub_folders:
-                send2trash(sub_folder)
-        except TrashPermissionError:
-            logger.warning(f"could not send send {len(base_folders)} folder(s) to trash")
+        for sub_folder in sub_folders:
+            try:
+                send2trash(str(sub_folder))
+            except TrashPermissionError:
+                logger.warning(f"could not send send {sub_folder} to trash")


-def get_data_folder(task_id: int, name_if_new: str = "data"):
+def get_data_folder(task_id: int, name_if_new: str = "data") -> Path:
     if name_if_new == "":
         name_if_new = "data"
     idstr = str(int(task_id))
     tmp = os.getenv(TMP_FOLDER_KEY_BASE + idstr)
     if tmp is None:
-        tmp = ensure_folder("scgenerator " + name_if_new)
-        os.environ[TMP_FOLDER_KEY_BASE + idstr] = tmp
-    elif not os.path.exists(tmp):
-        os.mkdir(tmp)
+        tmp = ensure_folder(Path("scgenerator" + PARAM_SEPARATOR + name_if_new))
+        os.environ[TMP_FOLDER_KEY_BASE + idstr] = str(tmp)
+    tmp = Path(tmp).resolve()
+    if not tmp.exists():
+        tmp.mkdir()
     return tmp


-def set_data_folder(task_id: int, path: str):
+def set_data_folder(task_id: int, path: os.PathLike):
     """stores the path to an existing data folder in the environment

     Parameters
@@ -516,10 +501,10 @@ def set_data_folder(task_id: int, path: os.PathLike):
         path to the root of the data folder
     """
     idstr = str(int(task_id))
-    os.environ[TMP_FOLDER_KEY_BASE + idstr] = path
+    os.environ[TMP_FOLDER_KEY_BASE + idstr] = str(path)


-def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> str:
+def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> Path:
     """generates a path for the desired file name

     Parameters
@@ -536,20 +521,8 @@ def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> str:
     str
         the full path
     """
-    # base_name, ext = os.path.splitext(file_name)
-    # folder = get_data_folder(task_id)
-    # folder = os.path.join(folder, identifier)
-    # folder = ensure_folder(folder, prevent_overwrite=False)
-    # i = 0
-    # base_name = os.path.join(folder, base_name)
-    # new_name = base_name + ext
-    # while os.path.exists(new_name):
-    #     new_name = f"{base_name}_{i}{ext}"
-    #     i += 1
-    path = os.path.join(get_data_folder(task_id), identifier)
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, file_name)
+    path = get_data_folder(task_id) / identifier / file_name
+    path.parent.mkdir(exist_ok=True)

     return path
@@ -574,33 +547,40 @@ def save_data(data: np.ndarray, file_name: str, task_id: int, identifier: str =
         return


-def ensure_folder(name, i=0, suffix="", prevent_overwrite=True):
-    """creates a folder for simulation data named name and prevents overwrite
-    by adding a suffix if necessary and returning the name"""
-    prefix, last_dir = os.path.split(name)
-    exploded = [prefix]
-    sub_prefix = prefix
-    while not _end_of_path_tree(sub_prefix):
-        sub_prefix, _ = os.path.split(sub_prefix)
-        exploded.append(sub_prefix)
-    if any(os.path.isfile(el) for el in exploded):
-        prefix = ensure_folder(prefix)
-        name = os.path.join(prefix, last_dir)
-    folder_name = name
-    if i > 0:
-        folder_name += f"_{i}"
-    folder_name += suffix
-    if not os.path.exists(folder_name):
-        os.makedirs(folder_name)
-    else:
-        if prevent_overwrite:
-            return ensure_folder(name, i + 1)
-        else:
-            return folder_name
-    return folder_name
-
-
-def _end_of_path_tree(path):
-    out = path == os.path.abspath(os.sep)
-    out |= path == ""
-    return out
+def ensure_folder(path: Path, prevent_overwrite: bool = True) -> Path:
+    """ensure a folder exists and doesn't overwrite anything if required
+
+    Parameters
+    ----------
+    path : Path
+        desired path
+    prevent_overwrite : bool, optional
+        whether to create a new directory when one already exists, by default True
+
+    Returns
+    -------
+    Path
+        final path
+    """
+    path = path.resolve()
+
+    # is path root ?
+    if len(path.parts) < 2:
+        return path
+
+    # is a part of path an existing *file* ?
+    parts = path.parts
+    path = Path(path.root)
+    for part in parts:
+        if path.is_file():
+            path = ensure_folder(path, prevent_overwrite=False)
+        path /= part
+
+    folder_name = path.name
+
+    for i in itertools.count():
+        if not path.is_file() and (not prevent_overwrite or not path.is_dir()):
+            path.mkdir(exist_ok=True)
+            return path
+        path = path.parent / (folder_name + f"_{i}")
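The rewritten `ensure_folder` walks the path once, detouring around any component that exists as a regular file, then appends `_0`, `_1`, … until a usable directory name is found. A quick check, assuming the `ensure_folder` defined above is in scope:

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    base = Path(tmp) / "scgenerator data"
    first = ensure_folder(base)   # creates ".../scgenerator data"
    second = ensure_folder(base)  # collides, so creates ".../scgenerator data_0"
    print(first.name, second.name)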

View File

@@ -405,6 +405,26 @@ class Simulations:
     def is_available(cls) -> bool:
         return False

+    @classmethod
+    def new(
+        cls, param_seq: initialize.ParamSequence, task_id, method: Type["Simulations"] = None
+    ) -> "Simulations":
+        """Prefered method to create a new simulations object
+
+        Returns
+        -------
+        Simulations
+            obj that uses the best available parallelization method
+        """
+        if method is not None:
+            if isinstance(method, str):
+                method = Simulations.simulation_methods_dict[method]
+            return method(param_seq, task_id)
+        elif param_seq.num_sim > 1 and param_seq["simulation", "parallel"] and using_ray:
+            return Simulations.get_best_method()(param_seq, task_id)
+        else:
+            return SequencialSimulations(param_seq, task_id)
+
     def __init__(self, param_seq: initialize.ParamSequence, task_id=0):
         """
         Parameters
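`Simulations.new` absorbs the module-level `_new_simulations` helper removed at the bottom of this diff, so method selection lives next to the class registry it consults. A simplified, self-contained sketch of the dispatch (stand-in classes, not scgenerator's real ones):

from typing import Dict, Type, Union

class Simulations:
    simulation_methods_dict: Dict[str, Type["Simulations"]] = {}

    @classmethod
    def new(cls, method: Union[str, Type["Simulations"], None] = None) -> "Simulations":
        if method is not None:
            if isinstance(method, str):
                method = Simulations.simulation_methods_dict[method]
            return method()
        return SequencialSimulations()  # fallback when nothing better applies

class SequencialSimulations(Simulations):
    pass

Simulations.simulation_methods_dict["sequencial"] = SequencialSimulations
print(type(Simulations.new("sequencial")).__name__)  # SequencialSimulations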
@@ -432,7 +452,7 @@ class Simulations:
     def finished_and_complete(self):
         try:
             io.check_data_integrity(
-                io.get_data_subfolders(self.data_folder), self.param_seq["simulation", "z_num"]
+                io.get_data_subfolders(self.id), self.param_seq["simulation", "z_num"]
             )
             return True
         except IncompleteDataFolderError:
@@ -450,10 +470,8 @@ class Simulations:
     def _run_available(self):
         for variable, params in self.param_seq:
-            io.save_parameters(
-                params,
-                io.generate_file_path("params.toml", self.id, utils.format_variable_list(variable)),
-            )
+            io.save_parameters(params, self.id, utils.format_variable_list(variable))

             self.new_sim(variable, params)
         self.finish()
@@ -576,29 +594,6 @@ class MultiProcSimulations(Simulations, priority=1):
         ).run()
         queue.task_done()

-    # @staticmethod
-    # def progress_worker(num_steps: int, progress_queue: multiprocessing.Queue):
-    #     pbars: Dict[int, tqdm] = {}
-    #     with tqdm(total=num_steps, desc="Simulating", unit="step", position=0) as tq:
-    #         while True:
-    #             raw = progress_queue.get()
-    #             if raw == 0:
-    #                 for pbar in pbars.values():
-    #                     pbar.close()
-    #                 return
-    #             i, rel_pos = raw
-    #             if i not in pbars:
-    #                 pbars[i] = tqdm(
-    #                     total=1,
-    #                     desc=f"Worker {i}",
-    #                     position=i,
-    #                     bar_format="{l_bar}{bar}"
-    #                     "|[{elapsed}<{remaining}, "
-    #                     "{rate_fmt}{postfix}]",
-    #                 )
-    #             pbars[i].update(rel_pos - pbars[i].n)
-    #             tq.update()

 class RaySimulations(Simulations, priority=2):
     """runs simulation with the help of the ray module. ray must be initialized before creating an instance of RaySimulations"""
@@ -716,9 +711,9 @@ def run_simulation_sequence(
     *config_files: os.PathLike,
     method=None,
     final_name: str = None,
-    prev_data_folder: os.PathLike = None,
+    prev_sim_dir: os.PathLike = None,
 ):
-    prev = prev_data_folder
+    prev = prev_sim_dir
     for config_file in config_files:
         sim = new_simulation(config_file, prev, method)
         sim.run()
@@ -728,46 +723,36 @@ def run_simulation_sequence(
 def new_simulation(
     config_file: os.PathLike,
-    prev_data_folder=None,
+    prev_sim_dir=None,
     method: Type[Simulations] = None,
 ) -> Simulations:
     config = io.load_toml(config_file)
+    if prev_sim_dir is not None:
+        config.setdefault("simulation", {})
+        config["simulation"]["prev_sim_dir"] = str(prev_sim_dir)

     task_id = np.random.randint(1e9, 1e12)
-    if prev_data_folder is None:
+    if prev_sim_dir is None:
         param_seq = initialize.ParamSequence(config)
     else:
-        param_seq = initialize.ContinuationParamSequence(prev_data_folder, config)
+        param_seq = initialize.ContinuationParamSequence(prev_sim_dir, config)

     print(f"{param_seq.name=}")
-    return _new_simulations(param_seq, task_id, method)
+    return Simulations.new(param_seq, task_id, method)


-def resume_simulations(data_folder: str, method: Type[Simulations] = None) -> Simulations:
+def resume_simulations(sim_dir: str, method: Type[Simulations] = None) -> Simulations:
     task_id = np.random.randint(1e9, 1e12)
-    config = io.load_toml(os.path.join(data_folder, "initial_config.toml"))
-    io.set_data_folder(task_id, data_folder)
+    config = io.load_toml(os.path.join(sim_dir, "initial_config.toml"))
+    io.set_data_folder(task_id, sim_dir)
     param_seq = initialize.RecoveryParamSequence(config, task_id)

-    return _new_simulations(param_seq, task_id, method)
-
-
-def _new_simulations(
-    param_seq: initialize.ParamSequence,
-    task_id,
-    method: Type[Simulations],
-) -> Simulations:
-    if method is not None:
-        if isinstance(method, str):
-            method = Simulations.simulation_methods_dict[method]
-        return method(param_seq, task_id)
-    elif param_seq.num_sim > 1 and param_seq["simulation", "parallel"] and using_ray:
-        return Simulations.get_best_method()(param_seq, task_id)
-    else:
-        return SequencialSimulations(param_seq, task_id)
+    return Simulations.new(param_seq, task_id, method)


 if __name__ == "__main__":

View File

@@ -2,6 +2,7 @@ import os
 from collections.abc import Mapping, Sequence
 from glob import glob
 from typing import Any, Dict, List, Tuple
+from pathlib import Path

 import numpy as np
@@ -30,17 +31,17 @@ class Spectrum(np.ndarray):
 class Pulse(Sequence):
-    def __init__(self, path: str, ensure_2d=True):
+    def __init__(self, path: os.PathLike, ensure_2d=True):
         self.logger = get_logger(__name__)
-        self.path = str(path)
+        self.path = Path(path)
         self.__ensure_2d = ensure_2d

-        if not os.path.isdir(self.path):
+        if not self.path.is_dir():
             raise FileNotFoundError(f"Folder {self.path} does not exist")

         self.params = None
         try:
-            self.params = io.load_previous_parameters(os.path.join(self.path, "params.toml"))
+            self.params = io.load_previous_parameters(self.path / "params.toml")
         except FileNotFoundError:
             self.logger.info(f"parameters corresponding to {self.path} not found")
@@ -52,7 +53,7 @@ class Pulse(Sequence):
             else:
                 raise
         self.cache: Dict[int, Spectrum] = {}
-        self.nmax = len(glob(os.path.join(self.path, "spectra_*.npy")))
+        self.nmax = len(list(self.path.glob("spectra_*.npy")))
         if self.nmax <= 0:
             raise FileNotFoundError(f"No appropriate file in specified folder {self.path}")
@@ -77,7 +78,7 @@ class Pulse(Sequence):
         return self.nmax

     def __getitem__(self, key):
-        return self.all_spectra(ind=range(self.nmax)[key])
+        return self.all_spectra(ind=range(self.nmax)[key]).squeeze()

     def intensity(self, unit):
         if unit.type in ["WL", "FREQ", "AFREQ"]:
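The added `.squeeze()` drops any singleton axis (e.g. a single repeat or a single z position) so indexing a `Pulse` returns a plain array rather than one wrapped in length-1 dimensions. In isolation:

import numpy as np

spectra = np.zeros((1, 512))    # one repeat, 512 spectral points
print(spectra.squeeze().shape)  # (512,)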
@@ -187,6 +188,8 @@ class Pulse(Sequence):
         return np.fft.ifft(self.all_spectra(ind=ind), axis=-1)

     def _load1(self, i: int):
+        if i < 0:
+            i = self.nmax + i
         if i in self.cache:
             return self.cache[i]
         spec = io.load_single_spectrum(self.path, i)
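With the new guard, `_load1` accepts negative indices the way Python sequences do, and the cache is keyed by the normalized index. The normalization on its own:

nmax = 10  # stand-in for self.nmax

def normalize(i: int) -> int:
    if i < 0:
        i = nmax + i
    return i

print(normalize(-1), normalize(3))  # 9 3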
@@ -195,19 +198,3 @@ class Pulse(Sequence):
             spec = Spectrum(spec, self.wl, self.params["frep"])
         self.cache[i] = spec
         return spec
-
-
-class SpectraCollection(Mapping, Sequence):
-    def __init__(self, path: str):
-        self.path = path
-        self.collection: List[Spectra] = []
-
-        if not os.path.isdir(self.path):
-            raise FileNotFoundError(f"Folder {self.path} does not exist")
-
-        self.variable_list
-
-    def __getitem__(self, key):
-        return self.collection[key]
-
-    def __len__(self):
-        pass