recovery hot fixes

Benoît Sierro
2021-05-31 13:54:00 +02:00
parent 656815015a
commit f0b20b90c7
7 changed files with 305 additions and 316 deletions

View File

@@ -32,25 +32,16 @@ def create_parser():
         action="store_true",
         help="force not to use ray",
     )
+    parser.add_argument("--output-name", "-o", help="path to the final output folder", default=None)

     run_parser = subparsers.add_parser("run", help="run a simulation from a config file")
     run_parser.add_argument("configs", help="path(s) to the toml configuration file(s)", nargs="+")
-    run_parser.add_argument(
-        "--append-to",
-        "-a",
-        help="optional directory where a compatible simulation has already been ran",
-        default=None,
-    )
-    run_parser.add_argument(
-        "--output-name", "-o", help="path to the final output folder", default=None
-    )
     run_parser.set_defaults(func=run_sim)

     resume_parser = subparsers.add_parser("resume", help="resume a simulation")
     resume_parser.add_argument(
-        "data_dir",
-        help="path to the directory where the initial_config.toml and the data is stored",
+        "sim_dir",
+        help="path to the directory where the initial_config.toml and the partial data is stored",
     )
     resume_parser.add_argument(
         "configs",
@@ -64,9 +55,6 @@ def create_parser():
     merge_parser.add_argument(
         "path", help="path to the final simulation folder containing 'initial_config.toml'"
     )
-    merge_parser.add_argument(
-        "--output-name", "-o", help="path to the final output folder", default=None
-    )
     merge_parser.set_defaults(func=merge)
     return parser
@@ -107,9 +95,11 @@ def prep_ray(args):
 def resume_sim(args):
     method = prep_ray(args)
-    sim = resume_simulations(args.data_dir, method=method)
+    sim = resume_simulations(args.sim_dir, method=method)
     sim.run()
-    run_simulation_sequence(*args.configs, method=method, prev_data_folder=sim.data_folder)
+    run_simulation_sequence(
+        *args.configs, method=method, prev_sim_dir=sim.data_folder, final_name=args.output_name
+    )


 if __name__ == "__main__":
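With `--output-name` moved from the `run` and `merge` subparsers onto the top-level parser, the flag now has to appear before the subcommand. A minimal, self-contained argparse sketch of that ordering rule (hypothetical prog name, not part of the commit):

import argparse

# top-level options must precede the subcommand; subparser arguments follow it
parser = argparse.ArgumentParser(prog="scgenerator")
parser.add_argument("--output-name", "-o", default=None)
subparsers = parser.add_subparsers(dest="command")
run_parser = subparsers.add_parser("run")
run_parser.add_argument("configs", nargs="+")

# equivalent of: scgenerator -o out run config.toml
args = parser.parse_args(["-o", "out", "run", "config.toml"])
print(args.output_name, args.configs)  # out ['config.toml']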

View File

@@ -1,4 +1,5 @@
 import numpy as np
+from numpy.lib.arraysetops import isin


 def pbar_format(worker_id: int):
@@ -98,11 +99,20 @@ def fit_parameters(param):
     return True


-def string(l):
-    def _string(s):
-        return isinstance(s, str) and s.lower() in l
+def string(l=None):
+    if l is None:
+
+        def _string(s):
+            return isinstance(s, str)
+
+        _string.__doc__ = f"must be a str"
+    else:
+
+        def _string(s):
+            return isinstance(s, str) and s.lower() in l

-    _string.__doc__ = f"must be a str matching one of {l}"
+        _string.__doc__ = f"must be a str matching one of {l}"
     return _string
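A quick sketch of how the reworked validator factory behaves; the function is re-stated here so the example runs on its own:

def string(l=None):
    if l is None:
        def _string(s):
            return isinstance(s, str)
        _string.__doc__ = "must be a str"
    else:
        def _string(s):
            return isinstance(s, str) and s.lower() in l
        _string.__doc__ = f"must be a str matching one of {l}"
    return _string

any_str = string()
model = string(["pcf", "marcatili"])
assert any_str("hello") and not any_str(42)
assert model("PCF") and not model("hasan")  # matching is case-insensitive
print(model.__doc__)  # must be a str matching one of ['pcf', 'marcatili']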
@@ -123,8 +133,8 @@ def capillary_nested(n):
 valid_param_types = dict(
     root=dict(
-        name=lambda s: isinstance(s, str),
-        prev_data_dir=lambda s: isinstance(s, str),
+        name=string(),
+        prev_data_dir=string(),
     ),
     fiber=dict(
         input_transmission=in_range_incl(num, (0, 1)),
@@ -138,7 +148,7 @@ valid_param_types = dict(
         he_mode=he_mode,
         fit_parameters=fit_parameters,
         beta=beta,
-        dispersion_file=lambda s: isinstance(s, str),
+        dispersion_file=string(),
         model=string(["pcf", "marcatili", "marcatili_adjusted", "hasan", "custom"]),
         length=in_range_excl(num, (0, 1e9)),
         capillary_num=integer,
@@ -156,7 +166,7 @@ valid_param_types = dict(
     ),
     pulse=dict(
         field_0=field_0,
-        field_file=lambda s: isinstance(s, str),
+        field_file=string(),
         repetition_rate=num,
         peak_power=num,
         mean_power=num,
@@ -184,6 +194,7 @@ valid_param_types = dict(
         lower_wavelength_interp_limit=in_range_excl(num, (100e-9, 3000e-9)),
         upper_wavelength_interp_limit=in_range_excl(num, (100e-9, 5000e-9)),
         frep=num,
+        prev_sim_dir=string(),
     ),
 )

View File

@@ -32,7 +32,7 @@ class DuplicateParameterError(Exception):
     pass


-class IncompleteDataFolderError(Exception):
+class IncompleteDataFolderError(FileNotFoundError):
     pass
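Re-parenting the exception means any existing `except FileNotFoundError` handler (such as the one in `recover_params` further down this commit) now also catches an incomplete data folder. A short sketch of the effect:

class IncompleteDataFolderError(FileNotFoundError):
    pass

try:
    raise IncompleteDataFolderError("data folder is missing spectra")
except FileNotFoundError as exc:
    print("caught:", exc)  # caught: data folder is missing spectra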

View File

@@ -46,24 +46,26 @@ class ParamSequence(Mapping):
 class ContinuationParamSequence(ParamSequence):
-    def __init__(self, prev_data_folder: str, new_config: Dict[str, Any]):
+    def __init__(self, prev_sim_dir: str, new_config: Dict[str, Any]):
         """Parameter sequence that builds on a previous simulation but with a new configuration

         It is recommended that only the fiber and the number of points stored may be changed and
         changing other parameters could results in unexpected behaviors. The new config doesn't have to
-        be a full configuration (specify only the new parameters).
+        be a full configuration (i.e. you can specify only the parameters that change).

         Parameters
         ----------
-        prev_data_folder : str
+        prev_sim_dir : str
             path to the folder of the previous simulation containing 'initial_config.toml'
         new_config : Dict[str, Any]
             new config
         """
-        self.path = Path(prev_data_folder)
-        init_config = io.load_previous_parameters(os.path.join(self.path, "initial_config.toml"))
+        self.prev_sim_dir = Path(prev_sim_dir)
+        init_config = io.load_previous_parameters(
+            os.path.join(self.prev_sim_dir, "initial_config.toml")
+        )
         self.prev_variable_lists = [
-            (set(variable_list[1:]), self.path / utils.format_variable_list(variable_list))
+            (set(variable_list[1:]), self.prev_sim_dir / utils.format_variable_list(variable_list))
             for variable_list, _ in required_simulations(init_config)
         ]
@@ -74,11 +76,11 @@ class ContinuationParamSequence(ParamSequence):
         """iterates through all possible parameters, yielding a config as well as a flattened
         computed parameters set each time"""
         for variable_list, full_config in required_simulations(self.config):
-            prev_sim_folder = self.find_prev_data_folder(variable_list)
-            full_config["prev_data_dir"] = str(prev_sim_folder.resolve())
-            yield variable_list, compute_subsequent_paramters(prev_sim_folder, full_config)
+            prev_data_dir = self.find_prev_data_dir(variable_list)
+            full_config["prev_data_dir"] = str(prev_data_dir.resolve())
+            yield variable_list, compute_init_parameters(full_config)

-    def find_prev_data_folder(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
+    def find_prev_data_dir(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
         """finds the previous simulation data that this new config should start from

         Parameters
@@ -102,7 +104,7 @@ class ContinuationParamSequence(ParamSequence):
                 return path

         raise ValueError(
-            f"cannot find a previous data folder for {new_variable_list} in {self.path}"
+            f"cannot find a previous data folder for {new_variable_list} in {self.prev_sim_dir}"
         )
@@ -114,7 +116,7 @@ class RecoveryParamSequence(ParamSequence):
         z_num = config["simulation"]["z_num"]
         started = self.num_sim

-        sub_folders = io.get_data_subfolders(io.get_data_folder(self.id))
+        sub_folders = io.get_data_subfolders(self.id)

         pbar_store = utils.PBars(
             tqdm(
@@ -138,20 +140,63 @@ class RecoveryParamSequence(ParamSequence):
         self.num_steps += started * z_num
         self.single_sim = self.num_sim == 1

-    def __iter__(self) -> Iterator[Tuple[List[Tuple[str, Any]], dict]]:
-        for variable_list, full_config in required_simulations(self.config):
-            data_dir = os.path.join(
-                io.get_data_folder(self.id), utils.format_variable_list(variable_list)
-            )
-            if not io.propagation_initiated(data_dir):
-                yield variable_list, compute_init_parameters(full_config)
+        self.prev_sim_dir = None
+        if "prev_sim_dir" in self.config.get("simulation", {}):
+            self.prev_sim_dir = Path(self.config["simulation"]["prev_sim_dir"])
+            init_config = io.load_previous_parameters(
+                os.path.join(self.prev_sim_dir, "initial_config.toml")
+            )
+            self.prev_variable_lists = [
+                (
+                    set(variable_list[1:]),
+                    self.prev_sim_dir / utils.format_variable_list(variable_list),
+                )
+                for variable_list, _ in required_simulations(init_config)
+            ]
+
+    def __iter__(self) -> Iterator[Tuple[List[Tuple[str, Any]], dict]]:
+        for variable_list, params in required_simulations(self.config):
+            data_dir = io.get_data_folder(self.id) / utils.format_variable_list(variable_list)
+            if not data_dir.is_dir() or io.find_last_spectrum_num(data_dir) == 0:
+                if (prev_data_dir := self.find_prev_data_dir(variable_list)) is not None:
+                    params["prev_data_dir"] = str(prev_data_dir)
+                yield variable_list, compute_init_parameters(params)
             elif io.num_left_to_propagate(data_dir, self.config["simulation"]["z_num"]) != 0:
-                yield variable_list, recover_params(full_config, data_dir)
+                yield variable_list, recover_params(params, data_dir)
             else:
                 continue

+    def find_prev_data_dir(self, new_variable_list: List[Tuple[str, Any]]) -> Path:
+        """finds the previous simulation data that this new config should start from
+
+        Parameters
+        ----------
+        new_variable_list : List[Tuple[str, Any]]
+            as yielded by required_simulations
+
+        Returns
+        -------
+        Path
+            path to the data folder
+
+        Raises
+        ------
+        ValueError
+            no data folder found
+        """
+        if self.prev_sim_dir is None:
+            return None
+        to_test = set(new_variable_list[1:])
+        for old_v_list, path in self.prev_variable_lists:
+            if to_test.issuperset(old_v_list):
+                return path
+        raise ValueError(
+            f"cannot find a previous data folder for {new_variable_list} in {self.prev_sim_dir}"
+        )
+

 def validate(config: dict) -> dict:
     """validates a configuration dictionary and attempts to fill in defaults
@@ -517,20 +562,19 @@ def _ensure_consistency(config):
     return config


-def recover_params(config: Dict[str, Any], data_folder: os.PathLike) -> Dict[str, Any]:
-    path = Path(data_folder)
+def recover_params(config: Dict[str, Any], data_folder: Path) -> Dict[str, Any]:
     params = compute_init_parameters(config)
     try:
-        prev_params = io.load_toml(path / "params.toml")
+        prev_params = io.load_toml(data_folder / "params.toml")
     except FileNotFoundError:
         prev_params = {}
     for k, v in prev_params.items():
         params.setdefault(k, v)
-    num, last_spectrum = io.load_last_spectrum(str(path))
+    num, last_spectrum = io.load_last_spectrum(data_folder)
     params["spec_0"] = last_spectrum
     params["field_0"] = np.fft.ifft(last_spectrum)
     params["recovery_last_stored"] = num
-    params["cons_qty"] = np.load(os.path.join(data_folder, "cons_qty.npy"))
+    params["cons_qty"] = np.load(data_folder / "cons_qty.npy")
     return params
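`recover_params` restarts a propagation from the last spectrum that made it to disk; the initial field is simply the inverse FFT of that spectrum. A sketch of the round trip with stand-in data:

import numpy as np

last_spectrum = np.fft.fft(np.exp(-np.linspace(-5, 5, 256) ** 2))  # stand-in data
spec_0 = last_spectrum
field_0 = np.fft.ifft(last_spectrum)
print(np.allclose(np.fft.fft(field_0), spec_0))  # True: the pair stays consistent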
@@ -561,26 +605,7 @@ def compute_init_parameters(config: Dict[str, Any]) -> Dict[str, Any]:
     params = _generate_sim_grid(params)

     # Initial field may influence the grid
-    custom_field = False
-    if "field_file" in params:
-        custom_field = True
-        field_data = np.load(params["field_file"])
-        field_interp = interp1d(
-            field_data["time"], field_data["field"], bounds_error=False, fill_value=(0, 0)
-        )
-        params["field_0"] = field_interp(params["t"])
-        params = _comform_custom_field(params)
-    elif "field_0" in params:
-        custom_field = True
-        params = _evalutate_custom_field_equation(params)
-        params = _comform_custom_field(params)
-
-    # central wavelength may be off with custom fields
-    if custom_field:
-        delta_w = params["w_c"][np.argmax(abs2(np.fft.fft(params["field_0"])))]
-        logger.debug(f"had to adjust w by {delta_w}")
-        params["wavelength"] = units.m.inv(units.m(params["wavelength"]) - delta_w)
-        _update_frequency_domain(params)
+    custom_field = setup_custom_field(params)

     if "step_size" in params:
         params["error_ok"] = params["step_size"]
@@ -650,28 +675,39 @@ def compute_init_parameters(config: Dict[str, Any]) -> Dict[str, Any]:
     return params


-def compute_subsequent_paramters(sim_folder: str, config: Dict[str, Any]) -> Dict[str, Any]:
-    params = compute_init_parameters(config)
-    spec = io.load_last_spectrum(sim_folder)[1]
-    params["field_0"] = np.fft.ifft(spec) * params["input_transmission"]
-    params["spec_0"] = np.fft.fft(params["field_0"])
-
-    return params
-
-
-def _comform_custom_field(params):
-    params["field_0"] = params["field_0"] * pulse.modify_field_ratio(
-        params["t"],
-        params["field_0"],
-        params.get("peak_power"),
-        params.get("energy"),
-        params.get("intensity_noise"),
-    )
-    params["width"], params["peak_power"], params["energy"] = pulse.measure_field(
-        params["t"], params["field_0"]
-    )
-    return params
+def setup_custom_field(params: Dict[str, Any]) -> bool:
+    logger = get_logger(__name__)
+    custom_field = True
+    if "prev_data_dir" in params:
+        spec = io.load_last_spectrum(Path(params["prev_data_dir"]))[1]
+        params["field_0"] = np.fft.ifft(spec) * params["input_transmission"]
+    elif "field_file" in params:
+        field_data = np.load(params["field_file"])
+        field_interp = interp1d(
+            field_data["time"], field_data["field"], bounds_error=False, fill_value=(0, 0)
+        )
+        params["field_0"] = field_interp(params["t"])
+    elif "field_0" in params:
+        params = _evalutate_custom_field_equation(params)
+    else:
+        custom_field = False
+
+    if custom_field:
+        params["field_0"] = params["field_0"] * pulse.modify_field_ratio(
+            params["t"],
+            params["field_0"],
+            params.get("peak_power"),
+            params.get("energy"),
+            params.get("intensity_noise"),
+        )
+        params["width"], params["peak_power"], params["energy"] = pulse.measure_field(
+            params["t"], params["field_0"]
+        )
+        delta_w = params["w_c"][np.argmax(abs2(np.fft.fft(params["field_0"])))]
+        logger.debug(f"had to adjust w by {delta_w}")
+        params["wavelength"] = units.m.inv(units.m(params["wavelength"]) - delta_w)
+        _update_frequency_domain(params)
+    return custom_field


 def _update_pulse_parameters(params):
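The recentering step at the end of `setup_custom_field` estimates how far the supplied field's spectral peak sits from the grid center and shifts the nominal wavelength accordingly. A self-contained sketch of the `delta_w` computation (the grid and pulse values below are made up):

import numpy as np

t = np.linspace(-1e-12, 1e-12, 1024)
w_c = 2 * np.pi * np.fft.fftfreq(len(t), d=t[1] - t[0])  # angular frequency offsets
field_0 = np.exp(-((t / 100e-15) ** 2)) * np.exp(1j * 2e13 * t)  # peak shifted by 2e13 rad/s

delta_w = w_c[np.argmax(np.abs(np.fft.fft(field_0)) ** 2)]
print(f"{delta_w:.2e}")  # close to the imposed 2e13 rad/s offset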

View File

@@ -126,21 +126,24 @@ class DataBuffer:
     # return os.path.normpath(p)


+def conform_toml_path(path: os.PathLike) -> Path:
+    path = Path(path)
+    if not path.name.lower().endswith(".toml"):
+        path = path.parent / (path.name + ".toml")
+    return path
+
+
 def load_toml(path: os.PathLike):
     """returns a dictionary parsed from the specified toml file"""
-    path = str(path)
-    if not path.lower().endswith(".toml"):
-        path += ".toml"
+    path = conform_toml_path(path)
     with open(path, mode="r") as file:
         dico = toml.load(file)
     return dico


-def save_toml(path, dico):
+def save_toml(path: os.PathLike, dico):
     """saves a dictionary into a toml file"""
-    path = str(path)
-    if not path.lower().endswith(".toml"):
-        path += ".toml"
+    path = conform_toml_path(path)
     with open(path, mode="w") as file:
         toml.dump(dico, file)
     return dico
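The duplicated extension check in `load_toml` and `save_toml` is factored into the new `pathlib`-based `conform_toml_path`. Behavior sketch (the helper is re-stated so this runs standalone):

import os
from pathlib import Path

def conform_toml_path(path: os.PathLike) -> Path:
    path = Path(path)
    if not path.name.lower().endswith(".toml"):
        path = path.parent / (path.name + ".toml")
    return path

print(conform_toml_path("configs/sim"))       # configs/sim.toml
print(conform_toml_path("configs/sim.TOML"))  # configs/sim.TOML (left alone)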
@@ -156,7 +159,7 @@ def serializable(val):
     return out


-def _prepare_for_serialization(dico):
+def _prepare_for_serialization(dico: Dict[str, Any]):
     """prepares a dictionary for serialization. Some keys may not be preserved
     (dropped due to no conversion available)
@@ -183,40 +186,56 @@ def _prepare_for_serialization(dico: Dict[str, Any]):
     return out


-def save_parameters(param_dict, file_name="param"):
-    """Writes the flattened parameters dictionary specific to a single simulation into a toml file
-
-    Parameters
-    ----------
-    param_dict : dictionary of parameters. Only floats, int and arrays of
-        non complex values are stored in the json
-    folder_name : folder where to save the files (relative to cwd)
-    file_name : name of the readable file.
-    """
+def save_parameters(param_dict: Dict[str, Any], task_id: int, data_dir_name: str):
     param = param_dict.copy()
-
-    folder_name, file_name = os.path.split(file_name)
-    folder_name = "tmp" if folder_name == "" else folder_name
-    file_name = os.path.splitext(file_name)[0]
-
-    if not os.path.exists(folder_name):
-        os.makedirs(folder_name)
+    file_path = generate_file_path("params.toml", task_id, data_dir_name)

     param = _prepare_for_serialization(param)
     param["datetime"] = datetime.now()
+    file_path.parent.mkdir(exist_ok=True)

     # save toml of the simulation
-    with open(os.path.join(folder_name, file_name + ".toml"), "w") as file:
+    with open(file_path, "w") as file:
         toml.dump(param, file, encoder=toml.TomlNumpyEncoder())

-    return os.path.join(folder_name, file_name)
+    return file_path
+
+
+# def save_parameters_old(param_dict, file_name="param"):
+#     """Writes the flattened parameters dictionary specific to a single simulation into a toml file
+#
+#     Parameters
+#     ----------
+#     param_dict : dictionary of parameters. Only floats, int and arrays of
+#         non complex values are stored in the json
+#     folder_name : folder where to save the files (relative to cwd)
+#     file_name : name of the readable file.
+#     """
+#     param = param_dict.copy()
+#
+#     folder_name, file_name = os.path.split(file_name)
+#     folder_name = "tmp" if folder_name == "" else folder_name
+#     file_name = os.path.splitext(file_name)[0]
+#
+#     if not os.path.exists(folder_name):
+#         os.makedirs(folder_name)
+#
+#     param = _prepare_for_serialization(param)
+#     param["datetime"] = datetime.now()
+#
+#     # save toml of the simulation
+#     with open(os.path.join(folder_name, file_name + ".toml"), "w") as file:
+#         toml.dump(param, file, encoder=toml.TomlNumpyEncoder())
+#
+#     return os.path.join(folder_name, file_name)


-def load_previous_parameters(path: str):
+def load_previous_parameters(path: os.PathLike):
     """loads a parameters toml files and converts data to appropriate type

     Parameters
     ----------
-    path : str
+    path : PathLike
         path to the toml

     Returns
@@ -248,31 +267,17 @@ def load_material_dico(name):
     return toml.loads(Paths.gets("gas"))[name]


-# def set_environ(config: dict):
-#     """sets environment variables specified in the config
-
-#     Parameters
-#     ----------
-#     config : dict
-#         whole simulation config file
-#     """
-#     environ = config.get("environment", {})
-#     for k, v in environ.get("path_prefixes", {}).items():
-#         os.environ[(PREFIX_KEY_BASE + k).upper()] = v
-
-
 def get_all_environ() -> Dict[str, str]:
     """returns a dictionary of all environment variables set by any instance of scgenerator"""
     d = dict(filter(lambda el: el[0].startswith(ENVIRON_KEY_BASE), os.environ.items()))
-    print(d)
     return d


-def load_single_spectrum(folder, index) -> np.ndarray:
-    return np.load(os.path.join(folder, f"spectra_{index}.npy"))
+def load_single_spectrum(folder: Path, index) -> np.ndarray:
+    return np.load(folder / f"spectra_{index}.npy")


-def get_data_subfolders(path: str) -> List[str]:
+def get_data_subfolders(task_id: int) -> List[Path]:
     """returns a list of relative path/subfolders in the specified directory

     Parameters
@@ -285,12 +290,11 @@ def get_data_subfolders(task_id: int) -> List[Path]:
     List[str]
         paths to sub folders
     """
-    sub_folders = glob(os.path.join(path, "*"))
-    sub_folders = list(filter(os.path.isdir, sub_folders))
-    return sub_folders
+    return [p.resolve() for p in get_data_folder(task_id).glob("*") if p.is_dir()]


-def check_data_integrity(sub_folders: List[str], init_z_num: int):
+def check_data_integrity(sub_folders: List[Path], init_z_num: int):
     """checks the integrity and completeness of a simulation data folder

     Parameters
@@ -312,18 +316,12 @@ def check_data_integrity(sub_folders: List[Path], init_z_num: int):
     )


-def propagation_initiated(sub_folder) -> bool:
-    if os.path.isdir(sub_folder):
-        return find_last_spectrum_num(sub_folder) > 0
-    return False
-
-
-def num_left_to_propagate(sub_folder: str, init_z_num: int) -> int:
+def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
     """checks if a propagation has completed

     Parameters
     ----------
-    sub_folder : str
+    sub_folder : Path
         path to the sub folder containing the spectra
     init_z_num : int
         number of z position to store as specified in the master config file
@@ -338,7 +336,7 @@ def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
     IncompleteDataFolderError
         raised if init_z_num doesn't match that specified in the individual parameter file
     """
-    params = load_toml(os.path.join(sub_folder, "params.toml"))
+    params = load_toml(sub_folder / "params.toml")
     z_num = params["z_num"]
     num_spectra = find_last_spectrum_num(sub_folder) + 1  # because of zero-indexing
@@ -352,8 +350,9 @@ def num_left_to_propagate(sub_folder: Path, init_z_num: int) -> int:
 def find_last_spectrum_num(data_dir: Path):
-    for num in itertools.count():
-        if not (data_dir / f"spectrum_{num}.npy").is_file():
+    for num in itertools.count(1):
+        p_to_test = data_dir / f"spectrum_{num}.npy"
+        if not p_to_test.is_file() or len(p_to_test.read_bytes()) == 0:
             return num - 1
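Two behavior changes here: the scan now starts at spectrum_1.npy, so an untouched folder yields 0 (the value RecoveryParamSequence checks for) rather than -1, and a zero-byte file, e.g. one truncated by a crash mid-write, counts as missing. A standalone check:

import itertools
import tempfile
from pathlib import Path

def find_last_spectrum_num(data_dir: Path):  # re-stated from the diff
    for num in itertools.count(1):
        p_to_test = data_dir / f"spectrum_{num}.npy"
        if not p_to_test.is_file() or len(p_to_test.read_bytes()) == 0:
            return num - 1

with tempfile.TemporaryDirectory() as tmp:
    d = Path(tmp)
    (d / "spectrum_1.npy").write_bytes(b"\x93NUMPY")  # fake but non-empty
    (d / "spectrum_2.npy").write_bytes(b"")           # interrupted write
    print(find_last_spectrum_num(d))                  # 1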
@@ -363,18 +362,6 @@ def load_last_spectrum(data_dir: Path) -> Tuple[int, np.ndarray]:
     return num, np.load(data_dir / f"spectrum_{num}.npy")


-def last_spectrum_path(path: Path) -> Path:
-    num = find_last_spectrum_num(path)
-    return path / f"spectrum_{num}.npy"
-
-
-def merge(paths: Union[str, List[str]], delete=False):
-    if isinstance(paths, (str, Path)):
-        paths = [paths]
-    for path in paths:
-        merge_same_simulations(path, delete=delete)
-
-
 def append_and_merge(final_sim_path: os.PathLike, new_name=None):
     final_sim_path = Path(final_sim_path).resolve()
     if new_name is None:
@@ -419,7 +406,7 @@ def append_and_merge(final_sim_path: os.PathLike, new_name=None):
     merge(destination_path, delete=True)


-def update_appended_params(param_path, new_path, z):
+def update_appended_params(param_path: Path, new_path: Path, z):
     z_num = len(z)
     params = load_toml(param_path)
     if "simulation" in params:
@@ -431,24 +418,24 @@ def update_appended_params(param_path: Path, new_path: Path, z):
     save_toml(new_path, params)


-def merge_same_simulations(path: str, delete=True):
+def merge(paths: Union[Path, List[Path]], delete=False):
+    if isinstance(paths, Path):
+        paths = [paths]
+    for path in paths:
+        merge_same_simulations(path, delete=delete)
+
+
+def merge_same_simulations(path: Path, delete=True):
     logger = get_logger(__name__)
     num_separator = PARAM_SEPARATOR + "num" + PARAM_SEPARATOR
-    sub_folders = get_data_subfolders(path)
-    config = load_toml(os.path.join(path, "initial_config.toml"))
+    sub_folders = [p for p in path.glob("*") if p.is_dir()]
+    config = load_toml(path / "initial_config.toml")
     repeat = config["simulation"].get("repeat", 1)
     max_repeat_id = repeat - 1
     z_num = config["simulation"]["z_num"]

     check_data_integrity(sub_folders, z_num)

-    base_folders = set()
-    for sub_folder in sub_folders:
-        splitted_base_path = sub_folder.split(num_separator)[:-1]
-        base_folder = num_separator.join(splitted_base_path)
-        if len(base_folder) > 0:
-            base_folders.add(base_folder)
-
     sim_num, param_num = utils.count_variations(config)
     pbar = utils.PBars(tqdm(total=sim_num * z_num, desc="Merging data", ncols=100))
@@ -461,51 +448,49 @@ def merge_same_simulations(path: Path, delete=True):
             if repeat_id == 0:
                 spectra = []

-            in_path = os.path.join(path, utils.format_variable_list(variable_and_ind))
-            spectra.append(np.load(os.path.join(in_path, f"spectrum_{z_id}.npy")))
+            in_path = path / utils.format_variable_list(variable_and_ind)
+            spectra.append(np.load(in_path / f"spectrum_{z_id}.npy"))
             pbar.update()

             # write new files only once all those from one parameter set are collected
             if repeat_id == max_repeat_id:
-                out_path = os.path.join(
-                    path,
-                    utils.format_variable_list(variable_and_ind[1:-1]) + PARAM_SEPARATOR + "merged",
+                out_path = path / (
+                    utils.format_variable_list(variable_and_ind[1:-1]) + PARAM_SEPARATOR + "merged"
                 )
                 out_path = ensure_folder(out_path, prevent_overwrite=False)
                 spectra = np.array(spectra).reshape(repeat, len(spectra[0]))
-                np.save(os.path.join(out_path, f"spectra_{z_id}.npy"), spectra.squeeze())
+                np.save(out_path / f"spectra_{z_id}.npy", spectra.squeeze())

                 # copy other files only once
                 if z_id == 0:
                     for file_name in ["z.npy", "params.toml"]:
-                        shutil.copy(
-                            os.path.join(in_path, file_name),
-                            os.path.join(out_path, ""),
-                        )
+                        shutil.copy(in_path / file_name, out_path)

     pbar.close()
     if delete:
-        try:
-            for sub_folder in sub_folders:
-                send2trash(sub_folder)
-        except TrashPermissionError:
-            logger.warning(f"could not send send {len(base_folders)} folder(s) to trash")
+        for sub_folder in sub_folders:
+            try:
+                send2trash(str(sub_folder))
+            except TrashPermissionError:
+                logger.warning(f"could not send send {sub_folder} to trash")


-def get_data_folder(task_id: int, name_if_new: str = "data"):
+def get_data_folder(task_id: int, name_if_new: str = "data") -> Path:
     if name_if_new == "":
         name_if_new = "data"
     idstr = str(int(task_id))
     tmp = os.getenv(TMP_FOLDER_KEY_BASE + idstr)
     if tmp is None:
-        tmp = ensure_folder("scgenerator " + name_if_new)
-        os.environ[TMP_FOLDER_KEY_BASE + idstr] = tmp
-    elif not os.path.exists(tmp):
-        os.mkdir(tmp)
+        tmp = ensure_folder(Path("scgenerator" + PARAM_SEPARATOR + name_if_new))
+        os.environ[TMP_FOLDER_KEY_BASE + idstr] = str(tmp)
+    tmp = Path(tmp).resolve()
+    if not tmp.exists():
+        tmp.mkdir()
     return tmp


-def set_data_folder(task_id: int, path: str):
+def set_data_folder(task_id: int, path: os.PathLike):
     """stores the path to an existing data folder in the environment

     Parameters
@@ -516,10 +501,10 @@ def set_data_folder(task_id: int, path: os.PathLike):
         path to the root of the data folder
     """
     idstr = str(int(task_id))
-    os.environ[TMP_FOLDER_KEY_BASE + idstr] = path
+    os.environ[TMP_FOLDER_KEY_BASE + idstr] = str(path)


-def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> str:
+def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> Path:
     """generates a path for the desired file name

     Parameters
@@ -536,20 +521,8 @@ def generate_file_path(file_name: str, task_id: int, identifier: str = "") -> str:
     str
         the full path
     """
-    # base_name, ext = os.path.splitext(file_name)
-    # folder = get_data_folder(task_id)
-    # folder = os.path.join(folder, identifier)
-    # folder = ensure_folder(folder, prevent_overwrite=False)
-    # i = 0
-    # base_name = os.path.join(folder, base_name)
-    # new_name = base_name + ext
-    # while os.path.exists(new_name):
-    #     new_name = f"{base_name}_{i}{ext}"
-    #     i += 1
-    path = os.path.join(get_data_folder(task_id), identifier)
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, file_name)
+    path = get_data_folder(task_id) / identifier / file_name
+    path.parent.mkdir(exist_ok=True)

     return path
@@ -574,33 +547,40 @@ def save_data(data: np.ndarray, file_name: str, task_id: int, identifier: str =
         return


-def ensure_folder(name, i=0, suffix="", prevent_overwrite=True):
-    """creates a folder for simulation data named name and prevents overwrite
-    by adding a suffix if necessary and returning the name"""
-    prefix, last_dir = os.path.split(name)
-    exploded = [prefix]
-    sub_prefix = prefix
-    while not _end_of_path_tree(sub_prefix):
-        sub_prefix, _ = os.path.split(sub_prefix)
-        exploded.append(sub_prefix)
-    if any(os.path.isfile(el) for el in exploded):
-        prefix = ensure_folder(prefix)
-        name = os.path.join(prefix, last_dir)
-    folder_name = name
-    if i > 0:
-        folder_name += f"_{i}"
-    folder_name += suffix
-    if not os.path.exists(folder_name):
-        os.makedirs(folder_name)
-    else:
-        if prevent_overwrite:
-            return ensure_folder(name, i + 1)
-        else:
-            return folder_name
-    return folder_name
-
-
-def _end_of_path_tree(path):
-    out = path == os.path.abspath(os.sep)
-    out |= path == ""
-    return out
+def ensure_folder(path: Path, prevent_overwrite: bool = True) -> Path:
+    """ensure a folder exists and doesn't overwrite anything if required
+
+    Parameters
+    ----------
+    path : Path
+        desired path
+    prevent_overwrite : bool, optional
+        whether to create a new directory when one already exists, by default True
+
+    Returns
+    -------
+    Path
+        final path
+    """
+    path = path.resolve()
+
+    # is path root ?
+    if len(path.parts) < 2:
+        return path
+
+    # is a part of path an existing *file* ?
+    parts = path.parts
+    path = Path(path.root)
+    for part in parts:
+        if path.is_file():
+            path = ensure_folder(path, prevent_overwrite=False)
+        path /= part
+
+    folder_name = path.name
+
+    for i in itertools.count():
+        if not path.is_file() and (not prevent_overwrite or not path.is_dir()):
+            path.mkdir(exist_ok=True)
+            return path
+        path = path.parent / (folder_name + f"_{i}")
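The rewritten `ensure_folder` walks the path once, detouring around any component that exists as a regular file, then appends `_0`, `_1`, … until a usable directory name is found. A quick check, assuming the `ensure_folder` defined above is in scope:

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    base = Path(tmp) / "scgenerator data"
    first = ensure_folder(base)   # creates ".../scgenerator data"
    second = ensure_folder(base)  # collides, so creates ".../scgenerator data_0"
    print(first.name, second.name)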

View File

@@ -405,6 +405,26 @@ class Simulations:
     def is_available(cls) -> bool:
         return False

+    @classmethod
+    def new(
+        cls, param_seq: initialize.ParamSequence, task_id, method: Type["Simulations"] = None
+    ) -> "Simulations":
+        """Prefered method to create a new simulations object
+
+        Returns
+        -------
+        Simulations
+            obj that uses the best available parallelization method
+        """
+        if method is not None:
+            if isinstance(method, str):
+                method = Simulations.simulation_methods_dict[method]
+            return method(param_seq, task_id)
+        elif param_seq.num_sim > 1 and param_seq["simulation", "parallel"] and using_ray:
+            return Simulations.get_best_method()(param_seq, task_id)
+        else:
+            return SequencialSimulations(param_seq, task_id)
+
     def __init__(self, param_seq: initialize.ParamSequence, task_id=0):
         """
         Parameters
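`Simulations.new` absorbs the module-level `_new_simulations` helper removed at the bottom of this diff, so method selection lives next to the class registry it consults. A simplified, self-contained sketch of the dispatch (stand-in classes, not scgenerator's real ones):

from typing import Dict, Type, Union

class Simulations:
    simulation_methods_dict: Dict[str, Type["Simulations"]] = {}

    @classmethod
    def new(cls, method: Union[str, Type["Simulations"], None] = None) -> "Simulations":
        if method is not None:
            if isinstance(method, str):
                method = Simulations.simulation_methods_dict[method]
            return method()
        return SequencialSimulations()  # fallback when nothing better applies

class SequencialSimulations(Simulations):
    pass

Simulations.simulation_methods_dict["sequencial"] = SequencialSimulations
print(type(Simulations.new("sequencial")).__name__)  # SequencialSimulations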
@@ -432,7 +452,7 @@ class Simulations:
     def finished_and_complete(self):
         try:
             io.check_data_integrity(
-                io.get_data_subfolders(self.data_folder), self.param_seq["simulation", "z_num"]
+                io.get_data_subfolders(self.id), self.param_seq["simulation", "z_num"]
             )
             return True
         except IncompleteDataFolderError:
@@ -450,10 +470,8 @@ class Simulations:
     def _run_available(self):
         for variable, params in self.param_seq:
-            io.save_parameters(
-                params,
-                io.generate_file_path("params.toml", self.id, utils.format_variable_list(variable)),
-            )
+            io.save_parameters(params, self.id, utils.format_variable_list(variable))

             self.new_sim(variable, params)
         self.finish()
@@ -576,29 +594,6 @@ class MultiProcSimulations(Simulations, priority=1):
         ).run()
         queue.task_done()

-    # @staticmethod
-    # def progress_worker(num_steps: int, progress_queue: multiprocessing.Queue):
-    #     pbars: Dict[int, tqdm] = {}
-    #     with tqdm(total=num_steps, desc="Simulating", unit="step", position=0) as tq:
-    #         while True:
-    #             raw = progress_queue.get()
-    #             if raw == 0:
-    #                 for pbar in pbars.values():
-    #                     pbar.close()
-    #                 return
-    #             i, rel_pos = raw
-    #             if i not in pbars:
-    #                 pbars[i] = tqdm(
-    #                     total=1,
-    #                     desc=f"Worker {i}",
-    #                     position=i,
-    #                     bar_format="{l_bar}{bar}"
-    #                     "|[{elapsed}<{remaining}, "
-    #                     "{rate_fmt}{postfix}]",
-    #                 )
-    #             pbars[i].update(rel_pos - pbars[i].n)
-    #             tq.update()

 class RaySimulations(Simulations, priority=2):
     """runs simulation with the help of the ray module. ray must be initialized before creating an instance of RaySimulations"""
@@ -716,9 +711,9 @@ def run_simulation_sequence(
     *config_files: os.PathLike,
     method=None,
     final_name: str = None,
-    prev_data_folder: os.PathLike = None,
+    prev_sim_dir: os.PathLike = None,
 ):
-    prev = prev_data_folder
+    prev = prev_sim_dir
     for config_file in config_files:
         sim = new_simulation(config_file, prev, method)
         sim.run()
@@ -728,46 +723,36 @@ def run_simulation_sequence(
 def new_simulation(
     config_file: os.PathLike,
-    prev_data_folder=None,
+    prev_sim_dir=None,
     method: Type[Simulations] = None,
 ) -> Simulations:
     config = io.load_toml(config_file)
+    if prev_sim_dir is not None:
+        config.setdefault("simulation", {})
+        config["simulation"]["prev_sim_dir"] = str(prev_sim_dir)

     task_id = np.random.randint(1e9, 1e12)
-    if prev_data_folder is None:
+    if prev_sim_dir is None:
         param_seq = initialize.ParamSequence(config)
     else:
-        param_seq = initialize.ContinuationParamSequence(prev_data_folder, config)
+        param_seq = initialize.ContinuationParamSequence(prev_sim_dir, config)

     print(f"{param_seq.name=}")
-    return _new_simulations(param_seq, task_id, method)
+    return Simulations.new(param_seq, task_id, method)


-def resume_simulations(data_folder: str, method: Type[Simulations] = None) -> Simulations:
+def resume_simulations(sim_dir: str, method: Type[Simulations] = None) -> Simulations:
     task_id = np.random.randint(1e9, 1e12)
-    config = io.load_toml(os.path.join(data_folder, "initial_config.toml"))
-    io.set_data_folder(task_id, data_folder)
+    config = io.load_toml(os.path.join(sim_dir, "initial_config.toml"))
+    io.set_data_folder(task_id, sim_dir)
     param_seq = initialize.RecoveryParamSequence(config, task_id)

-    return _new_simulations(param_seq, task_id, method)
-
-
-def _new_simulations(
-    param_seq: initialize.ParamSequence,
-    task_id,
-    method: Type[Simulations],
-) -> Simulations:
-    if method is not None:
-        if isinstance(method, str):
-            method = Simulations.simulation_methods_dict[method]
-        return method(param_seq, task_id)
-    elif param_seq.num_sim > 1 and param_seq["simulation", "parallel"] and using_ray:
-        return Simulations.get_best_method()(param_seq, task_id)
-    else:
-        return SequencialSimulations(param_seq, task_id)
+    return Simulations.new(param_seq, task_id, method)


 if __name__ == "__main__":

View File

@@ -2,6 +2,7 @@ import os
 from collections.abc import Mapping, Sequence
 from glob import glob
 from typing import Any, Dict, List, Tuple
+from pathlib import Path

 import numpy as np
@@ -30,17 +31,17 @@ class Spectrum(np.ndarray):
 class Pulse(Sequence):
-    def __init__(self, path: str, ensure_2d=True):
+    def __init__(self, path: os.PathLike, ensure_2d=True):
         self.logger = get_logger(__name__)
-        self.path = str(path)
+        self.path = Path(path)
         self.__ensure_2d = ensure_2d

-        if not os.path.isdir(self.path):
+        if not self.path.is_dir():
             raise FileNotFoundError(f"Folder {self.path} does not exist")

         self.params = None
         try:
-            self.params = io.load_previous_parameters(os.path.join(self.path, "params.toml"))
+            self.params = io.load_previous_parameters(self.path / "params.toml")
         except FileNotFoundError:
             self.logger.info(f"parameters corresponding to {self.path} not found")
@@ -52,7 +53,7 @@ class Pulse(Sequence):
             else:
                 raise
         self.cache: Dict[int, Spectrum] = {}
-        self.nmax = len(glob(os.path.join(self.path, "spectra_*.npy")))
+        self.nmax = len(list(self.path.glob("spectra_*.npy")))
         if self.nmax <= 0:
             raise FileNotFoundError(f"No appropriate file in specified folder {self.path}")
@@ -77,7 +78,7 @@ class Pulse(Sequence):
         return self.nmax

     def __getitem__(self, key):
-        return self.all_spectra(ind=range(self.nmax)[key])
+        return self.all_spectra(ind=range(self.nmax)[key]).squeeze()

     def intensity(self, unit):
         if unit.type in ["WL", "FREQ", "AFREQ"]:
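The added `.squeeze()` drops any singleton axis (e.g. a single repeat or a single z position) so indexing a `Pulse` returns a plain array rather than one wrapped in length-1 dimensions. In isolation:

import numpy as np

spectra = np.zeros((1, 512))    # one repeat, 512 spectral points
print(spectra.squeeze().shape)  # (512,)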
@@ -187,6 +188,8 @@ class Pulse(Sequence):
         return np.fft.ifft(self.all_spectra(ind=ind), axis=-1)

     def _load1(self, i: int):
+        if i < 0:
+            i = self.nmax + i
         if i in self.cache:
             return self.cache[i]
         spec = io.load_single_spectrum(self.path, i)
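With the new guard, `_load1` accepts negative indices the way Python sequences do, and the cache is keyed by the normalized index. The normalization on its own:

nmax = 10  # stand-in for self.nmax

def normalize(i: int) -> int:
    if i < 0:
        i = nmax + i
    return i

print(normalize(-1), normalize(3))  # 9 3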
@@ -195,19 +198,3 @@ class Pulse(Sequence):
             spec = Spectrum(spec, self.wl, self.params["frep"])
         self.cache[i] = spec
         return spec
-
-
-class SpectraCollection(Mapping, Sequence):
-    def __init__(self, path: str):
-        self.path = path
-        self.collection: List[Spectra] = []
-
-        if not os.path.isdir(self.path):
-            raise FileNotFoundError(f"Folder {self.path} does not exist")
-
-        self.variable_list
-
-    def __getitem__(self, key):
-        return self.collection[key]
-
-    def __len__(self):
-        pass