From 627a7ec61d19b13c29070a6b9e98cee3fb7075f8 Mon Sep 17 00:00:00 2001 From: Farhan Saif Date: Thu, 16 Apr 2026 15:08:29 -0500 Subject: [PATCH 1/5] Replace dict based make_kv with shasta typed objects Signed-off-by: Farhan Saif --- src/pash/compiler/ir.py | 6 ++-- src/pash/compiler/ir_defs/file_id.py | 9 +++-- src/pash/compiler/ir_defs/redirection.py | 7 ++-- src/pash/compiler/ir_to_ast.py | 8 ++--- src/pash/compiler/util.py | 46 ++++++++++++++---------- 5 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/pash/compiler/ir.py b/src/pash/compiler/ir.py index 29e31dd41..46698038f 100644 --- a/src/pash/compiler/ir.py +++ b/src/pash/compiler/ir.py @@ -22,8 +22,6 @@ CommandInvocationWithIOVars, ) -from shasta.ast_node import ast_node_to_untyped_deep -from shasta.json_to_ast import to_ast_node from annotations_utils.util_parsing import parse_arg_list_to_command_invocation from annotations_utils.util_cmd_invocations import ( @@ -544,8 +542,8 @@ def to_ast(self, drain_streams) -> "list[AstNode]": asts.append(assignment) ## TODO: Ideally we would like to make them as typed nodes already - class_asts = [to_ast_node(ast_node_to_untyped_deep(ast)) for ast in asts] - return class_asts + ## Solved: returns are shashta typed + return asts def collect_pid_assignment(self): ## Creates: diff --git a/src/pash/compiler/ir_defs/file_id.py b/src/pash/compiler/ir_defs/file_id.py index 9966e6771..20a7424b1 100644 --- a/src/pash/compiler/ir_defs/file_id.py +++ b/src/pash/compiler/ir_defs/file_id.py @@ -2,7 +2,11 @@ import os import uuid -from util import string_to_argument, make_kv +from util import string_to_argument +from shasta.ast_node import QArgChar + + + from ir_defs.resource import ( Resource, FileDescriptorResource, @@ -89,7 +93,8 @@ def to_ast(self, stdin_dash=False): suffix = self.get_fifo_suffix() string = os.path.join(config.PASH_TMP_PREFIX, suffix) ## Quote the argument - argument = [make_kv("Q", string_to_argument(string))] + argument = 
[QArgChar(arg=string_to_argument(string))] + elif isinstance(self.resource, FileDescriptorResource): if self.resource.is_stdin() and stdin_dash: argument = string_to_argument("-") diff --git a/src/pash/compiler/ir_defs/redirection.py b/src/pash/compiler/ir_defs/redirection.py index f3c6f7717..6e8822f3f 100644 --- a/src/pash/compiler/ir_defs/redirection.py +++ b/src/pash/compiler/ir_defs/redirection.py @@ -1,6 +1,6 @@ from shasta.ast_node import RedirectionNode, FileRedirNode from ir_defs.arg import Arg -from util import make_kv, UnparallelizableError +from util import UnparallelizableError class Redirection: def __init__(self, redirection: RedirectionNode): @@ -24,10 +24,7 @@ def __repr__(self): ) def to_ast(self): - redir = make_kv( - self.redir_type, - [self.redir_subtype, self.stream_id, self.file_arg.to_ast()], - ) + redir = FileRedirNode(self.redir_subtype, self.stream_id, self.file_arg.to_ast()) return redir def is_to_file(self): diff --git a/src/pash/compiler/ir_to_ast.py b/src/pash/compiler/ir_to_ast.py index e36143ebc..d92507ad8 100644 --- a/src/pash/compiler/ir_to_ast.py +++ b/src/pash/compiler/ir_to_ast.py @@ -12,7 +12,7 @@ make_subshell, redir_append_stderr_to_string_file, ) -from shasta.json_to_ast import to_ast_node + from parse import from_ast_objects_to_shell import config @@ -111,8 +111,7 @@ def make_ir_prologue(ephemeral_fids) -> "list[AstNode]": call_mkfifos = make_command([string_to_argument(MKFIFO_PASH_FIFOS_NAME)]) asts.append(call_mkfifos) - class_asts = [to_ast_node(ast) for ast in asts] - return class_asts + return asts def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode]": @@ -148,8 +147,7 @@ def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode] exit_ec_ast = make_exit_ec_ast() asts.append(exit_ec_ast) - class_asts = [to_ast_node(ast) for ast in asts] - return class_asts + return asts def make_exit_ec_ast(): diff --git a/src/pash/compiler/util.py b/src/pash/compiler/util.py index 
38190a139..e9f57b803 100644 --- a/src/pash/compiler/util.py +++ b/src/pash/compiler/util.py @@ -11,7 +11,18 @@ import tempfile import config -from shasta.ast_node import CArgChar +from shasta.ast_node import ( + CArgChar, + VArgChar, + QArgChar, + CommandNode, + BackgroundNode, + SubshellNode, + SemiNode, + DefunNode, + AssignNode, + FileRedirNode, + ) # === List utilities === @@ -135,44 +146,42 @@ def string_to_argument(string): def char_to_arg_char(char): - return ["C", ord(char)] + return CArgChar(ord(char)) def standard_var_ast(string): - return make_kv("V", ["Normal", False, string, []]) + return VArgChar("Normal", False, string, []) def make_quoted_variable(string): - return make_kv("Q", [standard_var_ast(string)]) + return QArgChar(arg=[standard_var_ast(string)]) def quote_arg(arg): - return make_kv("Q", arg) + return QArgChar(arg=arg) def redir_append_stderr_to_string_file(string): - return make_kv("File", ["Append", 2, string_to_argument(string)]) - + return FileRedirNode("Append", ("fixed", 2), string_to_argument(string)) def redir_stdout_to_file(arg): - return make_kv("File", ["To", 1, arg]) + return FileRedirNode("To", ("fixed", 1), arg) def redir_file_to_stdin(arg): - return make_kv("File", ["From", 0, arg]) - + return FileRedirNode("From", ("fixed", 0), arg) def make_background(body, redirections=None): redirections = [] if redirections is None else redirections lineno = 0 - node = make_kv("Background", [lineno, body, redirections]) + node = BackgroundNode(lineno, body, redirections) return node def make_subshell(body, redirections=None): redirections = [] if redirections is None else redirections lineno = 0 - node = make_kv("Subshell", [lineno, body, redirections]) + node = SubshellNode(lineno, body, redirections) return node @@ -180,16 +189,15 @@ def make_command(arguments, redirections=None, assignments=None): redirections = [] if redirections is None else redirections assignments = [] if assignments is None else assignments lineno = 0 - node = 
make_kv("Command", [lineno, assignments, arguments, redirections]) + node = CommandNode(lineno, assignments, arguments, redirections) return node def make_assignment(var, value): lineno = 0 - assignment = (var, value) - assignments = [assignment] - node = make_kv("Command", [lineno, assignments, [], []]) - return node + assignment = AssignNode(var, value) + return CommandNode(lineno, [assignment],[],[]) + def make_semi_sequence(asts): @@ -203,11 +211,11 @@ def make_semi_sequence(asts): # Remove the last ast iter_asts = asts[:-1] for ast in iter_asts[::-1]: - acc = make_kv("Semi", [ast, acc]) + acc = SemiNode(ast, acc) return acc def make_defun(name, body): lineno = 0 - node = make_kv("Defun", [lineno, name, body]) + node = DefunNode(lineno, string_to_argument(name), body ) return node From 0c81a5e6ab4882cada0b972e67700ce9fc0b3062 Mon Sep 17 00:00:00 2001 From: Farhan Saif Date: Thu, 16 Apr 2026 15:57:30 -0500 Subject: [PATCH 2/5] Bypass CommandInvocationWithIOVars __init__ deepcopy via __new__ Signed-off-by: Farhan Saif --- src/pash/compiler/ir.py | 16 ++++++++++-- src/pash/compiler/ir_defs/nodes/dgsh_tee.py | 11 ++++++++ src/pash/compiler/ir_defs/nodes/eager.py | 12 +++++++++ src/pash/compiler/ir_defs/nodes/pash_split.py | 12 +++++++++ src/pash/compiler/ir_defs/nodes/r_merge.py | 11 ++++++++ src/pash/compiler/ir_defs/nodes/r_split.py | 12 +++++++++ src/pash/compiler/ir_defs/nodes/r_unwrap.py | 12 +++++++++ src/pash/compiler/ir_defs/nodes/r_wrap.py | 25 +++++++++++++------ 8 files changed, 101 insertions(+), 10 deletions(-) diff --git a/src/pash/compiler/ir.py b/src/pash/compiler/ir.py index 46698038f..b513be61b 100644 --- a/src/pash/compiler/ir.py +++ b/src/pash/compiler/ir.py @@ -236,7 +236,7 @@ def add_var_for_descriptor(operand): ) else: new_implicit_use_of_streaming_output = None - + """ command_invocation_with_io_vars = CommandInvocationWithIOVars( cmd_name=command_invocation_with_io.cmd_name, flag_option_list=new_flagoption_list, @@ -245,7 +245,19 @@ def 
add_var_for_descriptor(operand): implicit_use_of_streaming_output=new_implicit_use_of_streaming_output, access_map=access_map, ) - return command_invocation_with_io_vars, dfg_edges + """ + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = command_invocation_with_io.cmd_name + cmd_inv_with_io_vars.flag_option_list=new_flagoption_list + cmd_inv_with_io_vars.operand_list = new_operand_list + cmd_inv_with_io_vars.implicit_use_of_streaming_input = new_implicit_use_of_streaming_input + cmd_inv_with_io_vars.implicit_use_of_streaming_output = new_implicit_use_of_streaming_output + cmd_inv_with_io_vars.access_map = access_map + + + #return command_invocation_with_io_vars, dfg_edges + return cmd_inv_with_io_vars, dfg_edges def compile_command_to_DFG(fileIdGen, command, options, redirections=None): diff --git a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py index a6c408ff1..0f0d398e6 100644 --- a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py +++ b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py @@ -43,6 +43,7 @@ def make_dgsh_tee_node(input_id, output_id): ), ] + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=dgsh_tee_bin, flag_option_list=flag_option_list, @@ -51,4 +52,14 @@ def make_dgsh_tee_node(input_id, output_id): implicit_use_of_streaming_output=None, access_map=access_map, ) + """ + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = dgsh_tee_bin + cmd_inv_with_io_vars.flag_option_list=flag_option_list + cmd_inv_with_io_vars.operand_list = [] + cmd_inv_with_io_vars.implicit_use_of_streaming_input = None + cmd_inv_with_io_vars.implicit_use_of_streaming_output = None + cmd_inv_with_io_vars.access_map = access_map + return DGSHTee(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/eager.py b/src/pash/compiler/ir_defs/nodes/eager.py index a7910e822..134398748 
100644 --- a/src/pash/compiler/ir_defs/nodes/eager.py +++ b/src/pash/compiler/ir_defs/nodes/eager.py @@ -32,6 +32,7 @@ def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): input_id: make_stream_input(), intermediate_file_id_id: make_other_output(), } + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=eager_name, flag_option_list=[], @@ -40,4 +41,15 @@ def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): implicit_use_of_streaming_output=None, access_map=access_map, ) + """ + + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = eager_name + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = operand_list + cmd_inv_with_io_vars.implicit_use_of_streaming_input = None + cmd_inv_with_io_vars.implicit_use_of_streaming_output = None + cmd_inv_with_io_vars.access_map = access_map + return Eager(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/pash_split.py b/src/pash/compiler/ir_defs/nodes/pash_split.py index b3d799a39..49e945373 100644 --- a/src/pash/compiler/ir_defs/nodes/pash_split.py +++ b/src/pash/compiler/ir_defs/nodes/pash_split.py @@ -37,6 +37,7 @@ def make_split_file(input_id, out_ids): operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=auto_split_bin, flag_option_list=[], @@ -45,4 +46,15 @@ def make_split_file(input_id, out_ids): implicit_use_of_streaming_output=None, access_map=access_map, ) + """ + + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = auto_split_bin + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = operand_list + cmd_inv_with_io_vars.implicit_use_of_streaming_input = None + 
cmd_inv_with_io_vars.implicit_use_of_streaming_output = None + cmd_inv_with_io_vars.access_map = access_map + return Split(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_merge.py b/src/pash/compiler/ir_defs/nodes/r_merge.py index fdc7c05c4..2f8ef75f9 100644 --- a/src/pash/compiler/ir_defs/nodes/r_merge.py +++ b/src/pash/compiler/ir_defs/nodes/r_merge.py @@ -36,6 +36,7 @@ def make_r_merge_node(inputs, output): # TODO: assume that the inputs and output is provided as operands access_map = {input_id: make_stream_input() for input_id in inputs} access_map[output] = make_stream_output() + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_merge_bin, flag_option_list=[], @@ -44,4 +45,14 @@ def make_r_merge_node(inputs, output): implicit_use_of_streaming_output=output, access_map=access_map, ) + """ + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = r_merge_bin + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = inputs + cmd_inv_with_io_vars.implicit_use_of_streaming_input = None + cmd_inv_with_io_vars.implicit_use_of_streaming_output = output + cmd_inv_with_io_vars.access_map = access_map + return RMerge(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_split.py b/src/pash/compiler/ir_defs/nodes/r_split.py index cafdfb2c4..12f37c036 100644 --- a/src/pash/compiler/ir_defs/nodes/r_split.py +++ b/src/pash/compiler/ir_defs/nodes/r_split.py @@ -47,6 +47,7 @@ def make_r_split(input_id, out_ids, r_split_batch_size): operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_split_bin, flag_option_list=[], @@ -55,6 +56,17 @@ def make_r_split(input_id, out_ids, r_split_batch_size): implicit_use_of_streaming_output=None, access_map=access_map, ) + """ + + 
cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = r_split_bin + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = operand_list + cmd_inv_with_io_vars.implicit_use_of_streaming_input = None + cmd_inv_with_io_vars.implicit_use_of_streaming_output = None + cmd_inv_with_io_vars.access_map = access_map + + return RSplit(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_unwrap.py b/src/pash/compiler/ir_defs/nodes/r_unwrap.py index d88333303..ca80b61f4 100644 --- a/src/pash/compiler/ir_defs/nodes/r_unwrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_unwrap.py @@ -36,6 +36,7 @@ def make_unwrap_node(inputs, output): r_unwrap_bin = os.path.join( config.PASH_TOP, config.config["runtime"]["r_unwrap_binary"] ) + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_unwrap_bin, flag_option_list=[], @@ -44,4 +45,15 @@ def make_unwrap_node(inputs, output): implicit_use_of_streaming_output=output, access_map=access_map, ) + """ + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = r_unwrap_bin + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = [] + cmd_inv_with_io_vars.implicit_use_of_streaming_input = input_id + cmd_inv_with_io_vars.implicit_use_of_streaming_output = output + cmd_inv_with_io_vars.access_map = access_map + + return RUnwrap(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_wrap.py b/src/pash/compiler/ir_defs/nodes/r_wrap.py index 724e1cd03..433aa21d8 100644 --- a/src/pash/compiler/ir_defs/nodes/r_wrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_wrap.py @@ -75,15 +75,24 @@ def wrap_node(node: DFGNode, edges): bash_command_arg = [Arg.string_to_arg("bash -c")] operand_list = bash_command_arg + [cmd] - + """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( - cmd_name=r_wrap_bin, - flag_option_list=[], - 
operand_list=operand_list, - implicit_use_of_streaming_input=input_id, - implicit_use_of_streaming_output=output_id, - access_map=access_map, - ) + cmd_name=r_wrap_bin, + flag_option_list=[], + operand_list=operand_list, + implicit_use_of_streaming_input=input_id, + implicit_use_of_streaming_output=output_id, access_map=access_map, + ) + """ + + cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) + cmd_inv_with_io_vars.cmd_name = r_wrap_bin + cmd_inv_with_io_vars.flag_option_list=[] + cmd_inv_with_io_vars.operand_list = operand_list + cmd_inv_with_io_vars.implicit_use_of_streaming_input = input_id + cmd_inv_with_io_vars.implicit_use_of_streaming_output = output_id + cmd_inv_with_io_vars.access_map = access_map + ## TODO: It is not clear if it is safe to just pass redirections and assignments down the line as is redirs = node.com_redirs From 30cab208d03d2cb8ba77fb6345fa0c4bb98faa3e Mon Sep 17 00:00:00 2001 From: Farhan Saif Date: Thu, 16 Apr 2026 17:33:22 -0500 Subject: [PATCH 3/5] update comments deepcopy bypass Signed-off-by: Farhan Saif --- src/pash/compiler/ir.py | 1 + src/pash/compiler/ir_defs/nodes/dgsh_tee.py | 1 + src/pash/compiler/ir_defs/nodes/eager.py | 1 + src/pash/compiler/ir_defs/nodes/pash_split.py | 1 + src/pash/compiler/ir_defs/nodes/r_merge.py | 2 +- src/pash/compiler/ir_defs/nodes/r_split.py | 1 + src/pash/compiler/ir_defs/nodes/r_unwrap.py | 1 + src/pash/compiler/ir_defs/nodes/r_wrap.py | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pash/compiler/ir.py b/src/pash/compiler/ir.py index b513be61b..dd3100077 100644 --- a/src/pash/compiler/ir.py +++ b/src/pash/compiler/ir.py @@ -247,6 +247,7 @@ def add_var_for_descriptor(operand): ) """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = command_invocation_with_io.cmd_name cmd_inv_with_io_vars.flag_option_list=new_flagoption_list diff --git a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py index 0f0d398e6..fe26f7dd4 100644 --- a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py +++ b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py @@ -54,6 +54,7 @@ def make_dgsh_tee_node(input_id, output_id): ) """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = dgsh_tee_bin cmd_inv_with_io_vars.flag_option_list=flag_option_list diff --git a/src/pash/compiler/ir_defs/nodes/eager.py b/src/pash/compiler/ir_defs/nodes/eager.py index 134398748..0f60e22fa 100644 --- a/src/pash/compiler/ir_defs/nodes/eager.py +++ b/src/pash/compiler/ir_defs/nodes/eager.py @@ -44,6 +44,7 @@ def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = eager_name cmd_inv_with_io_vars.flag_option_list=[] diff --git a/src/pash/compiler/ir_defs/nodes/pash_split.py b/src/pash/compiler/ir_defs/nodes/pash_split.py index 49e945373..7799fdd1a 100644 --- a/src/pash/compiler/ir_defs/nodes/pash_split.py +++ b/src/pash/compiler/ir_defs/nodes/pash_split.py @@ -49,6 +49,7 @@ def make_split_file(input_id, out_ids): """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = auto_split_bin cmd_inv_with_io_vars.flag_option_list=[] diff --git a/src/pash/compiler/ir_defs/nodes/r_merge.py b/src/pash/compiler/ir_defs/nodes/r_merge.py index 2f8ef75f9..10322c4b0 100644 --- a/src/pash/compiler/ir_defs/nodes/r_merge.py +++ b/src/pash/compiler/ir_defs/nodes/r_merge.py @@ -46,7 +46,7 @@ def make_r_merge_node(inputs, output): access_map=access_map, ) """ - + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = r_merge_bin cmd_inv_with_io_vars.flag_option_list=[] diff --git a/src/pash/compiler/ir_defs/nodes/r_split.py b/src/pash/compiler/ir_defs/nodes/r_split.py index 12f37c036..e884c0895 100644 --- a/src/pash/compiler/ir_defs/nodes/r_split.py +++ b/src/pash/compiler/ir_defs/nodes/r_split.py @@ -58,6 +58,7 @@ def make_r_split(input_id, out_ids, r_split_batch_size): ) """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = r_split_bin cmd_inv_with_io_vars.flag_option_list=[] diff --git a/src/pash/compiler/ir_defs/nodes/r_unwrap.py b/src/pash/compiler/ir_defs/nodes/r_unwrap.py index ca80b61f4..19eb1218a 100644 --- a/src/pash/compiler/ir_defs/nodes/r_unwrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_unwrap.py @@ -47,6 +47,7 @@ def make_unwrap_node(inputs, output): ) """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = r_unwrap_bin cmd_inv_with_io_vars.flag_option_list=[] diff --git a/src/pash/compiler/ir_defs/nodes/r_wrap.py b/src/pash/compiler/ir_defs/nodes/r_wrap.py index 433aa21d8..0a699ae45 100644 --- a/src/pash/compiler/ir_defs/nodes/r_wrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_wrap.py @@ -85,6 +85,7 @@ def wrap_node(node: DFGNode, edges): ) """ + # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) cmd_inv_with_io_vars.cmd_name = r_wrap_bin cmd_inv_with_io_vars.flag_option_list=[] From 7bce6e98e72ab287bcb494b65ec4aa0bed9b0138 Mon Sep 17 00:00:00 2001 From: Farhan Saif Date: Wed, 22 Apr 2026 11:54:15 -0500 Subject: [PATCH 4/5] revert the deepcopy __init__ bypass, restore normal constructor --- src/pash/compiler/ir.py | 17 ++---------- src/pash/compiler/ir_defs/nodes/dgsh_tee.py | 12 --------- src/pash/compiler/ir_defs/nodes/eager.py | 13 ---------- src/pash/compiler/ir_defs/nodes/pash_split.py | 13 ---------- src/pash/compiler/ir_defs/nodes/r_merge.py | 11 -------- src/pash/compiler/ir_defs/nodes/r_split.py | 13 ---------- src/pash/compiler/ir_defs/nodes/r_unwrap.py | 13 ---------- src/pash/compiler/ir_defs/nodes/r_wrap.py | 26 ++++++------------- 8 files changed, 10 insertions(+), 108 deletions(-) diff --git a/src/pash/compiler/ir.py b/src/pash/compiler/ir.py index dd3100077..46698038f 100644 --- a/src/pash/compiler/ir.py +++ b/src/pash/compiler/ir.py @@ -236,7 +236,7 @@ def add_var_for_descriptor(operand): ) else: new_implicit_use_of_streaming_output = None - """ + command_invocation_with_io_vars = CommandInvocationWithIOVars( cmd_name=command_invocation_with_io.cmd_name, flag_option_list=new_flagoption_list, @@ -245,20 +245,7 @@ def add_var_for_descriptor(operand): implicit_use_of_streaming_output=new_implicit_use_of_streaming_output, 
access_map=access_map, ) - """ - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. - cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = command_invocation_with_io.cmd_name - cmd_inv_with_io_vars.flag_option_list=new_flagoption_list - cmd_inv_with_io_vars.operand_list = new_operand_list - cmd_inv_with_io_vars.implicit_use_of_streaming_input = new_implicit_use_of_streaming_input - cmd_inv_with_io_vars.implicit_use_of_streaming_output = new_implicit_use_of_streaming_output - cmd_inv_with_io_vars.access_map = access_map - - - #return command_invocation_with_io_vars, dfg_edges - return cmd_inv_with_io_vars, dfg_edges + return command_invocation_with_io_vars, dfg_edges def compile_command_to_DFG(fileIdGen, command, options, redirections=None): diff --git a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py index fe26f7dd4..a6c408ff1 100644 --- a/src/pash/compiler/ir_defs/nodes/dgsh_tee.py +++ b/src/pash/compiler/ir_defs/nodes/dgsh_tee.py @@ -43,7 +43,6 @@ def make_dgsh_tee_node(input_id, output_id): ), ] - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=dgsh_tee_bin, flag_option_list=flag_option_list, @@ -52,15 +51,4 @@ def make_dgsh_tee_node(input_id, output_id): implicit_use_of_streaming_output=None, access_map=access_map, ) - """ - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = dgsh_tee_bin - cmd_inv_with_io_vars.flag_option_list=flag_option_list - cmd_inv_with_io_vars.operand_list = [] - cmd_inv_with_io_vars.implicit_use_of_streaming_input = None - cmd_inv_with_io_vars.implicit_use_of_streaming_output = None - cmd_inv_with_io_vars.access_map = access_map - return DGSHTee(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/eager.py b/src/pash/compiler/ir_defs/nodes/eager.py index 0f60e22fa..a7910e822 100644 --- a/src/pash/compiler/ir_defs/nodes/eager.py +++ b/src/pash/compiler/ir_defs/nodes/eager.py @@ -32,7 +32,6 @@ def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): input_id: make_stream_input(), intermediate_file_id_id: make_other_output(), } - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=eager_name, flag_option_list=[], @@ -41,16 +40,4 @@ def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): implicit_use_of_streaming_output=None, access_map=access_map, ) - """ - - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = eager_name - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = operand_list - cmd_inv_with_io_vars.implicit_use_of_streaming_input = None - cmd_inv_with_io_vars.implicit_use_of_streaming_output = None - cmd_inv_with_io_vars.access_map = access_map - return Eager(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/pash_split.py b/src/pash/compiler/ir_defs/nodes/pash_split.py index 7799fdd1a..b3d799a39 100644 --- a/src/pash/compiler/ir_defs/nodes/pash_split.py +++ b/src/pash/compiler/ir_defs/nodes/pash_split.py @@ -37,7 +37,6 @@ def make_split_file(input_id, out_ids): operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=auto_split_bin, flag_option_list=[], @@ -46,16 +45,4 @@ def make_split_file(input_id, out_ids): implicit_use_of_streaming_output=None, access_map=access_map, ) - """ - - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = auto_split_bin - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = operand_list - cmd_inv_with_io_vars.implicit_use_of_streaming_input = None - cmd_inv_with_io_vars.implicit_use_of_streaming_output = None - cmd_inv_with_io_vars.access_map = access_map - return Split(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_merge.py b/src/pash/compiler/ir_defs/nodes/r_merge.py index 10322c4b0..fdc7c05c4 100644 --- a/src/pash/compiler/ir_defs/nodes/r_merge.py +++ b/src/pash/compiler/ir_defs/nodes/r_merge.py @@ -36,7 +36,6 @@ def make_r_merge_node(inputs, output): # TODO: assume that the inputs and output is provided as operands access_map = {input_id: make_stream_input() for input_id in inputs} access_map[output] = make_stream_output() - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_merge_bin, flag_option_list=[], @@ -45,14 +44,4 @@ def make_r_merge_node(inputs, output): implicit_use_of_streaming_output=output, access_map=access_map, ) - """ - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = r_merge_bin - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = inputs - cmd_inv_with_io_vars.implicit_use_of_streaming_input = None - cmd_inv_with_io_vars.implicit_use_of_streaming_output = output - cmd_inv_with_io_vars.access_map = access_map - return RMerge(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_split.py b/src/pash/compiler/ir_defs/nodes/r_split.py index e884c0895..cafdfb2c4 100644 --- a/src/pash/compiler/ir_defs/nodes/r_split.py +++ b/src/pash/compiler/ir_defs/nodes/r_split.py @@ -47,7 +47,6 @@ def make_r_split(input_id, out_ids, r_split_batch_size): operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_split_bin, flag_option_list=[], @@ -56,18 +55,6 @@ def make_r_split(input_id, out_ids, r_split_batch_size): implicit_use_of_streaming_output=None, access_map=access_map, ) - """ - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = r_split_bin - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = operand_list - cmd_inv_with_io_vars.implicit_use_of_streaming_input = None - cmd_inv_with_io_vars.implicit_use_of_streaming_output = None - cmd_inv_with_io_vars.access_map = access_map - - return RSplit(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_unwrap.py b/src/pash/compiler/ir_defs/nodes/r_unwrap.py index 19eb1218a..d88333303 100644 --- a/src/pash/compiler/ir_defs/nodes/r_unwrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_unwrap.py @@ -36,7 +36,6 @@ def make_unwrap_node(inputs, output): r_unwrap_bin = os.path.join( config.PASH_TOP, config.config["runtime"]["r_unwrap_binary"] ) - """ cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_unwrap_bin, flag_option_list=[], @@ -45,16 +44,4 @@ def make_unwrap_node(inputs, output): implicit_use_of_streaming_output=output, access_map=access_map, ) - """ - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = r_unwrap_bin - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = [] - cmd_inv_with_io_vars.implicit_use_of_streaming_input = input_id - cmd_inv_with_io_vars.implicit_use_of_streaming_output = output - cmd_inv_with_io_vars.access_map = access_map - - return RUnwrap(cmd_inv_with_io_vars) diff --git a/src/pash/compiler/ir_defs/nodes/r_wrap.py b/src/pash/compiler/ir_defs/nodes/r_wrap.py index 0a699ae45..724e1cd03 100644 --- a/src/pash/compiler/ir_defs/nodes/r_wrap.py +++ b/src/pash/compiler/ir_defs/nodes/r_wrap.py @@ -75,25 +75,15 @@ def wrap_node(node: DFGNode, edges): bash_command_arg = [Arg.string_to_arg("bash -c")] operand_list = bash_command_arg + [cmd] - """ - cmd_inv_with_io_vars = CommandInvocationWithIOVars( - cmd_name=r_wrap_bin, - flag_option_list=[], - operand_list=operand_list, - implicit_use_of_streaming_input=input_id, - implicit_use_of_streaming_output=output_id, access_map=access_map, - ) - """ - - # Skip __init__ to avoid its deepcopy; inputs are freshly constructed here. 
- cmd_inv_with_io_vars = CommandInvocationWithIOVars.__new__(CommandInvocationWithIOVars) - cmd_inv_with_io_vars.cmd_name = r_wrap_bin - cmd_inv_with_io_vars.flag_option_list=[] - cmd_inv_with_io_vars.operand_list = operand_list - cmd_inv_with_io_vars.implicit_use_of_streaming_input = input_id - cmd_inv_with_io_vars.implicit_use_of_streaming_output = output_id - cmd_inv_with_io_vars.access_map = access_map + cmd_inv_with_io_vars = CommandInvocationWithIOVars( + cmd_name=r_wrap_bin, + flag_option_list=[], + operand_list=operand_list, + implicit_use_of_streaming_input=input_id, + implicit_use_of_streaming_output=output_id, + access_map=access_map, + ) ## TODO: It is not clear if it is safe to just pass redirections and assignments down the line as is redirs = node.com_redirs From f926e8083ec9ece7fe119117a25f5cae426599dc Mon Sep 17 00:00:00 2001 From: Farhan Saif Date: Wed, 22 Apr 2026 12:05:26 -0500 Subject: [PATCH 5/5] rev back to original __init__, ir.py comments deleted Signed-off-by: Farhan Saif --- src/pash/compiler/ir.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pash/compiler/ir.py b/src/pash/compiler/ir.py index 46698038f..4b0f15e29 100644 --- a/src/pash/compiler/ir.py +++ b/src/pash/compiler/ir.py @@ -541,8 +541,6 @@ def to_ast(self, drain_streams) -> "list[AstNode]": assignment = self.collect_pid_assignment() asts.append(assignment) - ## TODO: Ideally we would like to make them as typed nodes already - ## Solved: returns are shashta typed return asts def collect_pid_assignment(self):