Utils

Originally taken from Michael Hall's tbpore https://github.com/mbhall88/tbpore/blob/main/tbpore/external_tools.py

Also used by a variety of other tools (Dnaapler, Plassembler, Pharokka)

`ExternalTool`

Class for running external tools.

Parameters:

Name	Type	Description	Default
`tool`	`str`	The path to the tool to run.	required
`input`	`str`	The input file.	required
`output`	`str`	The output file.	required
`params`	`str`	The parameters to pass to the tool.	required
`logdir`	`Path`	The directory to store log files.	required

Attributes:

Name	Type	Description
`command`	`List[str]`	The command to run.
`out_log`	`str`	The path to the stdout log file.
`err_log`	`str`	The path to the stderr log file.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> tool.command
["tool", "params", "output", "input"]
>>> tool.out_log
"logdir/tool_1234567890abcdef1234567890abcdef.out"
>>> tool.err_log
"logdir/tool_1234567890abcdef1234567890abcdef.err"

Source code in src/baktfold/utils/external_tools.py

class ExternalTool:
    """
    Class for running external tools.

    The command line is assembled as ``tool params output input`` and split
    into an argument list with ``shlex.split``. Each run writes its stdout
    and stderr to log files in ``logdir`` named after the tool and the
    SHA-256 hex digest of the command line.

    Args:
      tool (str): The path to the tool to run.
      input (str): The input file.
      output (str): The output file.
      params (str): The parameters to pass to the tool.
      logdir (Path): The directory to store log files.
      env (Optional[Dict[str, str]]): Extra env vars merged with
        os.environ for the subprocess. None == inherit unchanged.

    Attributes:
      command (List[str]): The command to run.
      out_log (str): The path to the stdout log file.
      err_log (str): The path to the stderr log file.
      env (Optional[Dict[str, str]]): Extra subprocess env vars.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> tool.command
      ["tool", "params", "output", "input"]
      >>> tool.out_log
      "logdir/tool_<sha256 hex digest of the command>.out"
      >>> tool.err_log
      "logdir/tool_<sha256 hex digest of the command>.err"
    """

    def __init__(
        self,
        tool: str,
        input: str,
        output: str,
        params: str,
        logdir: Path,
        env: Optional[Dict[str, str]] = None,
    ):
        """
        Initializes an ExternalTool object.

        Creates ``logdir`` (including parents) if it does not exist yet.

        Args:
          tool (str): The path to the tool to run.
          input (str): The input file.
          output (str): The output file.
          params (str): The parameters to pass to the tool.
          logdir (Path): The directory to store log files.
          env (Optional[Dict[str, str]]): Extra env vars merged with
            os.environ for the subprocess (e.g. CUDA_VISIBLE_DEVICES
            for multi-GPU foldseek). None == inherit unchanged.

        Attributes:
          command (List[str]): The command to run.
          out_log (str): The path to the stdout log file.
          err_log (str): The path to the stderr log file.
          env (Optional[Dict[str, str]]): Extra subprocess env vars.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> tool.command
          ["tool", "params", "output", "input"]
        """
        logdir = Path(logdir)
        self.command: List[str] = self._build_command(tool, input, output, params)
        logdir.mkdir(parents=True, exist_ok=True)
        # Hashing the full command keeps log names distinct for different
        # invocations of the same tool that share one log directory.
        command_hash = hashlib.sha256(self.command_as_str.encode("utf-8")).hexdigest()
        tool_name = Path(tool).name
        logfile_prefix: Path = logdir / f"{tool_name}_{command_hash}"
        self.out_log = f"{logfile_prefix}.out"
        self.err_log = f"{logfile_prefix}.err"
        self.env = env

    @property
    def command_as_str(self) -> str:
        """
        Returns the command as a single shell-quoted string.

        Returns:
          str: The command as a string.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> tool.command_as_str
          "tool params output input"
        """
        return shlex.join(self.command)

    @staticmethod
    def _build_command(tool: str, input: str, output: str, params: str) -> List[str]:
        """
        Builds the command to run.

        The four pieces are joined with single spaces and then tokenised with
        ``shlex.split``, so ``params`` may carry several arguments, empty
        pieces disappear, and any piece containing whitespace must already
        be shell-quoted by the caller.

        Args:
          tool (str): The path to the tool to run.
          input (str): The input file.
          output (str): The output file.
          params (str): The parameters to pass to the tool.

        Returns:
          List[str]: The command to run.

        Examples:
          >>> ExternalTool._build_command("tool", "input", "output", "params")
          ["tool", "params", "output", "input"]
        """
        # note: shlex.join does not allow us to shlex.split() later
        # this is explicitly a " ".join()
        command = " ".join([tool, params, output, input])
        return shlex.split(command)

    @staticmethod
    def _merged_env(env: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
        """
        Builds the environment mapping handed to ``subprocess``.

        Args:
          env (Optional[Dict[str, str]]): Extra env vars, or None/empty.

        Returns:
          Optional[Dict[str, str]]: ``os.environ`` overlaid with ``env``, or
          None (meaning "inherit unchanged") when ``env`` is None or empty.
        """
        return {**os.environ, **env} if env else None

    def run(self) -> None:
        """
        Runs the tool, sending its stdout and stderr to the log files.

        Raises:
          subprocess.CalledProcessError: If the tool exits with a non-zero
            return code.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> tool.run()
        """
        with open(self.out_log, "w") as stdout_fh, open(self.err_log, "w") as stderr_fh:
            print(f"Command line: {self.command_as_str}", file=stderr_fh)
            if self.env:
                print(f"Extra env: {self.env}", file=stderr_fh)
            logger.info(f"Started running {self.command_as_str} ...")
            self._run_core(
                self.command,
                stdout_fh=stdout_fh,
                stderr_fh=stderr_fh,
                env=self.env,
            )
            logger.info(f"Done running {self.command_as_str}")

    # stream to terminal (aria2c) so the user knows how long it is taking
    def run_stream(self) -> None:
        """
        Runs the tool and streams the output to the terminal.

        The child's stderr is merged into its stdout; every line is echoed
        to the terminal and also written to the stdout log. The stderr log
        only receives the command line (and the extra env vars, if any).

        Raises:
          subprocess.CalledProcessError: If the tool exits with a non-zero
            return code.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> tool.run_stream()
        """
        with open(self.out_log, "w") as stdout_fh, open(self.err_log, "w") as stderr_fh:
            print(f"Command line: {self.command_as_str}", file=stderr_fh)
            if self.env:
                print(f"Extra env: {self.env}", file=stderr_fh)
            logger.info(f"Started running {self.command_as_str} ...")

            # ``with subprocess.Popen(...)`` guarantees stdout is closed and
            # ``wait()`` is called on every exit path including exceptions.
            # The inner ``try/except BaseException`` kills the child before
            # ``Popen.__exit__`` calls ``wait()`` — without it an unkilled
            # child can hang the wait and leave a zombie. BaseException (not
            # Exception) is intentional so Ctrl-C / SystemExit also trigger.
            with subprocess.Popen(
                self.command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=1,
                universal_newlines=True,
                # Apply the same extra env vars that run() applies.
                env=self._merged_env(self.env),
            ) as process:
                try:
                    for line in process.stdout:
                        print(line, end="")        # Live output to terminal
                        stdout_fh.write(line)      # Also write to stdout log
                except BaseException:
                    process.kill()
                    raise

            # Popen.__exit__ has called wait(); returncode is now set.
            return_code = process.returncode
            logger.info(f"Done running {self.command_as_str}")

            if return_code != 0:
                raise subprocess.CalledProcessError(return_code, self.command)

    @staticmethod
    def _run_core(
        command: List[str],
        stdout_fh,
        stderr_fh,
        env: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Runs the command and waits for it to finish.

        Args:
          command (List[str]): The command to run.
          stdout_fh: The file handle to write stdout to.
          stderr_fh: The file handle to write stderr to.
          env (Optional[Dict[str, str]]): Extra env vars merged with
            os.environ for the subprocess. None == inherit.

        Raises:
          subprocess.CalledProcessError: If the command exits with a
            non-zero return code.

        Examples:
          >>> ExternalTool._run_core(["tool", "params", "output", "input"], stdout_fh, stderr_fh)
        """
        subprocess.check_call(
            command,
            stdout=stdout_fh,
            stderr=stderr_fh,
            env=ExternalTool._merged_env(env),
        )

    @staticmethod
    def _log_failure_and_exit(
        tool: "ExternalTool",
        error: subprocess.CalledProcessError,
        ctx: Optional[click.Context] = None,
    ) -> None:
        """
        Logs a failed tool invocation and terminates with exit status 1.

        Args:
          tool (ExternalTool): The tool whose run failed.
          error (subprocess.CalledProcessError): The error raised by the run.
          ctx (Optional[click.Context]): If given, exit through
            ``ctx.exit(1)``; otherwise through ``sys.exit(1)``.
        """
        logger.error(
            f"Error calling {tool.command_as_str} (return code {error.returncode})"
        )
        logger.error(f"Please check stdout log file: {tool.out_log}")
        logger.error(f"Please check stderr log file: {tool.err_log}")
        logger.error("Temporary files are preserved for debugging")
        logger.error("Exiting...")

        if ctx:
            ctx.exit(1)
        else:
            sys.exit(1)

    @staticmethod
    def run_tools(
        tools_to_run: Tuple["ExternalTool", ...], ctx: Optional[click.Context] = None
    ) -> None:
        """
        Runs a sequence of tools in order, exiting on the first failure.

        Args:
          tools_to_run (Tuple[ExternalTool, ...]): The tools to run.
          ctx (Optional[click.Context]): The click context used to exit on
            failure. Defaults to None, in which case ``sys.exit`` is used.

        Examples:
          >>> tool1 = ExternalTool("tool1", "input1", "output1", "params1", "logdir")
          >>> tool2 = ExternalTool("tool2", "input2", "output2", "params2", "logdir")
          >>> ExternalTool.run_tools((tool1, tool2))
          >>> ExternalTool.run_tools((tool1, tool2), ctx)
        """
        for tool in tools_to_run:
            try:
                tool.run()
            except subprocess.CalledProcessError as error:
                ExternalTool._log_failure_and_exit(tool, error, ctx)

    # Only one tool
    @staticmethod
    def run_tool(tool: "ExternalTool", ctx: Optional[click.Context] = None) -> None:
        """
        Runs the given external tool.

        A non-zero return code from the tool is caught here: the failure is
        logged and the process exits with status 1 instead of propagating
        ``subprocess.CalledProcessError``.

        Args:
          tool (ExternalTool): The external tool to run.
          ctx (Optional[click.Context]): The click context to use. Defaults to None.

        Returns:
          None.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> ExternalTool.run_tool(tool)
        """
        try:
            tool.run()
        except subprocess.CalledProcessError as error:
            ExternalTool._log_failure_and_exit(tool, error, ctx)

    # Only download - so can print the aria2c output to screen
    @staticmethod
    def run_download(tool: "ExternalTool", ctx: Optional[click.Context] = None) -> None:
        """
        Runs the given external tool and prints the aria2c output to the screen.

        A non-zero return code from the tool is caught here: the failure is
        logged and the process exits with status 1 instead of propagating
        ``subprocess.CalledProcessError``.

        Args:
          tool (ExternalTool): The external tool to run.
          ctx (Optional[click.Context]): The click context to use. Defaults to None.

        Returns:
          None.

        Examples:
          >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
          >>> ExternalTool.run_download(tool)
        """
        try:
            tool.run_stream()
        except subprocess.CalledProcessError as error:
            ExternalTool._log_failure_and_exit(tool, error, ctx)

`command_as_str: str` `property`

Returns the command as a string.

Returns:

Name	Type	Description
`str`	`str`	The command as a string.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> tool.command_as_str
"tool params output input"

`__init__(tool, input, output, params, logdir, env=None)`

Initializes an ExternalTool object.

Parameters:

Name	Type	Description	Default
`tool`	`str`	The path to the tool to run.	required
`input`	`str`	The input file.	required
`output`	`str`	The output file.	required
`params`	`str`	The parameters to pass to the tool.	required
`logdir`	`Path`	The directory to store log files.	required
`env`	`Optional[Dict[str, str]]`	Extra env vars merged with os.environ for the subprocess (e.g. CUDA_VISIBLE_DEVICES for multi-GPU foldseek). None == inherit unchanged.	`None`

Attributes:

Name	Type	Description
`command`	`List[str]`	The command to run.
`out_log`	`str`	The path to the stdout log file.
`err_log`	`str`	The path to the stderr log file.
`env`	`Optional[Dict[str, str]]`	Extra subprocess env vars.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> tool.command
["tool", "params", "output", "input"]
>>> tool.out_log
"logdir/tool_1234567890abcdef1234567890abcdef.out"
>>> tool.err_log
"logdir/tool_1234567890abcdef1234567890abcdef.err"

Source code in src/baktfold/utils/external_tools.py

def __init__(
    self,
    tool: str,
    input: str,
    output: str,
    params: str,
    logdir: Path,
    env: Optional[Dict[str, str]] = None,
):
    """
    Set up the command line and log file locations for one tool invocation.

    The log directory is created (with parents) if it is missing.

    Args:
      tool (str): The path to the tool to run.
      input (str): The input file.
      output (str): The output file.
      params (str): The parameters to pass to the tool.
      logdir (Path): The directory to store log files.
      env (Optional[Dict[str, str]]): Extra env vars merged with
        os.environ for the subprocess (e.g. CUDA_VISIBLE_DEVICES
        for multi-GPU foldseek). None == inherit unchanged.

    Attributes:
      command (List[str]): The command to run.
      out_log (str): The path to the stdout log file.
      err_log (str): The path to the stderr log file.
      env (Optional[Dict[str, str]]): Extra subprocess env vars.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> tool.command
      ["tool", "params", "output", "input"]
      >>> tool.out_log
      "logdir/tool_<sha256 hex digest of the command>.out"
      >>> tool.err_log
      "logdir/tool_<sha256 hex digest of the command>.err"
    """
    log_root = Path(logdir)
    self.command: List[str] = self._build_command(tool, input, output, params)
    log_root.mkdir(parents=True, exist_ok=True)

    # Log files are named "<tool basename>_<sha256 of the command line>".
    digest = hashlib.sha256(self.command_as_str.encode("utf-8")).hexdigest()
    log_stem = log_root / f"{Path(tool).name}_{digest}"
    self.out_log = f"{log_stem}.out"
    self.err_log = f"{log_stem}.err"
    self.env = env

`run()`

Runs the tool.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> tool.run()

Source code in src/baktfold/utils/external_tools.py

def run(self) -> None:
    """
    Run the tool with stdout and stderr redirected to the log files.

    The stderr log starts with the command line and, when extra env vars
    were supplied, a line listing them.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> tool.run()
    """
    with open(self.out_log, "w") as stdout_fh, open(self.err_log, "w") as stderr_fh:
        command_text = self.command_as_str
        stderr_fh.write(f"Command line: {command_text}\n")
        if self.env:
            stderr_fh.write(f"Extra env: {self.env}\n")

        logger.info(f"Started running {command_text} ...")
        self._run_core(self.command, stdout_fh=stdout_fh, stderr_fh=stderr_fh, env=self.env)
        logger.info(f"Done running {command_text}")

`run_download(tool, ctx=None)` `staticmethod`

Runs the given external tool and prints the aria2c output to the screen.

Parameters:

Name	Type	Description	Default
`tool`	`ExternalTool`	The external tool to run.	required
`ctx`	`Optional[click.Context]`	The click context to use. Defaults to None.	`None`

Returns:

Type	Description
`None`	None.

Raises:

Type	Description
`subprocess.CalledProcessError`	If there is an error calling the external tool.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> ExternalTool.run_download(tool)
None

Source code in src/baktfold/utils/external_tools.py

@staticmethod
def run_download(tool: "ExternalTool", ctx: Optional[click.Context] = None) -> None:
    """
    Run a download tool, echoing its (aria2c) output to the screen.

    If the tool exits with a non-zero return code, the failure is logged
    and the process exits with status 1.

    Args:
      tool (ExternalTool): The external tool to run.
      ctx (Optional[click.Context]): The click context to use. Defaults to None.

    Returns:
      None.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> ExternalTool.run_download(tool)
    """
    try:
        tool.run_stream()
    except subprocess.CalledProcessError as error:
        failure_messages = (
            f"Error calling {tool.command_as_str} (return code {error.returncode})",
            f"Please check stdout log file: {tool.out_log}",
            f"Please check stderr log file: {tool.err_log}",
            "Temporary files are preserved for debugging",
            "Exiting...",
        )
        for message in failure_messages:
            logger.error(message)

        # Prefer click's exit when a context is available.
        if not ctx:
            sys.exit(1)
        else:
            ctx.exit(1)

`run_stream()`

Runs the tool and streams the output to the terminal.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> tool.run_stream()

Source code in src/baktfold/utils/external_tools.py

def run_stream(self) -> None:
    """
    Runs the tool and streams the output to the terminal.

    The child's stderr is merged into its stdout; every line is echoed to
    the terminal and also written to the stdout log. The stderr log only
    receives the command line (and the extra env vars, if any). Extra env
    vars in ``self.env`` are merged over ``os.environ`` for the child, the
    same way ``run()`` does.

    Raises:
      subprocess.CalledProcessError: If the tool exits with a non-zero
        return code.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> tool.run_stream()
    """
    # None means "inherit the parent environment unchanged".
    merged_env = {**os.environ, **self.env} if self.env else None

    with open(self.out_log, "w") as stdout_fh, open(self.err_log, "w") as stderr_fh:
        print(f"Command line: {self.command_as_str}", file=stderr_fh)
        if self.env:
            print(f"Extra env: {self.env}", file=stderr_fh)
        logger.info(f"Started running {self.command_as_str} ...")

        # ``with subprocess.Popen(...)`` guarantees stdout is closed and
        # ``wait()`` is called on every exit path including exceptions.
        # The inner ``try/except BaseException`` kills the child before
        # ``Popen.__exit__`` calls ``wait()`` — without it an unkilled
        # child can hang the wait and leave a zombie. BaseException (not
        # Exception) is intentional so Ctrl-C / SystemExit also trigger.
        with subprocess.Popen(
            self.command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=1,
            universal_newlines=True,
            env=merged_env,
        ) as process:
            try:
                for line in process.stdout:
                    print(line, end="")        # Live output to terminal
                    stdout_fh.write(line)      # Also write to stdout log
            except BaseException:
                process.kill()
                raise

        # Popen.__exit__ has called wait(); returncode is now set.
        return_code = process.returncode
        logger.info(f"Done running {self.command_as_str}")

        if return_code != 0:
            raise subprocess.CalledProcessError(return_code, self.command)

`run_tool(tool, ctx=None)` `staticmethod`

Runs the given external tool.

Parameters:

Name	Type	Description	Default
`tool`	`ExternalTool`	The external tool to run.	required
`ctx`	`Optional[click.Context]`	The click context to use. Defaults to None.	`None`

Returns:

Type	Description
`None`	None.

Raises:

Type	Description
`subprocess.CalledProcessError`	If there is an error calling the external tool.

Examples:

>>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
>>> ExternalTool.run_tool(tool)
None

Source code in src/baktfold/utils/external_tools.py

@staticmethod
def run_tool(tool: "ExternalTool", ctx: Optional[click.Context] = None) -> None:
    """
    Run a single external tool.

    If the tool exits with a non-zero return code, the failure is logged
    and the process exits with status 1.

    Args:
      tool (ExternalTool): The external tool to run.
      ctx (Optional[click.Context]): The click context to use. Defaults to None.

    Returns:
      None.

    Examples:
      >>> tool = ExternalTool("tool", "input", "output", "params", "logdir")
      >>> ExternalTool.run_tool(tool)
    """
    try:
        tool.run()
    except subprocess.CalledProcessError as error:
        failure_messages = (
            f"Error calling {tool.command_as_str} (return code {error.returncode})",
            f"Please check stdout log file: {tool.out_log}",
            f"Please check stderr log file: {tool.err_log}",
            "Temporary files are preserved for debugging",
            "Exiting...",
        )
        for message in failure_messages:
            logger.error(message)

        # Prefer click's exit when a context is available.
        if not ctx:
            sys.exit(1)
        else:
            ctx.exit(1)

`run_tools(tools_to_run, ctx=None)` `staticmethod`

Runs a list of tools.

Parameters:

Name	Type	Description	Default
`tools_to_run`	`Tuple[ExternalTool]`	The list of tools to run.	required
`ctx`	`Optional[click.Context]`	The click context.	`None`

Examples:

>>> tool1 = ExternalTool("tool1", "input1", "output1", "params1", "logdir")
>>> tool2 = ExternalTool("tool2", "input2", "output2", "params2", "logdir")
>>> ExternalTool.run_tools((tool1, tool2))
>>> ExternalTool.run_tools((tool1, tool2), ctx)

Source code in src/baktfold/utils/external_tools.py

@staticmethod
def run_tools(
    tools_to_run: Tuple["ExternalTool", ...], ctx: Optional[click.Context] = None
) -> None:
    """
    Run several tools one after another.

    If a tool exits with a non-zero return code, the failure is logged and
    the process exits with status 1.

    Args:
      tools_to_run (Tuple[ExternalTool, ...]): The tools to run, in order.
      ctx (Optional[click.Context]): The click context.

    Examples:
      >>> tool1 = ExternalTool("tool1", "input1", "output1", "params1", "logdir")
      >>> tool2 = ExternalTool("tool2", "input2", "output2", "params2", "logdir")
      >>> ExternalTool.run_tools((tool1, tool2))
      >>> ExternalTool.run_tools((tool1, tool2), ctx)
    """
    for current_tool in tools_to_run:
        try:
            current_tool.run()
        except subprocess.CalledProcessError as error:
            failure_messages = (
                f"Error calling {current_tool.command_as_str} (return code {error.returncode})",
                f"Please check stdout log file: {current_tool.out_log}",
                f"Please check stderr log file: {current_tool.err_log}",
                "Temporary files are preserved for debugging",
                "Exiting...",
            )
            for message in failure_messages:
                logger.error(message)

            # Prefer click's exit when a context is available.
            if not ctx:
                sys.exit(1)
            else:
                ctx.exit(1)

`OrderedCommands`

Bases: click.Group

This class will preserve the order of subcommands, which is useful when printing --help

Source code in src/baktfold/utils/util.py

class OrderedCommands(click.Group):
    """Click group that lists subcommands in registration order (e.g. for --help)."""

    def list_commands(self, ctx: click.Context):
        """
        List the subcommand names in the order they were added.

        Args:
          ctx (click.Context): The click context.

        Returns:
          list: The keys of ``self.commands``, in their stored order.
        """
        command_names = [*self.commands]
        return command_names

`list_commands(ctx)`

Returns a list of subcommands in the order they were added.

Parameters:

Name	Type	Description	Default
`ctx`	`click.Context`	The click context.	required

Returns:

Name	Type	Description
`list`		A list of subcommands in the order they were added.

Source code in src/baktfold/utils/util.py

def list_commands(self, ctx: click.Context):
    """
    List the subcommand names in the order they were added.

    Args:
      ctx (click.Context): The click context.

    Returns:
      list: The keys of ``self.commands``, in their stored order.
    """
    command_names = [*self.commands]
    return command_names

`atomic_write_path(target)`

Yield a sibling temp path that is renamed over target on success.

On any exception (including KeyboardInterrupt), the temp is removed and target is left exactly as it was before the with-block.

Source code in src/baktfold/utils/util.py

@contextmanager
def atomic_write_path(target: Union[str, Path]) -> Iterator[Path]:
    """Yield a sibling temp path that is renamed over ``target`` on success.

    The temp file is created next to ``target`` (the parent directory is
    created if needed) so the final ``os.replace`` stays within one
    directory. On any exception (including KeyboardInterrupt) — whether it
    is raised inside the with-block or by the final rename itself — the
    temp is removed and ``target`` is left exactly as it was before the
    with-block.

    Args:
        target: Final destination path.

    Yields:
        Path: The temporary path the caller should write to.

    NOTE(review): the temp file is created by ``tempfile.mkstemp``; if the
    caller writes into it in place, ``target`` ends up with that file's
    permissions — confirm this is acceptable for all outputs.
    """
    target = Path(target)
    target.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp_name = tempfile.mkstemp(
        prefix=f".{target.name}.",
        suffix=".tmp",
        dir=str(target.parent),
    )
    # Only the path is handed out; the caller reopens it as needed.
    os.close(fd)
    tmp_path = Path(tmp_name)
    try:
        yield tmp_path
        # Kept inside the try so that a failed rename is cleaned up too.
        os.replace(tmp_path, target)
    except BaseException:
        try:
            tmp_path.unlink()
        except FileNotFoundError:
            # The caller (or a successful rename) already removed it.
            pass
        raise

`baktfold_base(rel_path)`

Returns the absolute path to the given relative path.

Parameters:

Name	Type	Description	Default
`rel_path`	`str`	The relative path to the file.	required

Returns:

Name	Type	Description
`str`		The absolute path to the file.

Source code in src/baktfold/utils/util.py

def baktfold_base(rel_path):
    """
    Resolve a path relative to the directory containing this module.

    Args:
      rel_path (str): The relative path to the file.

    Returns:
      str: ``rel_path`` joined onto the (symlink-resolved) module directory.
    """
    module_file = os.path.realpath(__file__)
    module_dir = os.path.dirname(module_file)
    return os.path.join(module_dir, rel_path)

`begin_baktfold(params, subcommand, no_log=False)`

Begin baktfold process.

Parameters:

Name	Type	Description	Default
`params`	`Dict[str, Any]`	A dictionary of parameters for baktfold.	required
`subcommand`	`str`	Subcommand indicating the baktfold operation.	required
`no_log`	`bool`	No log file	`False`

Returns:

Name	Type	Description
`float`	`float`	Start time of the baktfold process, as returned by `time.time()`.

Source code in src/baktfold/utils/util.py

def begin_baktfold(params: Dict[str, Any], subcommand: str, no_log: bool = False) -> float:
    """
    Begin baktfold process.

    Resets the logger sinks, records the start time, prints the splash
    screen and logs the version, subcommand and all parameters.

    Parameters:
        params (Dict[str, Any]): A dictionary of parameters for baktfold.
            ``params["--output"]`` is read as the log file directory
            unless ``no_log`` is set.
        subcommand (str): Subcommand indicating the baktfold operation.
        no_log (bool): If True, no log file sink is added.

    Returns:
        float: Start time of the baktfold process, as returned by
        ``time.time()``.
    """
    # Tear down any sinks from a prior call before installing fresh ones.
    _remove_baktfold_sinks()

    # get start time
    start_time = time.time()

    cfg.run_start = datetime.now()

    # initial logging stuff — track ids so they can be removed in end_baktfold.
    if not no_log:
        log_file = os.path.join(params["--output"], f"baktfold_{subcommand}_{start_time}.log")
        _BAKTFOLD_SINK_IDS.append(logger.add(log_file))
    # This sink calls sys.exit(1) for every record at level ERROR or above.
    _BAKTFOLD_SINK_IDS.append(logger.add(lambda _: sys.exit(1), level="ERROR"))

    print_splash()
    logger.info("baktfold: rapid & standardized annotation of bacterial genomes, MAGs & plasmids using protein structural information")

    logger.info(f"You are using baktfold version {get_version()}")
    logger.info("Repository homepage is https://github.com/gbouras13/baktfold")
    logger.info(f"You are running baktfold {subcommand}")
    logger.info("Listing parameters")
    for key, value in params.items():
        logger.info(f"Parameter: {key} {value}")

    return start_time

`clean_up_temporary_files(output, prefix)`

Clean up temporary files generated during the baktfold process.

Parameters:

Name	Type	Description	Default
`output`	`Path`	Path to the output directory.	required
`prefix`	`str`	prefix str	required

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/util.py

def clean_up_temporary_files(output: Path, prefix: str) -> None:
    """
    Delete the intermediate files and directories of a baktfold run.

    Parameters:
        output (Path): Path to the output directory.
        prefix (str): Prefix of the ``<prefix>_aa.fasta`` file to remove.

    Returns:
        None
    """
    out_dir = Path(output)

    # Intermediate foldseek databases.
    for dir_name in ("result_db", "temp_db", "foldseek_db"):
        remove_directory(out_dir / dir_name)

    # Amino-acid FASTA plus the per-database foldseek result tables.
    temporary_files = (
        f"{prefix}_aa.fasta",
        "foldseek_results_swissprot.tsv",
        "foldseek_results_afdb_clusters.tsv",
        "foldseek_results_pdb.tsv",
        "foldseek_results_custom.tsv",
        "foldseek_results_cath.tsv",
    )
    for file_name in temporary_files:
        remove_file(out_dir / file_name)

`echo_click(msg, log=None)`

Prints a message to stdout and optionally to a log file.

Parameters:

Name	Type	Description	Default
`msg`	`str`	The message to print.	required
`log`	`str`	The path to the log file.	`None`

Returns:

Type	Description
	None

Source code in src/baktfold/utils/util.py

def echo_click(msg, log=None):
    """
    Echo a message to stderr and optionally append it to a log file.

    The message is written as-is, without an added trailing newline.

    Args:
      msg (str): The message to print.
      log (str): The path to the log file; skipped when falsy.

    Returns:
      None
    """
    click.echo(msg, nl=False, err=True)
    if not log:
        return
    with open(log, "a") as log_handle:
        log_handle.write(msg)

`end_baktfold(start_time, subcommand)`

Finish baktfold process and log elapsed time.

Parameters:

Name	Type	Description	Default
`start_time`	`float`	Start time of the process.	required
`subcommand`	`str`	Subcommand name indicating the baktfold operation.	required

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/util.py

def end_baktfold(start_time: float, subcommand: str) -> None:
    """
    Log the closing messages and elapsed time, then remove the logger sinks.

    Parameters:
        start_time (float): Start time of the process.
        subcommand (str): Subcommand name indicating the baktfold operation.

    Returns:
        None
    """
    # Wall-clock seconds since start_time, rounded to two decimals.
    elapsed_time = round(time.time() - start_time, 2)

    cfg.run_end = datetime.now()
    run_duration = (cfg.run_end - cfg.run_start).total_seconds()
    minutes = int(run_duration / 60)
    seconds = int(run_duration % 60)

    # A DOI-based citation line (bc.BAKTA_DOI) was here; the repository URL is used instead.
    logger.info('If you use these results please cite Baktfold: https://github.com/gbouras13/baktfold')
    logger.info(f'baktfold {subcommand} successfully finished in {minutes:02}:{seconds:02} [mm:ss].')

    logger.info(f"baktfold {subcommand} has finished")
    logger.info(f"Elapsed time: {elapsed_time} seconds")

    # Clean up sinks so a subsequent call (or test) starts with a clean logger.
    _remove_baktfold_sinks()

`get_type_rank(f)`

Ranks eukaryotic features in the order gene -> mRNA -> CDS and gene -> tRNA. The ranks of the 5'UTR and 3'UTR, if present, are adjusted dynamically according to the strand.

Source code in src/baktfold/utils/util.py

def get_type_rank(f):
    """
    Return the sort rank of a eukaryotic feature within its gene.

    Fixed ranks are gene (0), mRNA (1), cds (3) and tRNA (6). The UTRs take
    rank 2 or 4 depending on the strand: 5'UTR is 2 on '+' and 4 otherwise,
    3'UTR is the reverse. Every other type gets 99.
    """
    feature_type = f['type']
    on_plus_strand = f.get('strand', '+') == '+'  # a missing strand counts as '+'

    # UTR ranks depend on the strand
    if feature_type == bc.FEATURE_5UTR:
        return 2 if on_plus_strand else 4
    if feature_type == bc.FEATURE_3UTR:
        return 4 if on_plus_strand else 2

    # strand-independent ranks; unknown types fall back to 99
    fixed_ranks = {'gene': 0, 'mRNA': 1, 'cds': 3, 'tRNA': 6}
    return fixed_ranks.get(feature_type, 99)

`get_version()`

Returns the version number from the VERSION file.

Returns:

Name	Type	Description
`str`		The version number.

Source code in src/baktfold/utils/util.py

def get_version():
    """
    Read the version string from the VERSION file.

    Returns:
      str: The first line of the VERSION file exactly as read (any trailing
        newline present in the file is kept).
    """
    version_path = baktfold_base("VERSION")
    with open(version_path, "r") as handle:
        return handle.readline()

`print_citation()`

Prints the contents of the CITATION file to stdout.

Returns:

Type	Description
	None

Source code in src/baktfold/utils/util.py

def print_citation():
    """
    Echo the CITATION file, one line at a time, through echo_click.

    Returns:
      None
    """
    citation_path = baktfold_base("CITATION")
    with open(citation_path, "r") as handle:
        for citation_line in handle:
            echo_click(citation_line)

`print_splash()`

Prints the splash screen to stdout.

Returns:

Type	Description
	None

Source code in src/baktfold/utils/util.py

def print_splash():
    """
    Print the baktfold ASCII-art splash banner with click.echo.

    Returns:
      None
    """
    # Every literal backslash of the banner is written as an escaped pair:
    # a lone backslash followed by a space, underscore or pipe is an invalid
    # escape sequence and raises a SyntaxWarning on Python 3.12+. The leading
    # backspace escape is kept as-is so the emitted text is unchanged.
    click.echo(
        """\b

  _           _    _    __      _     _ 
 | |         | |  | |  / _|    | |   | |
 | |__   __ _| | _| |_| |_ ___ | | __| |
 | '_ \\ / _` | |/ / __|  _/ _ \\| |/ _` |
 | |_) | (_| |   <| |_| || (_) | | (_| |
 |_.__/ \\__,_|_|\\_\\__|_| \\___/|_|\\__,_|


"""
    )

`remove_directory(dir_path)`

Remove a directory and all its contents if it exists.

Parameters:

Name	Type	Description	Default
`dir_path`	`Path`	Path to the directory to remove.	required

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/util.py

def remove_directory(dir_path: Path) -> None:
    """
    Delete a directory tree if the path exists; do nothing otherwise.

    Parameters:
        dir_path (Path): Path to the directory to remove.

    Returns:
        None
    """
    if not dir_path.exists():
        return

    # best-effort removal: errors raised while deleting are ignored
    shutil.rmtree(dir_path, ignore_errors=True)

`remove_file(file_path)`

Remove a file if it exists.

Parameters:

Name	Type	Description	Default
`file_path`	`Path`	Path to the file to remove.	required

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/util.py

def remove_file(file_path: Path) -> None:
    """
    Delete a file if the path exists; do nothing otherwise.

    Parameters:
        file_path (Path): Path to the file to remove.

    Returns:
        None
    """
    if not file_path.exists():
        return

    file_path.unlink()

`replace_pipe_in_fasta(input_path)`

Replace '~PIPE~' with '|' in FASTA headers, writing atomically.

Streams line-by-line to a sibling temp file and renames it onto input_path on success. A kill mid-write leaves the original intact.

Source code in src/baktfold/utils/util.py

def replace_pipe_in_fasta(input_path):
    """Rewrite a FASTA file with '~PIPE~' turned back into '|' in its headers.

    The file is streamed line by line into the temporary path handed out by
    ``atomic_write_path(input_path)``; only header lines (starting with '>')
    are altered, every other line is copied unchanged.  The context manager is
    expected to move the temporary file onto ``input_path`` on success, so an
    interrupted write leaves the original untouched.
    """
    with atomic_write_path(input_path) as tmp, \
            open(input_path, "r") as source, \
            open(tmp, "w") as sink:
        for record_line in source:
            if record_line.startswith(">"):
                # replace() leaves headers without the placeholder unchanged
                record_line = record_line.replace("~PIPE~", "|")
            sink.write(record_line)

`sort_euk_feature_key(f)`

Sorts a feature dictionary by start, locus, type rank, and stop.

Parameters:

Name	Type	Description	Default
`f`	`dict`	The feature dictionary.	required

Returns:

Name	Type	Description
`tuple`		A tuple of the sorted values.

Source code in src/baktfold/utils/util.py

def sort_euk_feature_key(f):
    """
    Build the sort key for a eukaryotic feature dictionary.

    Features that carry a locus and a known type rank are keyed as
    (start, 0, locus, type rank, stop); all other features are keyed as
    (start, 1, '', 99, stop). A missing start or stop is treated as infinity.

    Args:
      f (dict): The feature dictionary.

    Returns:
      tuple: The sort key for the feature.
    """
    begin = f.get('start', float('inf'))
    end = f.get('stop', float('inf'))
    locus_id = f.get('locus')
    rank = get_type_rank(f)

    # No locus, or a type that get_type_rank does not know (rank 99)
    if not locus_id or rank == 99:
        return (begin, 1, '', 99, end)

    # Within a locus: order by type rank, then by stop (e.g. several CDS)
    return (begin, 0, locus_id, rank, end)

`touch_file(path)`

Update the access and modification times of a file to the current time, creating the file if it does not exist.

Parameters:

Name	Type	Description	Default
`path`	`Path`	Path to the file.	required

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/util.py

def touch_file(path: Path) -> None:
    """
    Set a file's access and modification times to now, creating it if needed.

    Parameters:
        path (Path): Path to the file.

    Returns:
        None
    """
    # Append mode creates a missing file and never truncates an existing one
    with open(path, mode="a"):
        # times=None means "use the current time"
        os.utime(path, times=None)

`check_dependencies()`

Checks the dependencies and versions of non Python programs (i.e. Foldseek)

Returns:

Type	Description
`None`	None

Source code in src/baktfold/utils/validation.py

def check_dependencies() -> None:
    """
    Check that Foldseek can be launched and report the version it prints.

    Runs ``foldseek version``. If the process cannot be started, an error is
    logged and the program exits with status 1. Otherwise the reported version
    is logged: at info level when it contains the recommended commit
    ``941cd33``, and as warnings when it does not.

    Parameters:
        None

    Returns:
        None

    """

    recommended_major = 10
    recommended_commit = "941cd33"

    #############
    # foldseek
    #############
    # OSError also covers FileNotFoundError and PermissionError (both are
    # subclasses), i.e. the errors Popen raises when the binary is missing or
    # not executable. Exit here so the code below never sees an unbound process.
    try:
        process = sp.Popen(["foldseek", "version"], stdout=sp.PIPE, stderr=sp.STDOUT)
    except OSError as e:
        logger.error(
            f"Foldseek not found on PATH ({type(e).__name__}: {e}). "
            "Install foldseek and ensure it is on your PATH, then re-run baktfold."
        )
        sys.exit(1)

    # stderr is merged into stdout, so only the first element carries output
    raw_output = process.communicate()[0]
    reported_version = raw_output.decode().strip()

    if recommended_commit not in reported_version:
        logger.warning(f"Foldseek version found is v{reported_version}")
        logger.warning("baktfold is recommended to be run with Foldseek v10.941cd33")
        logger.warning("Using a different Foldseek version is likely to work without issue, but this cannot be guaranteed.")
        return

    logger.info(
        f"Foldseek version found is v{recommended_major}.{recommended_commit}"
    )
    logger.info("Foldseek version is ok")

`check_genbank_and_prokka(filepath, euk)`

Validate that an input file is a readable GenBank file and check whether it was annotated using Prokka. The function transparently supports compressed files (e.g., .gz, .bz2, .xz, .zst) via xopen.

Validation steps

• Attempts to parse the file as GenBank using Biopython.

• Logs an error and returns None if no GenBank records are found.

• Checks the COMMENT field of each record for a Prokka signature ("Annotated using prokka", case-insensitive).

• If no Prokka annotation is detected, a warning is logged but parsing continues, because the file is still valid GenBank.

Parameters

filepath : str

Path to the GenBank or compressed GenBank file.

euk : flag

Whether or not the input is eukaryotic (if set, the Prokka check is skipped).

Returns

list[SeqRecord] or None: A list of Biopython SeqRecord objects if parsing succeeds. Returns None if the file is not valid GenBank or cannot be parsed.

Source code in src/baktfold/utils/validation.py

def check_genbank_and_prokka(filepath, euk):
    """
    Parse an input file as GenBank and, for non-eukaryotic input, check
    whether it carries a Prokka signature. The file is opened through `xopen`,
    which is relied on for compressed inputs (e.g., .gz, .bz2, .xz, .zst).

    Validation steps:
      • Parses the file as GenBank using Biopython.
      • Logs an error and returns None if no GenBank records are found.
      • Unless `euk` is set, looks for "annotated using prokka"
        (case-insensitive) in the COMMENT annotation of the records.
      • If no record carries that signature, warnings are logged but the
        parsed records are still returned.

    Parameters
    ----------
    filepath : str
        Path to the GenBank or compressed GenBank file.
    euk: flag
        whether or not the input is eukaryotic (skips the Prokka check)

    Returns
    -------
    list[SeqRecord] or None
        The parsed Biopython SeqRecord objects, or None if no records were
        found or any exception was raised while parsing/checking.
    """
    import io

    try:
        # Binary handle from xopen, wrapped as text because SeqIO.parse
        # expects a text handle; undecodable bytes are replaced, not fatal.
        with xopen(filepath, "rb") as raw_handle:
            decoded_handle = io.TextIOWrapper(raw_handle, encoding="utf-8", errors="replace")
            records = list(SeqIO.parse(decoded_handle, "genbank"))

        if not records:
            logger.error(f"Input file {filepath} is not GenBank format. Please check your input")
            return None

        if not euk:
            # True as soon as one record's comment carries the signature
            from_prokka = any(
                "annotated using prokka" in (rec.annotations.get("comment", "") or "").lower()
                for rec in records
            )

            if not from_prokka:
                logger.warning(f"Input file {filepath} does not appear to come from Prokka.")
                logger.warning("Conversion will proceed but no guarantee of success.")

    except Exception:
        logger.error(f"There was an error parsing {filepath}. Please check your input")
        return None

    return records

`instantiate_dirs(output_dir, force)`

Checks and instantiates the output directory.

Parameters:

Name	Type	Description	Default
`output_dir`	`Union[str, Path]`	Path to the output directory.	required
`force`	`bool`	Force flag indicating whether to overwrite existing directory.	required

Returns:

Name	Type	Description
`Path`	`Path`	Final output directory path.

Source code in src/baktfold/utils/validation.py

def instantiate_dirs(output_dir: Union[str, Path], force: bool) -> Path:
    """
    Checks and instantiates the output directory.

    With ``force`` an existing directory is removed first. Without it, an
    error is logged if the directory already exists. The directory is then
    created (including parents) if it does not exist.

    Parameters:
        output_dir (Union[str, Path]): Path to the output directory.
        force (bool): Force flag indicating whether to overwrite existing directory.

    Returns:
        Path: Final output directory path.
    """
    out_path = Path(output_dir)

    logger.info(f"Checking the output directory {output_dir}")
    if force is True:
        # remove outdir on force
        if out_path.exists():
            logger.info(f"Removing {output_dir} because --force was specified")
            shutil.rmtree(output_dir)
        else:
            logger.info(
                "--force was specified even though the output directory does not already exist. Continuing"
            )
    elif out_path.exists():
        # NOTE(review): nothing in this function stops execution after the
        # error is logged; presumably an ERROR-level logger sink or the caller
        # aborts the run - confirm.
        logger.error(
            "Output directory already exists and force was not specified. Please specify -f or --force to overwrite the output directory"
        )

    # instantiate outdir
    if out_path.exists() is False:
        out_path.mkdir(parents=True, exist_ok=True)

    # The signature and docstring promise a Path; previously the function fell
    # off the end and returned None.
    return out_path

`validate_outfile(outfile, force)`

Checks and instantiates the output file for baktfold convert-prokka

Parameters:

Name	Type	Description	Default
`outfile`	`Union[str, Path]`	Path to the output file.	required
`force`	`bool`	Force flag indicating whether to overwrite existing outfile.	required

Returns:

Name	Type	Description
`Path`	`Path`	Final output file path.

Source code in src/baktfold/utils/validation.py

def validate_outfile(outfile: Union[str, Path], force: bool) -> Path:
    """
    Checks the output file for baktfold convert-prokka.

    With ``force`` an existing output file is removed. Without it, an error is
    logged if the file already exists. The file itself is not created here.

    Parameters:
        outfile (Union[str, Path]): Path to the output file.
        force (bool): Force flag indicating whether to overwrite existing outfile.

    Returns:
        Path: Final output file path.
    """
    out_path = Path(outfile)

    logger.info(f"Checking the output file {outfile}")
    if force is True:
        # remove the existing output file on force
        if out_path.exists():
            logger.info(f"Removing {outfile} because --force was specified")
            out_path.unlink()
        else:
            logger.info(
                f"--force was specified even though the output file {outfile} does not already exist. Continuing"
            )
    elif out_path.exists():
        # NOTE(review): nothing in this function stops execution after the
        # error is logged; presumably an ERROR-level logger sink or the caller
        # aborts the run - confirm.
        logger.error(
            f"Output file {outfile} already exists and force was not specified. Please specify -f or --force to overwrite the output file"
        )

    # The signature and docstring promise a Path; previously the function fell
    # off the end and returned None.
    return out_path

Utils

ExternalTool

command_as_str: str property

__init__(tool, input, output, params, logdir, env=None)

run()

run_download(tool, ctx=None) staticmethod

run_stream()

run_tool(tool, ctx=None) staticmethod

run_tools(tools_to_run, ctx=None) staticmethod

OrderedCommands

list_commands(ctx)

atomic_write_path(target)

baktfold_base(rel_path)

begin_baktfold(params, subcommand, no_log=False)

clean_up_temporary_files(output, prefix)

echo_click(msg, log=None)

end_baktfold(start_time, subcommand)

get_type_rank(f)

get_version()

print_citation()

print_splash()

remove_directory(dir_path)

remove_file(file_path)

replace_pipe_in_fasta(input_path)

sort_euk_feature_key(f)

touch_file(path)

check_dependencies()

check_genbank_and_prokka(filepath, euk)

Parameters

Returns

instantiate_dirs(output_dir, force)

validate_outfile(outfile, force)

`ExternalTool`

`command_as_str: str` `property`

`__init__(tool, input, output, params, logdir, env=None)`

`run()`

`run_download(tool, ctx=None)` `staticmethod`

`run_stream()`

`run_tool(tool, ctx=None)` `staticmethod`

`run_tools(tools_to_run, ctx=None)` `staticmethod`

`OrderedCommands`

`list_commands(ctx)`

`atomic_write_path(target)`

`baktfold_base(rel_path)`

`begin_baktfold(params, subcommand, no_log=False)`

`clean_up_temporary_files(output, prefix)`

`echo_click(msg, log=None)`

`end_baktfold(start_time, subcommand)`

`get_type_rank(f)`

`get_version()`

`print_citation()`

`print_splash()`

`remove_directory(dir_path)`

`remove_file(file_path)`

`replace_pipe_in_fasta(input_path)`

`sort_euk_feature_key(f)`

`touch_file(path)`

`check_dependencies()`

`check_genbank_and_prokka(filepath, euk)`

`instantiate_dirs(output_dir, force)`

`validate_outfile(outfile, force)`