Source code for abed.help

"""Functions for displaying command-line help

This module contains all the documentation and functionality needed for the 
'abed help' command.

"""

# Author: Gertjan van den Burg
# Date: Sat Oct  8, 2016
# License: GPL v. 2

import textwrap

# General description for the Abed help
DESCRIPTION = "Abed is a utility for Automated BEnchmark Distribution"


# Categories for the commands in the Abed help
COMMAND_CATEGORIES = [
    ("Initialization commands:", ["init", "setup"]),
    ("Compute cluster job management:", ["push", "pull", "auto", "repull"]),
    ("Task management:", ["update_tasks", "reload_tasks"]),
    ("Computations:", ["run", "local"]),
    ("Abed status:", ["status", "explain_tasks", "explain_tbd_tasks"]),
    (
        "Result management:",
        ["view_results", "compress_results", "move_results"],
    ),
    ("Manual intervention:", ["parse_results", "process_zips"]),
]


# Short help line for each command
ABED_SHORT_HELP = {
    "auto": ("Automate push and pull to facilitate " "continuous operation"),
    "compress_results": "Compress completed dataset directories.",
    "explain_tbd_tasks": (
        "Print the task ID and command " "of remaining tasks"
    ),
    "explain_tasks": ("Print the task ID and command " "of all defined tasks"),
    "init": "Initialize a skeleton for abed",
    "help": "Show help for Abed",
    "local": "Run the computations locally.",
    "parse_results": "Parse the results into summary files",
    "process_zips": "Process result zip files",
    "pull": "Pull all results from the cluster and process them",
    "push": "Push all necessary data to the cluster using fabric",
    "reload_tasks": "Reload the task file based on config and results",
    "repull": ("Repull results for all jobids in the auto log file"),
    "run": "Run the master/worker MPI program of abed on the cluster",
    "setup": (
        "Setup the remote directory structure and transfer the " "datasets"
    ),
    "status": "Status of abed task list",
    "update_tasks": "Update the task list (part of pull)",
    "view_results": "Open the HTML results in the default browser",
    "move_results": "Move any results from stagedir to result dir",
}


# Synopsis for commands which take parameters or options
ABED_SYNOPSES = {
    "help": "abed help [<topic>]",
    "parse_results": "abed parse_results [<options>]",
}


# See also definitions for the commands
ABED_SEE_ALSO = {
    "auto": ["pull", "push"],
    "compress_results": ["view_results"],
    "explain_tbd_tasks": ["explain_tasks", "status"],
    "explain_tasks": ["explain_tbd_tasks", "status"],
    "init": ["setup"],
    "help": [],
    "local": ["run"],
    "parse_results": ["view_results"],
    "process_zips": ["pull", "move_results"],
    "move_results": ["process_zips"],
    "pull": ["auto", "push", "process_zips", "update_tasks", "repull"],
    "push": ["auto", "pull", "setup", "run"],
    "reload_tasks": ["update_tasks", "status"],
    "repull": ["pull"],
    "run": ["local"],
    "setup": ["push", "init"],
    "status": [],
    "update_tasks": ["reload_tasks", "status"],
    "view_results": ["compress_results"],
}


# Documentation of options for commands which have options
ABED_OPTIONS = {
    "parse_results": [
        (
            "--skip-cache, -s",
            """\
                    When parsing the result files, Abed checks if the result 
                    cache needs to be reconstructed.  Since this can be a 
                    time-intensive task, it can be useful in some cases to skip 
                    cache regeneration and use an existing result cache.  With 
                    this flag to the parse_result command, cache regeneration 
                    can be skipped.  The user should be aware that results are 
                    potentially outdated or incomplete if this flag is used.
                    """,
        )
    ]
}


# Long help description for each command, for 'abed help <command>' calls
ABED_LONG_HELP = {
    "auto": """\
                The 'auto' command automates repeated uses of 'push' and 'pull' 
                commands. It regularly checks the compute cluster to see if the 
                current job is queued, running, or finished. If the job is 
                queued but not yet running, Abed will attempt to get the 
                expected starting time of the job and display this to the user.  
                If a job is running, Abed will attempt to get the remaining 
                computation time of the job, and display this to the user. If 
                no running job can be found, Abed assumes that the job is 
                finished and will try to get the job ID of the job from the log  
                files, and subsequently pull the results from the cluster. When 
                this is finished, Abed will mark the job ID in the AUTO_FILE,  
                to ensure it isn't pulled twice. After the pull command is done 
                and there are still tasks remaining, Abed will execute a push 
                command.

                The 'auto' command should only be run after a job has already 
                been submitted by using the 'push' command. Note that for the 
                'auto' command to work properly, password-less login to the 
                compute cluster should be configured. This can be done by 
                exchanging SSH keys with the cluster.
                """,
    "compress_results": """\
                The 'compress_results' command can be used when the disk space 
                used by the raw results is too large. This command finds out 
                for which datasets all tasks have been finished, and compresses 
                the corresponding directories with the highest compression 
                level possible. Since this can be a time-consuming command, it 
                is best to run it when you're asleep. The type of compression 
                algorithm used by Abed can be set using the COMPRESSION 
                setting.

                Note that after Abed creates a compressed archive of a results 
                directory for a dataset, it doesn't remove the original dataset 
                directory. This should be done by the user.
                """,
    "explain_tbd_tasks": """\
                Print an overview with the mapping from hash to task for the 
                tasks that remain to be done.
                """,
    "explain_tasks": """\
                Print an overview with the mapping from hash to task for all 
                tasks.
                """,
    "init": """\
                Initialize a new Abed experiment. This is the first step to 
                starting a new Abed experiment and should be done only once.  
                This command creates the initial files: the settings file, an 
                empty task file, an empty auto file, and two directories for 
                the datasets and executables respectively.  Additionally, Abed 
                will create a Git repository and add the settings file and the 
                task file to it.
                """,
    "help": """\
                Display help on the commands to Abed. For available commands, 
                simply type 'abed help', for help on a certain command type 
                'abed help <command>'.
                """,
    "local": """\
                Run the computations locally. This command is essentially the 
                same as the 'run' command, but it runs the computations 
                locally. Since the master-worker program in Abed that runs the 
                computations uses MPI, this command should be executed through 
                mpiexec: 'mpiexec abed local'. Note that this command requires 
                at least two cores on your workstation, one for the master 
                thread and the remainders for the working threads.
                """,
    "move_results": """\
                Move any results from the stage directory to the results 
                directory. This is useful when something goes wrong during 
                pull, but shouldn't be necessary to use in regular use.
                """,
    "parse_results": """\
                Process the result files into summary pages. This process will 
                be started automatically when Abed detects that there are no 
                more tasks to be done after the 'pull' command, but it can also 
                be used to generate result pages before all the results are in.  
                Both text summary files and web pages are generated. The 
                webpages can easily be viewed in the browser using 'abed 
                view_results'
                """,
    "process_zips": """\
                This command is included as a fallback command. In general, the 
                'pull' command should unpack the compressed archives of results 
                obtained from the compute cluster. However, if this fails for 
                some reason, this command can be used to unpack the archives 
                manually. Note that typically the archived files will not be 
                actual .zip files, but .bz2 (bzip) files.
                """,
    "pull": """\
                Download the results from the compute cluster and process them.  
                This command downloads the bzip2 archives from the bzips 
                directory in the current directory on the compute cluster, as 
                well as the PBS log files from the log directory on the compute 
                cluster. When the file transfers are finished, the bzip2 
                archives are first unpacked in the STAGE_DIR, after which they 
                are organized hierarchically based on dataset and method, in 
                the RESULT_DIR. After this is finished, the job ID of the 
                remote job is obtained from the log files and registred in the 
                AUTO_FILE. Finally, the list of remaining tasks is updated, 
                which is automatically registered in the Git repository.
                """,
    "push": """\
                This command transfers the Git repository to the compute 
                cluster and queues the job there. It's important to realise 
                that only the files that are registered in the Git repository 
                will be transferred. To help with this, the push command will 
                print an error is there are uncommitted changes in the Git 
                repository.

                If there are no uncommited changes, this command will continue 
                by transferring the Git repository to the compute cluster.  
                Next, it moves the datasets over that have been uploaded 
                earlier with the 'abed setup' command. Following this, if 
                compilation is required, the build command will run on the 
                compute cluster.  Finally, Abed will write the PBS batch file 
                and submit it to the job queue.
                """,
    "reload_tasks": """\
                This command should be used when the tasks need to be 
                regenerated from the settings file. Any changes to the METHODS, 
                DATASETS, PARAMETERS, or COMMANDS settings require that this 
                command is executed. A powerful feature of Abed is that you can 
                add to these variables while some existing tasks have already 
                been completed. This allows you to extend your experiment at a 
                later time when more methods, datasets, or paramter 
                configurations are necessary.

                Important: When using the CV_TT experiment type, only add to 
                the above settings at the _end_ of the lists. Due to technical 
                reasons related to the random seed that Abed generates for this 
                experiment type, if you add them anywhere else it will mess up 
                the hashes of the other tasks.
                """,
    "repull": """\
                With this command you can pull the results from previously 
                finished jobs. This is useful when you wish to download results 
                from the compute cluster on a different workstation. This will 
                read the job IDs from the AUTO_FILE, and pull the results from 
                the corresponding directories on the compute cluster.
                """,
    "run": """\
                This command starts the computations on the compute cluster, 
                and will typically not be run by the user, but through a (PBS) 
                job file. If for whatever reason you're running this command 
                manually, bare in mind that it should be run through mpiexec.  
                For running computations on your local workstation, use the 
                'local' command to Abed.
                """,
    "setup": """\
                This command sets up the directory structure that Abed uses on 
                the compute cluster, as well as transferring the datasets to 
                the cluster. Run this command after you've finished selecting 
                your datasets and have configured your Abed project, just 
                before the first time you run 'abed push'. This command should 
                be run only once for a project. If you need to add datasets to 
                your simulations at a later stage, copy them manually to the 
                'current/datasets' directory on the compute cluster.
                """,
    "status": """\
                Get an overview of the current status of an Abed project. This 
                command will give the number of tasks that have been defined, 
                as well as the number of tasks that remain to be done.

                Note that after you've added to the settings file to extend the 
                computations with more tasks, running this command will not 
                give the correct total. In that case, run the 'reload_tasks' 
                command first, to update the task list correctly.
                """,
    "update_tasks": """\
                This command updates the task list based on the definitions in 
                the settings file, and the result files in the RESULT_DIR 
                directory. It is automatically run after the pull command, but 
                if this fails for some reason, you can use this to update the 
                task list.
                """,
    "view_results": """\
                Open the default browser to view the results. This function is 
                included for convenience.
                """,
}


[docs]def bold(text): """Add bold escape sequences to text This adds the ``'\\033[1m'`` before the string, and ``'\\033[0m'`` after the string. Parameters ---------- text : str Text to include in escape sequences for bold text Returns ------- str Text surrounded by escape sequences """ return "\033[1m" + text + "\033[0m"
[docs]def paragraph_wrapper(all_text, width=70, indent="\t"): """Format text to have a maximum length while maintaining paragraphs This function is very similar to `textwrap.wrap() <https://docs.python.org/3/library/textwrap.html#textwrap.wrap>`_, with the exception that paragraphs in the triple-quoted string will be maintained. Parameters ---------- all_text : str Text to format, expected to be a triple-quoted string. width : int, optional Maximum width of the formatted text. indent : str, optional Indentation of the paragraph in the output. Returns ------- str Indented formatted paragraphs """ # split on paragraphs in the triple-quoted string texts = all_text.split("\n\n") # remove indentation of paragraphs dedents = [textwrap.dedent(text) for text in texts] # clean out double spaces cleans = [ " ".join([x for x in dedented.split(" ") if x]) for dedented in dedents ] # remove newlines cleaner = [clean.replace("\n", "") for clean in cleans] # use textwrap.fill() to wrap the text filled = [] for para in cleaner: filled.append( textwrap.fill( para, width=width, initial_indent=indent, subsequent_indent=indent, ) ) # join paragraphs return "\n\n".join(filled)
[docs]def cmd_strings(cmds): """Format commands with short help for general help text Given a list of commands, this formats the help text in the form:: command short description The formatting is done such that the short descriptions of all the commands line up in the same way, and long descriptions are continued on the next line at the correct indentation. Parameters ---------- cmds : list The command names (as strings) for which the help text should be generated Returns ------- str Formatted help text """ txt = [] space_before = " " maxlen = max((len(k) for k in ABED_SHORT_HELP)) for cmd in cmds: first = True sentences = textwrap.wrap(ABED_SHORT_HELP[cmd], 60) space_after = " " * (maxlen + 2 - len(cmd)) line = "" while sentences: arg = cmd if first else " " * len(cmd) lead = "%s%s%s" % (space_before, arg, space_after) line += lead + sentences.pop(0) + "\n" first = False line = line.rstrip("\n") txt.append(line) txt.append("") return txt
[docs]def get_help(): """Generate the main help text Generate the main help text for Abed. This is the help text that is shown with the commands ``abed help`` and ``abed``. The help text is modelled on the help text that is shown when you run ``git`` without arguments. Returns ------- str Help text for Abed """ # Generate the basis text text = [ "usage: abed <command> [<options>]", "", DESCRIPTION, "", "Available Abed commands are:", "", ] # Expand the help text with different categories for category, commands in COMMAND_CATEGORIES: text.append(category) text.extend(cmd_strings(commands)) # Add the remainder of the help text text += [ "", "Use 'abed help <command>' to learn about a specific command,", "or check the online documentation at: ", "http://gjjvdburg.github.io/abed", ] return "\n".join(text)
[docs]def get_command_help(command): """Generate help text for a command This function generates the help text for a command that is printed when you call ``abed help <command>``. This command formats the help text similar to the structure of manual pages on Linux. The help text consists of several paragraphs: name, synopsis, description, and optionally a see also and options paragraph. The content of these paragraphs comes from the variables ABED_SHORT_HELP, ABED_SYNOPSES, ABED_LONG_HELP, ABED_SEE_ALSO, and ABED_OPTIONS. Parameters ---------- command : str The command to generate help text for Returns ------- str Help text formatted appropriately, None if nonexisting command. """ if not command in ABED_LONG_HELP: return None # Build the required paragraphs synop = ABED_SYNOPSES.get(command, "abed %s" % command) txt = [ "Abed Help", "", bold("NAME"), "\tabed-%s - %s" % (command, ABED_SHORT_HELP[command]), "", bold("SYNOPSIS"), "\t%s" % (synop), "", bold("DESCRIPTION"), paragraph_wrapper(ABED_LONG_HELP[command]), ] # If the command has options, build the options paragraph if command in ABED_OPTIONS: txt += ["", bold("OPTIONS")] for option in ABED_OPTIONS[command]: txt.append("\t" + option[0]) txt.append(paragraph_wrapper(option[1], width=62, indent="\t\t")) # If the command has see also's, build that paragraph if command in ABED_SEE_ALSO and ABED_SEE_ALSO[command]: txt += ["", bold("SEE ALSO"), "\t" + ", ".join(ABED_SEE_ALSO[command])] return "\n".join(txt)