# Source code for capreolus.run

import logging
import os
import sys

from docopt import docopt
from profane import DBManager, config_list_to_dict, constants

from capreolus.task import Task
from capreolus.utils.loginit import get_logger

# Module-level logger, created through capreolus's own get_logger helper.
logger = get_logger(__name__)  # pylint: disable=invalid-name


def parse_task_string(s):
    """Split a ``task[.command]`` string into a task name and a command name.

    Args:
        s: Command string such as ``rank.search`` or just ``rank``. When no
            ``.command`` part is given, the task class's ``default_command``
            is used.

    Returns:
        A ``(task, cmd)`` tuple of strings.

    Raises:
        SystemExit: With status 2, after printing the task's valid commands,
            when the task class has no attribute named ``cmd``.
    """
    # Split on the first dot only: everything after it is the command. Input
    # with extra dotted parts ("rank.search.x") therefore fails the attribute
    # check below instead of silently falling back to the default command.
    task, dot, cmd = s.partition(".")
    task_cls = Task.lookup(task)

    if not dot:
        cmd = task_cls.default_command

    if not hasattr(task_cls, cmd):
        print("error: invalid command:", s)
        print(f"valid commands for task={task}: {sorted(task_cls.commands)}")
        sys.exit(2)

    return task, cmd


def prepare_task(fullcommand, config):
    """Instantiate the task named by ``fullcommand`` and look up its command.

    Args:
        fullcommand: ``task[.command]`` string, parsed by
            ``parse_task_string``.
        config: Configuration passed unchanged to ``Task.create``.

    Returns:
        A ``(task, task_entry_function)`` tuple: the object returned by
        ``Task.create`` and that object's attribute named after the command.
    """
    taskstr, commandstr = parse_task_string(fullcommand)
    task = Task.create(taskstr, config)
    return task, getattr(task, commandstr)


# Command-line help text. It is passed to docopt() in the __main__ block below,
# so the "Usage:" patterns here define the accepted command line.
help = """
Usage:
    capreolus COMMAND [(with CONFIG...)] [options]
    capreolus help [COMMAND]
    capreolus (-h | --help)


    Options:
      -h --help                     Print this help message and exit.
      -l VALUE --loglevel=VALUE     Set the log level: DEBUG, INFO, WARNING, ERROR, or CRITICAL.
      -p VALUE --priority=VALUE     Sets the priority for a queued up experiment. No effect without -q flag.
      -q --queue                    Queue this run, and do not start it.


    Arguments:
      COMMAND   Name of pipeline to run, which consists of a Task and a command (see below for a list)
      CONFIG    Configuration assignments of the form foo.bar=17


    Tasks and their commands:
      rank.search             search a collection using queries from a benchmark
      rank.evaluate           evaluate the result of rank.search
      rank.searcheval         run rank.search followed by rank.evaluate

      rerank.train            run rank.search and train a model to rerank the results
      rerank.evaluate         evaluate the result of rerank.train
      rerank.traineval        run rerank.train followed by rerank.evaluate

      rererank.train          run rerank.train and train a (second) model to rerank the results
      rererank.evaluate       evaluate the result of rererank.train
      rererank.traineval      run rererank.train followed by rererank.evaluate

      tutorial.run            task from the "Getting Started" tutorial

    All tasks additionally support the following help commands: describe, print_config, print_pipeline
      e.g., capreolus rank.print_config with searcher=BM25
"""

if __name__ == "__main__":
    # HACK: make docopt print the full help message if no arguments are given
    if len(sys.argv) == 1:
        sys.argv.append("-h")

    arguments = docopt(help, version="TODO")

    if arguments["--loglevel"]:
        loglevel = arguments["--loglevel"].upper()
        valid_loglevels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")

        if loglevel not in valid_loglevels:
            print("error: log level must be one of:", ", ".join(valid_loglevels))
            sys.exit(1)

        logging.getLogger("capreolus").setLevel(loglevel)

    # prepare task even if we're queueing, so that we validate the config
    config = config_list_to_dict(arguments["CONFIG"])
    task, task_entry_function = prepare_task(arguments["COMMAND"], config)

    if arguments["--queue"]:
        # A supplied --priority arrives from the command line as text, while
        # the fallback is the integer 0. Normalise to int so queue_run always
        # receives the same type, and reject values that are not integers.
        raw_priority = arguments["--priority"]
        try:
            priority = int(raw_priority) if raw_priority else 0
        except ValueError:
            print("error: priority must be an integer:", raw_priority)
            sys.exit(1)

        db = DBManager(os.environ.get("CAPREOLUS_DB"))
        db.queue_run(command=arguments["COMMAND"], config=config, priority=priority)
    else:
        logger.debug("starting command: %s", arguments["COMMAND"])
        logger.debug("config: %s", task.config)
        logger.debug("current constants: %s", constants)
        task_entry_function()