Source code for capreolus.run

import logging
import os
import sys

from docopt import docopt
from profane import DBManager, config_list_to_dict, constants

from capreolus.task import Task
from capreolus.utils.loginit import get_logger

# Module-level logger obtained from the project's get_logger helper, keyed by this module's name.
logger = get_logger(__name__)  # pylint: disable=invalid-name
def parse_task_string(s):
    """Split a ``TASK[.COMMAND]`` string into a task name and a command name.

    The text before the first ``.`` is the task name, which is resolved to a
    task class with ``Task.lookup``. The text after the first ``.`` is the
    command; when the string contains no ``.``, the task class's
    ``default_command`` is used instead.

    Args:
        s: Command string such as ``"rank.search"`` or ``"rank"``.

    Returns:
        A ``(task, cmd)`` tuple: the task name and the command name.

    Exits:
        Prints an error listing ``task_cls.commands`` and calls
        ``sys.exit(2)`` when the task class has no attribute named ``cmd``.
    """
    task, separator, cmd = s.partition(".")
    task_cls = Task.lookup(task)

    if not separator:
        cmd = task_cls.default_command

    # Everything after the first "." is treated as the command, so malformed
    # input with extra fields (e.g. "rank.search.extra") fails this check
    # instead of silently falling back to the default command.
    if not hasattr(task_cls, cmd):
        print("error: invalid command:", s)
        print(f"valid commands for task={task}: {sorted(task_cls.commands)}")
        sys.exit(2)

    return task, cmd
def prepare_task(fullcommand, config):
    """Create the task named in ``fullcommand`` and look up its command attribute.

    Args:
        fullcommand: Command string handed to ``parse_task_string``.
        config: Configuration passed through to ``Task.create``.

    Returns:
        A ``(task, task_entry_function)`` tuple, where ``task`` is the object
        returned by ``Task.create`` and ``task_entry_function`` is the
        attribute of ``task`` named by the parsed command.
    """
    task_name, command_name = parse_task_string(fullcommand)
    created_task = Task.create(task_name, config)
    entry_point = getattr(created_task, command_name)
    return created_task, entry_point
# Usage text handed to docopt by the script entry point below.
# docopt derives the accepted command line from the "Usage:" and "Options:"
# sections, so their line layout is significant: one usage pattern or option
# per indented line, with each option separated from its description by at
# least two spaces.
# NOTE: the name shadows the ``help`` builtin inside this module; it is kept
# unchanged because it is part of the module's public namespace.
help = """
Usage:
    capreolus COMMAND [(with CONFIG...)] [options]
    capreolus help [COMMAND]
    capreolus (-h | --help)


Options:
    -h --help                  Print this help message and exit.
    -l VALUE --loglevel=VALUE  Set the log level: DEBUG, INFO, WARNING, ERROR, or CRITICAL.
    -p VALUE --priority=VALUE  Sets the priority for a queued up experiment.
                               No effect without -q flag.
    -q --queue                 Queue this run, and do not start it.


Arguments:
    COMMAND    Name of pipeline to run, which consists of a Task and a command (see below for a list)
    CONFIG     Configuration assignments of the form foo.bar=17


Tasks and their commands:
    rank.search          search a collection using queries from a benchmark
    rank.evaluate        evaluate the result of rank.search
    rank.searcheval      run rank.search followed by rank.evaluate
    rerank.train         run rank.search and train a model to rerank the results
    rerank.evaluate      evaluate the result of rerank.train
    rerank.traineval     run rerank.train followed by rerank.evaluate
    rererank.train       run rerank.train and train a (second) model to rerank the results
    rererank.evaluate    evaluate the result of rererank.train
    rererank.traineval   run rererank.train followed by rererank.evaluate
    tutorial.run         task from the "Getting Started" tutorial

All tasks additionally support the following help commands: describe, print_config, print_pipeline
    e.g., capreolus rank.print_config with searcher=BM25
"""
def main():
    """Parse the command line, then run or queue the requested task command.

    Steps, in order:
      1. Parse ``sys.argv`` with docopt against the module-level ``help`` text.
      2. Apply ``--loglevel`` (if given) to the "capreolus" logger.
      3. Build the config and the task via ``prepare_task``; this happens even
         when queueing so the config is validated first.
      4. With ``--queue``, record the run through ``DBManager.queue_run``;
         otherwise call the task's entry function.

    Exits:
        ``sys.exit(1)`` when ``--loglevel`` is not a recognized level or when
        ``--priority`` is not an integer.
    """
    # hack to make docopt print the full help message if no arguments are given
    if len(sys.argv) == 1:
        sys.argv.append("-h")

    arguments = docopt(help, version="TODO")
    # NOTE(review): the "help [COMMAND]" usage pattern is not treated specially
    # anywhere below — confirm the intended behavior for it.

    if arguments["--loglevel"]:
        loglevel = arguments["--loglevel"].upper()
        valid_loglevels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
        if loglevel not in valid_loglevels:
            print("error: log level must be one of:", ", ".join(valid_loglevels))
            sys.exit(1)

        logging.getLogger("capreolus").setLevel(loglevel)

    # prepare task even if we're queueing, so that we validate the config
    config = config_list_to_dict(arguments["CONFIG"])
    task, task_entry_function = prepare_task(arguments["COMMAND"], config)

    if arguments["--queue"]:
        # docopt yields the option value as a string (or None when absent).
        # Convert it so a user-supplied priority has the same type as the
        # integer default, and reject values that are not integers.
        raw_priority = arguments["--priority"]
        if not raw_priority:
            priority = 0
        else:
            try:
                priority = int(raw_priority)
            except ValueError:
                print("error: priority must be an integer:", raw_priority)
                sys.exit(1)

        db = DBManager(os.environ.get("CAPREOLUS_DB"))
        db.queue_run(command=arguments["COMMAND"], config=config, priority=priority)
    else:
        logger.debug("starting command: %s", arguments["COMMAND"])
        logger.debug("config: %s", task.config)
        logger.debug("current constants: %s", constants)
        task_entry_function()


if __name__ == "__main__":
    main()