Source code for capreolus.task.tutorial

from capreolus import ConfigOption, Dependency, evaluator
from capreolus.task import Task
from capreolus.utils.loginit import get_logger

logger = get_logger(__name__)  # pylint: disable=invalid-name


@Task.register
class TutorialTask(Task):
    module_name = "tutorial"
    config_spec = [ConfigOption("optimize", "map", "metric to maximize on the validation set")]
    dependencies = [
        Dependency(key="benchmark", module="benchmark", name="nf", provide_this=True, provide_children=["collection"]),
        Dependency(key="searcher1", module="searcher", name="BM25RM3"),
        Dependency(key="searcher2", module="searcher", name="SDM"),
    ]

    commands = ["run"] + Task.help_commands
    default_command = "run"

    def run(self):
        output_dir = self.get_results_path()

        # read the title queries from the chosen benchmark's topic file
        results1 = self.searcher1.query_from_file(self.benchmark.topic_file, output_dir / "searcher1")
        results2 = self.searcher2.query_from_file(self.benchmark.topic_file, output_dir / "searcher2")
        searcher_results = [results1, results2]

        # using the benchmark's folds, which each contain train/validation/test queries,
        # choose the best run in `output_dir` for the fold based on the validation queries
        # and return metrics calculated on the test queries
        best_results = evaluator.search_best_run(
            searcher_results, self.benchmark, primary_metric=self.config["optimize"], metrics=evaluator.DEFAULT_METRICS
        )

        for fold, path in best_results["path"].items():
            shortpath = "..." + path[-40:]
            logger.info("fold=%s best run: %s", fold, shortpath)

        logger.info("cross-validated results when optimizing for '%s':", self.config["optimize"])
        for metric, score in sorted(best_results["score"].items()):
            logger.info("%15s: %0.4f", metric, score)

        return best_results
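
A minimal sketch of invoking this task, assuming the usual Capreolus conventions (a config dict passed to the Task constructor, and the `capreolus <task>.<command> with key=value` CLI form); the exact constructor and CLI syntax here are assumptions, so check the Getting Started guide rather than treating this as the canonical invocation.

    # Hypothetical usage sketch; constructor argument and CLI form are assumptions.
    from capreolus.task import TutorialTask

    # Override the default "map" metric via the task's config (assumed dict form).
    task = TutorialTask({"optimize": "map"})
    best_results = task.run()  # returns the dict built in run() above

    # Assumed equivalent CLI invocation:
    #   capreolus tutorial.run with optimize=map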