Source code for capreolus.benchmark

import json

from capreolus import ModuleBase
from capreolus.utils.trec import load_qrels, load_trec_topics


class Benchmark(ModuleBase):
    """Base class for Benchmark modules.

    The purpose of a Benchmark is to provide the data needed to run an experiment,
    such as queries, folds, and relevance judgments.

    Modules should provide:
        - a ``topics`` dict mapping query ids (*qids*) to *queries*
        - a ``qrels`` dict mapping *qids* to *docids* and *relevance labels*
        - a ``folds`` dict mapping a fold name to *training*, *dev* (validation), and *testing* qids
        - if these can be loaded from files in standard formats, they can be specified by setting
          the ``topic_file``, ``qrel_file``, and ``fold_file``, respectively, rather than by
          setting the above attributes directly
    """

    module_type = "benchmark"
    qrel_file = None
    topic_file = None
    fold_file = None
    query_type = None
    relevance_level = 1
    """ Documents with a relevance label >= relevance_level will be considered relevant.
    This corresponds to trec_eval's --level_for_rel (and is passed to pytrec_eval as relevance_level).
    """

    @property
    def qrels(self):
        """Return the relevance judgments, loading them from ``qrel_file`` on first access.

        The result of ``load_qrels(self.qrel_file)`` is cached on the instance as
        ``_qrels``; later accesses return the cached object without re-reading the file.
        """
        if not hasattr(self, "_qrels"):
            self._qrels = load_qrels(self.qrel_file)
        return self._qrels

    @property
    def topics(self):
        """Return the topics, loading them from ``topic_file`` on first access.

        The result of ``load_trec_topics(self.topic_file)`` is cached on the instance
        as ``_topics``; later accesses return the cached object.
        """
        if not hasattr(self, "_topics"):
            self._topics = load_trec_topics(self.topic_file)
        return self._topics

    @property
    def folds(self):
        """Return the folds, parsing the JSON in ``fold_file`` on first access.

        The parsed JSON is cached on the instance as ``_folds``; later accesses
        return the cached object.
        """
        if not hasattr(self, "_folds"):
            # Use a context manager so the file handle is closed deterministically,
            # even if JSON parsing raises.
            with open(self.fold_file, "rt") as fold_fh:
                self._folds = json.load(fold_fh)
        return self._folds
# NOTE(review): these imports sit at the bottom of the module, presumably because
# ``.dummy`` needs ``Benchmark`` to be defined before it is imported -- confirm.
from profane import import_all_modules

from .dummy import DummyBenchmark

# Called with this file's path and package name; presumably imports the sibling
# modules of this package so their benchmarks are loaded -- confirm against profane.
import_all_modules(__file__, __package__)