# Source code for capreolus.collection.wapo

import json

from capreolus import constants
from capreolus.utils.loginit import get_logger

from . import Collection, IRDCollection

# Module-level logger, named after this module via the project's logging helper.
logger = get_logger(__name__)

# Value stored under "PACKAGE_PATH" in the shared capreolus constants mapping.
PACKAGE_PATH = constants["PACKAGE_PATH"]
@Collection.register
class WaPo(IRDCollection):
    """TREC WashingtonPost v2 collection.

    See https://trec.nist.gov/data/wapost/
    """

    # Name this module is registered under.
    module_name = "wapo"
    # Dataset identifier; presumably the ir_datasets key resolved by
    # IRDCollection -- confirm against the base class.
    ird_dataset_name = "wapo/v2"
    # Label for the on-disk document format; presumably consumed by the
    # indexing backend -- confirm against callers.
    collection_type = "JsonCollection"

    def doc_as_json(self, doc) -> str:
        """Serialize a single document to a JSON string.

        Args:
            doc: Object exposing ``doc_id``, ``title`` and ``body`` attributes.

        Returns:
            A JSON object string with two keys: ``"id"`` (the document's
            ``doc_id``) and ``"contents"`` (title and body joined by a single
            space).
        """
        # Only None is skipped; an empty string is still joined, so a
        # document with an empty title keeps a leading space in "contents".
        content = " ".join(x for x in (doc.title, doc.body) if x is not None)
        return json.dumps({"id": doc.doc_id, "contents": content})