import os
import math
import tqdm
import pickle
import logging
import requests
from indra.util import batch_iter
logger = logging.getLogger(__name__)
[docs]def reground_texts(texts, ont_yml, webservice, topk=10, is_canonicalized=False,
filter=True, cache_path=None):
"""Ground concept texts given an ontology with an Eidos web service.
Parameters
----------
texts : list[str]
A list of concept texts to ground.
ont_yml : str
A serialized YAML string representing the ontology.
webservice : str
The address where the Eidos web service is running, e.g.,
http://localhost:9000.
topk : Optional[int]
The number of top scoring groundings to return. Default: 10
is_canonicalized : Optional[bool]
If True, the texts are assumed to be canonicalized. If False,
Eidos will canonicalize the texts which yields much better groundings
but is slower. Default: False
filter : Optional[bool]
If True, Eidos filters the ontology to remove determiners from examples
and other similar operations. Should typically be set to True.
Default: True
Returns
-------
dict
A JSON dict of the results from the Eidos webservice.
"""
all_results = []
grounding_cache = {}
if cache_path:
if os.path.exists(cache_path):
with open(cache_path, 'rb') as fh:
grounding_cache = pickle.load(fh)
logger.info('Loaded %d groundings from cache' %
len(grounding_cache))
texts_to_ground = list(set(texts) - set(grounding_cache.keys()))
logger.info('Grounding a total of %d texts' % len(texts_to_ground))
for text_batch in tqdm.tqdm(batch_iter(texts_to_ground, batch_size=500,
return_func=list),
total=math.ceil(len(texts_to_ground)/500)):
params = {
'ontologyYaml': ont_yml,
'texts': text_batch,
'topk': topk,
'isAlreadyCanonicalized': is_canonicalized,
'filter': filter
}
res = requests.post('%s/reground' % webservice,
json=params)
res.raise_for_status()
grounding_for_texts = grounding_dict_to_list(res.json())
for txt, grounding in zip(text_batch,
grounding_for_texts):
grounding_cache[txt] = grounding
all_results = [grounding_cache[txt] for txt in texts]
if cache_path:
with open(cache_path, 'wb') as fh:
pickle.dump(grounding_cache, fh)
return all_results
[docs]def grounding_dict_to_list(groundings):
"""Transform the webservice response into a flat list."""
all_grounding_lists = []
for entry in groundings:
grounding_list = []
for grounding_dict in entry:
gr = grounding_dict['grounding']
# Strip off trailing slashes
if gr.endswith('/'):
gr = gr[:-1]
grounding_list.append((gr, grounding_dict['score']))
grounding_list = sorted(grounding_list, key=lambda x: x[1],
reverse=True)
all_grounding_lists.append(grounding_list)
return all_grounding_lists