Source code for indra_world.belief

import copy
import json
from io import StringIO
from typing import Dict, List, Optional

import pandas
import requests
from indra.belief import SimpleScorer, BayesianScorer
from indra.pipeline import register_pipeline

from indra_world.resources import get_resource_file


# Default prior counts keyed by source name. get_eidos_bayesian_scorer passes
# a deep copy of this dict to BayesianScorer when no prior_counts are given.
# NOTE(review): each pair is presumably [correct, incorrect] pseudo-counts,
# mirroring the order used for the Eidos rule-level counts -- confirm against
# the BayesianScorer documentation.
default_priors = {'hume': [13, 7], 'cwms': [13, 7], 'sofia': [13, 7]}


def load_eidos_curation_table() -> pandas.DataFrame:
    """Return a pandas table of Eidos curation data.

    The table is downloaded from the Eidos GitHub repository on every call;
    nothing is cached.

    Returns
    -------
    table :
        A pandas dataframe of the curation data, with the trailing
        summary row removed.

    Raises
    ------
    requests.exceptions.RequestException
        If the table cannot be downloaded, e.g., due to a connection error,
        a timeout, or a non-success HTTP status code.
    """
    url = 'https://raw.githubusercontent.com/clulab/eidos/master/' + \
        'src/main/resources/org/clulab/wm/eidos/english/confidence/' + \
        'rule_summary.tsv'
    # Bound the wait so that an unresponsive server cannot hang the caller
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of parsing an error page as TSV
    response.raise_for_status()
    # Load the table of scores from the response body into a data frame
    table = pandas.read_table(StringIO(response.text), sep='\t')
    # Drop the last "Grand total" row
    table = table.drop(table.index[len(table)-1])
    return table


@register_pipeline
def get_eidos_bayesian_scorer(
    prior_counts: Optional[Dict[str, List[float]]] = None,
) -> BayesianScorer:
    """Return a BayesianScorer based on Eidos curation counts.

    Parameters
    ----------
    prior_counts :
        A dict mapping each source name to a two-element list of prior
        counts, in the same form as the module-level ``default_priors``.
        If None or empty, a deep copy of ``default_priors`` is used so that
        the module-level defaults cannot be modified through the scorer.

    Returns
    -------
    scorer :
        A BayesianScorer belief scorer instance.
    """
    table = load_eidos_curation_table()
    # Rule-level counts for Eidos: rule name -> [num correct, num incorrect]
    rule_counts = {
        rule: [num_correct, num_incorrect]
        for rule, num_correct, num_incorrect in zip(
            table['RULE'], table['Num correct'], table['Num incorrect'])
    }
    subtype_counts = {'eidos': rule_counts}
    # Truthiness (rather than an `is None` check) is intentional: an empty
    # dict also falls back to the defaults, as it always has.
    if not prior_counts:
        prior_counts = copy.deepcopy(default_priors)

    scorer = BayesianScorer(prior_counts=prior_counts,
                            subtype_counts=subtype_counts)
    return scorer


@register_pipeline
def get_eidos_scorer() -> SimpleScorer:
    """Return a SimpleScorer based on Eidos curated precision estimates.

    The default prior probabilities are read from the
    ``default_belief_probs.json`` resource file and the Eidos entries are
    then set from the curation table: an overall random/systematic error
    for the reader as a whole, and a rule-specific random error for each
    Eidos rule. Values in the ``% correct`` column are treated as fractions.

    Returns
    -------
    scorer :
        A SimpleScorer instance loaded with default prior probabilities as
        well as prior probabilities derived from curation-based counts.
    """
    with open(get_resource_file('default_belief_probs.json'), 'r',
              encoding='utf-8') as fh:
        prior_probs = json.load(fh)

    table = load_eidos_curation_table()

    # We have to divide the error into a random and systematic component,
    # for now in an ad-hoc manner
    syst_error = 0.05
    # A precision above this cap would leave a negative random error once
    # the systematic error is subtracted, so precisions are capped here.
    max_precision = 1.0 - syst_error

    # Get the overall precision, weighting each rule by its count
    total_num = table['COUNT of RULE'].sum()
    weighted_sum = table['COUNT of RULE'].dot(table['% correct'])
    precision = weighted_sum / total_num
    # Apply the same cap as for the rule-specific errors below so that the
    # overall random error can never become negative
    rand_error = 1 - min(precision, max_precision) - syst_error
    prior_probs['rand']['eidos'] = rand_error
    prior_probs['syst']['eidos'] = syst_error

    # Get a dict of rule-specific errors.
    subtype_probs = {'eidos':
                     {k: 1.0-min(v, max_precision)-syst_error for k, v
                      in zip(table['RULE'], table['% correct'])}}
    scorer = SimpleScorer(prior_probs, subtype_probs)
    return scorer