# Source code for indra_world.assemblers.cag.assembler

# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
from builtins import object, dict, str
import os
import json
import logging
import networkx as nx
from indra.statements import Influence

# Python 2 provides ``basestring``; Python 3 does not, so alias it to ``str``.
# Only NameError is expected here; a bare ``except`` would also swallow
# unrelated errors such as KeyboardInterrupt.
try:
    basestring
except NameError:
    basestring = str

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class CAGAssembler(object):
    """Assembles a causal analysis graph from INDRA Statements.

    Parameters
    ----------
    stmts : Optional[list[indra.statements.Statement]]
        A list of INDRA Statements to be assembled. Currently supports
        Influence Statements.

    Attributes
    ----------
    statements : list[indra.statements.Statement]
        A list of INDRA Statements to be assembled.
    grounding_threshold : Optional[float]
        Score a grounding has to exceed for it to be used as a node name.
        None (the default) means node names are taken from concept names.
    grounding_ontology : str
        Key into a concept's db_refs from which groundings are taken.
    CAG : nx.MultiDiGraph
        A networkx MultiDiGraph object representing the causal analysis graph.
        It is None until make_model has been called.
    """
    def __init__(self, stmts=None):
        # Copy the input so that add_statements never extends the caller's
        # list in place.
        self.statements = list(stmts) if stmts else []
        self.grounding_threshold = None
        self.grounding_ontology = 'UN'
        self.CAG = None

    def add_statements(self, stmts):
        """Add a list of Statements to the assembler."""
        self.statements.extend(stmts)

    def make_model(self, grounding_ontology='UN', grounding_threshold=None):
        """Return a networkx MultiDiGraph representing a causal analysis graph.

        Parameters
        ----------
        grounding_ontology : Optional[str]
            The ontology from which the grounding should be taken
            (e.g. UN, FAO)
        grounding_threshold : Optional[float]
            Threshold score that an Eidos grounding has to exceed. If None,
            the threshold set by an earlier call (if any) is kept.

        Returns
        -------
        nx.MultiDiGraph
            The assembled CAG.
        """
        if grounding_threshold is not None:
            self.grounding_threshold = grounding_threshold
        self.grounding_ontology = grounding_ontology

        # Filter to Influence Statements which are currently supported
        influences = [stmt for stmt in self.statements
                      if isinstance(stmt, Influence)]

        # Every call starts from an empty graph
        self.CAG = nx.MultiDiGraph()

        for stmt in influences:
            subj_delta, obj_delta = stmt.subj.delta, stmt.obj.delta
            # Standardized node names, computed once per statement
            subj_name = self._node_name(stmt.subj.concept)
            obj_name = self._node_name(stmt.obj.concept)

            # See if both subject and object have polarities given
            has_both_polarity = (subj_delta.polarity is not None and
                                 obj_delta.polarity is not None)

            # Add the nodes to the graph; adding an existing node again
            # overwrites its attributes with those of this statement.
            for name, delta in ((subj_name, subj_delta),
                                (obj_name, obj_delta)):
                self.CAG.add_node(name,
                                  simulable=has_both_polarity,
                                  mods=delta.adjectives)

            # Edge is solid only if both nodes have polarity given
            if not has_both_polarity:
                linestyle = 'dotted'
                target_arrow_shape, linecolor = ('triangle', 'maroon')
            elif subj_delta.polarity == obj_delta.polarity:
                linestyle = 'solid'
                target_arrow_shape, linecolor = ('circle', 'green')
            else:
                linestyle = 'solid'
                target_arrow_shape, linecolor = ('tee', 'maroon')

            # Provenance is taken from the first evidence only. The first
            # entry is copied before the text is attached so that the
            # evidence's own annotations are not modified.
            provenance = []
            if stmt.evidence:
                first_ev = stmt.evidence[0]
                provenance = first_ev.annotations.get('provenance', [])
                if provenance:
                    provenance = ([dict(provenance[0], text=first_ev.text)] +
                                  list(provenance[1:]))

            # Add edge to the graph with metadata from statement
            self.CAG.add_edge(
                    subj_name,
                    obj_name,
                    subj_polarity=subj_delta.polarity,
                    subj_adjectives=subj_delta.adjectives,
                    obj_polarity=obj_delta.polarity,
                    obj_adjectives=obj_delta.adjectives,
                    linestyle=linestyle,
                    linecolor=linecolor,
                    targetArrowShape=target_arrow_shape,
                    provenance=provenance,
                )

        return self.CAG

    def print_tsv(self, file_name):
        """Write the Influence Statements to a tab-separated file.

        One row is produced per (Statement, Evidence) pair; exact duplicate
        rows are dropped and the rows are written sorted by their first
        column (Source). The last column, Relation ID, is the index of the
        Statement among the Influence Statements of the assembler.

        Parameters
        ----------
        file_name : str
            Path of the file to write. An existing file is overwritten.
        """
        def _cell(value):
            # Every cell has to be a string for the tab join below
            return '' if value is None else str(value)

        def _get_factor(concept, delta, evidence, raw_name):
            """Return text, normalization, modifiers, polarity of a factor."""
            source_api = evidence.source_api
            # Default for readers without a known normalization scheme
            factor_norm = ''
            if source_api == 'eidos':
                grounding = concept.db_refs.get(self.grounding_ontology)
                if grounding:
                    factor_norm = grounding[0][0]
            elif source_api == 'hume':
                grounding = concept.db_refs.get('HUME')
                if grounding:
                    factor_norm = grounding[0][0]
            elif source_api == 'cwms':
                factor_norm = concept.db_refs.get('CWMS', '')
            elif source_api == 'sofia':
                # TODO extract ontology category here
                factor_norm = concept.name
            mods = ', '.join(delta.adjectives or [])
            if delta.polarity == -1:
                pol = 'decrease'
            elif delta.polarity == 1:
                pol = 'increase'
            else:
                pol = ''
            # Prefer the raw text of the mention when the reader gave one
            name = raw_name if raw_name else concept.name
            return name, factor_norm, mods, pol

        def _get_evidence(evidence):
            """Return the evidence-level columns of a row."""
            # TODO: add sentence ID
            sent_id = ''
            location = evidence.annotations.get('Location')
            time = evidence.annotations.get('Time')
            return (evidence.pmid, evidence.source_api, sent_id, location,
                    time, evidence.text)

        header = ['Source', 'System', 'Sentence ID',
                  'Factor A Text', 'Factor A Normalization',
                  'Factor A Modifiers', 'Factor A Polarity',
                  'Relation Text', 'Relation Normalization',
                  'Relation Modifiers',
                  'Factor B Text', 'Factor B Normalization',
                  'Factor B Modifiers', 'Factor B Polarity',
                  'Location', 'Time', 'Evidence',
                  'Relation ID']

        # Filter to Influence Statements which are currently supported
        influences = [stmt for stmt in self.statements
                      if isinstance(stmt, Influence)]

        all_rows = []
        seen = set()
        for idx, stmt in enumerate(influences):
            for evidence in stmt.evidence:
                source, system, sent_id, location, time, text = \
                    _get_evidence(evidence)
                # The concept (which carries name and db_refs) hangs off
                # subj/obj, next to the delta, as in make_model.
                factor_a, factor_a_norm, mod_a, pol_a = \
                    _get_factor(stmt.subj.concept, stmt.subj.delta, evidence,
                                evidence.annotations.get('subj_text'))
                factor_b, factor_b_norm, mod_b, pol_b = \
                    _get_factor(stmt.obj.concept, stmt.obj.delta, evidence,
                                evidence.annotations.get('obj_text'))
                relation_text = 'influences'
                # Can we get a more specific relation type here?
                relation_norm = ''
                relation_mod = ''
                row = tuple(_cell(value) for value in (
                    source, system, sent_id,
                    factor_a, factor_a_norm, mod_a, pol_a,
                    relation_text, relation_norm, relation_mod,
                    factor_b, factor_b_norm, mod_b, pol_b,
                    location, time, text, idx))
                # Set membership keeps deduplication linear in row count
                if row not in seen:
                    seen.add(row)
                    all_rows.append(row)

        # The context manager closes the file even if a write fails
        with open(file_name, 'w') as fh:
            fh.write('\t'.join(header) + '\n')
            for row in sorted(all_rows, key=lambda x: x[0]):
                fh.write('\t'.join(row) + '\n')

    def export_to_cytoscapejs(self):
        """Return CAG in format readable by CytoscapeJS.

        The graph has to be assembled with make_model before calling this.

        Return
        ------
        dict
            A JSON-like dict representing the graph for use with
            CytoscapeJS.
        """
        def _create_edge_data_dict(e):
            """Return a dict from a MultiDiGraph edge for CytoscapeJS export."""
            source, target, key, attrs = e
            # A hack to get rid of the redundant 'Provenance' label. This
            # works on a copy so the data stored on the graph stays intact.
            if attrs.get('provenance'):
                tooltip = dict(attrs['provenance'][0])
                if tooltip.get('@type'):
                    del tooltip['@type']
            else:
                tooltip = None
            # Parallel edges between the same two nodes differ only in their
            # key; any key other than 0 is appended to keep ids distinct.
            edge_id = source + '_' + target
            if key != 0:
                edge_id += '_' + str(key)
            return {
                    'id'               : edge_id,
                    'source'           : source,
                    'target'           : target,
                    'linestyle'        : attrs["linestyle"],
                    'linecolor'        : attrs["linecolor"],
                    'targetArrowShape' : attrs["targetArrowShape"],
                    'subj_adjectives'  : attrs["subj_adjectives"],
                    'subj_polarity'    : attrs["subj_polarity"],
                    'obj_adjectives'   : attrs["obj_adjectives"],
                    'obj_polarity'     : attrs["obj_polarity"],
                    'tooltip'          : tooltip,
                    'simulable'        : (
                        attrs['obj_polarity'] is not None and
                        attrs['subj_polarity'] is not None),
                    }

        nodes = [{'data': {
                    'id': name,
                    'simulable': attrs['simulable'],
                    'tooltip': 'Modifiers: ' + json.dumps(attrs['mods'])}}
                 for name, attrs in self.CAG.nodes(data=True)]
        edges = [{'data': _create_edge_data_dict(e)}
                 for e in self.CAG.edges(data=True, keys=True)]
        return {'nodes': nodes, 'edges': edges}

    def generate_jupyter_js(self, cyjs_style=None, cyjs_layout=None):
        """Generate Javascript from a template to run in Jupyter notebooks.

        Parameters
        ----------
        cyjs_style : Optional[dict]
            A dict that sets CytoscapeJS style as specified in
            https://github.com/cytoscape/cytoscape.js/blob/master/documentation/md/style.md.

        cyjs_layout : Optional[dict]
            A dict that sets CytoscapeJS
            `layout parameters <http://js.cytoscape.org/#core/layout>`_.

        Returns
        -------
        str
            A Javascript string to be rendered in a Jupyter notebook cell.
        """
        # First, export the CAG to CyJS
        cyjs_elements = self.export_to_cytoscapejs()
        # The template and the default style live next to this module
        here = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(here, 'cag_template.js'), 'r') as fh:
            template = fh.read()
        with open(os.path.join(here, 'cag_style.json'), 'r') as fh:
            style = json.load(fh)
        # Apply default style and layout only if arg wasn't passed in
        if cyjs_style is None:
            cyjs_style = style['style']
        if cyjs_layout is None:
            cyjs_layout = style['layout']
        # Now fill in the three %-placeholders of the template
        formatted_args = tuple(json.dumps(x, indent=2) for x in
                               (cyjs_elements, cyjs_style, cyjs_layout))
        return template % formatted_args

    def _node_name(self, concept):
        """Return a standardized name for a node given a Concept.

        The name is derived from the top grounding of the selected ontology
        when a grounding threshold is set and the score of that grounding is
        strictly above it; otherwise the capitalized concept name is used.
        """
        if self.grounding_threshold is not None:
            # A concept may lack the selected ontology altogether; in that
            # case fall through to the plain concept name.
            grounding = concept.db_refs.get(self.grounding_ontology)
            if grounding and grounding[0][1] > self.grounding_threshold:
                entry = grounding[0][0]
                return entry.split('/')[-1].replace('_', ' ').capitalize()
        return concept.name.capitalize()