Source code for indra_world.sources.cwms.api

import logging
from .processor import CWMSProcessor, CWMSProcessorCompositional
from indra.sources.trips import client

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Grounding mode used by the process_* functions in this module when the
# caller does not pass ``grounding_mode`` explicitly. ``process_ekb`` accepts
# 'flat' or 'compositional' and raises ValueError for anything else.
default_grounding_mode = 'flat'


def process_text(text, save_xml='cwms_output.xml',
                 extract_filter=None,
                 grounding_mode=default_grounding_mode):
    """Processes text using the CWMS web service.

    The service response is expected to contain two EKBs; only the second
    one is saved and processed. If a second EKB cannot be located in the
    response, a warning is logged and an empty EKB string is saved and
    passed on to :func:`process_ekb` instead of an arbitrary fragment of the
    response.

    Parameters
    ----------
    text : str
        Text to process
    save_xml : Optional[str]
        A file name in which to dump the output from CWMS. If a falsy value
        (e.g., None) is given, nothing is written.
        Default: cwms_output.xml
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence', 'association', 'event' and 'migration'. If not given,
        only Influences are extracted since processing other relation types
        can be time consuming. This argument can be used if the extraction
        of other relation types such as Events are also of interest.
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: 'flat'.

    Returns
    -------
    cp : CWMSProcessor
        The processor returned by :func:`process_ekb`, which contains a list
        of INDRA statements in its statements attribute.
    """
    xml = client.send_query(text, 'cwmsreader')

    # There are actually two EKBs in the xml document. Extract the second.
    end_tag = '</ekb>'
    first_end = xml.find(end_tag)  # End of first EKB
    # str.find returns -1 on a miss, and -1 is also a legal start offset and
    # slice bound. Each miss is therefore propagated explicitly rather than
    # being fed into the next find/slice, which would silently produce a
    # meaningless substring.
    second_start = xml.find('<ekb', first_end) if first_end != -1 else -1
    second_end = xml.find(end_tag, second_start) if second_start != -1 else -1
    if second_end == -1:
        logger.warning('Could not find a second EKB in the CWMS response, '
                       'processing an empty EKB instead.')
        second_ekb = ''
    else:
        second_ekb = xml[second_start:second_end + len(end_tag)]

    if save_xml:
        # Written as explicitly encoded bytes so that the file content does
        # not depend on the platform's default encoding or newline handling.
        with open(save_xml, 'wb') as fh:
            fh.write(second_ekb.encode('utf-8'))
    return process_ekb(second_ekb, extract_filter=extract_filter,
                       grounding_mode=grounding_mode)
def process_ekb_file(fname, extract_filter=None,
                     grounding_mode=default_grounding_mode):
    """Processes an EKB file produced by CWMS.

    The file is read as bytes, decoded as UTF-8 and handed to
    :func:`process_ekb` together with the given options.

    Parameters
    ----------
    fname : str
        Path to the EKB file to process.
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence', 'association', 'event' and 'migration'. If not given,
        only Influences are extracted since processing other relation types
        can be time consuming. This argument can be used if the extraction
        of other relation types such as Events are also of interest.
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: 'flat'.

    Returns
    -------
    cp : CWMSProcessor
        The processor returned by :func:`process_ekb`, which contains a list
        of INDRA statements in its statements attribute.
    """
    # Read raw bytes and decode explicitly so the content is always
    # interpreted as UTF-8, independent of the platform's default encoding.
    with open(fname, 'rb') as ekb_file:
        raw_content = ekb_file.read()
    ekb_str = raw_content.decode('utf-8')
    return process_ekb(
        ekb_str,
        extract_filter=extract_filter,
        grounding_mode=grounding_mode,
    )
def process_ekb(ekb_str, extract_filter=None,
                grounding_mode=default_grounding_mode):
    """Processes an EKB string produced by CWMS.

    Parameters
    ----------
    ekb_str : str
        EKB string to process
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence', 'association', 'event' and 'migration'. If not given,
        only Influences are extracted since processing other relation types
        can be time consuming. This argument can be used if the extraction
        of other relation types such as Events are also of interest.
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: 'flat'.

    Returns
    -------
    cp : CWMSProcessor
        A CWMSProcessor (or a CWMSProcessorCompositional when
        ``grounding_mode`` is 'compositional'), which contains a list of
        INDRA statements in its statements attribute.

    Raises
    ------
    ValueError
        If ``grounding_mode`` is neither 'flat' nor 'compositional'.
    """
    # Pick the processor class matching the requested grounding mode.
    if grounding_mode == 'flat':
        processor_cls = CWMSProcessor
    elif grounding_mode == 'compositional':
        processor_cls = CWMSProcessorCompositional
    else:
        raise ValueError('Invalid grounding mode: %s' % grounding_mode)
    cp = processor_cls(ekb_str)

    # Without an explicit filter only Influences are extracted.
    requested = ('influence',) if extract_filter is None else extract_filter

    # Relation type -> name of the processor method extracting it, in the
    # order in which the extractions are run.
    extraction_steps = (
        ('influence', 'extract_causal_relations'),
        ('association', 'extract_correlations'),
        ('migration', 'extract_migrations'),
        ('event', 'extract_events'),
    )
    for relation_type, method_name in extraction_steps:
        if relation_type in requested:
            getattr(cp, method_name)()
    return cp