Source code for indra_world.sources.sofia.api

import json
import time
import openpyxl
import requests
from indra.config import get_config
from .processor import SofiaJsonProcessor, SofiaExcelProcessor

# Grounding mode used as the default for the ``grounding_mode`` argument of
# the process_* functions in this module.
default_grounding_mode = 'compositional'


def process_table(fname, extract_filter=None,
                  grounding_mode=default_grounding_mode):
    """Return processor by processing the sheets of a spreadsheet file.

    Parameters
    ----------
    fname : str
        The name of the Excel file (typically .xlsx extension) to process
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence' and 'event'. If not given, all relation types are
        extracted. This argument can be used if, for instance, only Influence
        statements are of interest. Default: None
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: the module-level ``default_grounding_mode``.

    Returns
    -------
    sp : SofiaExcelProcessor
        A SofiaExcelProcessor object which has a list of extracted INDRA
        Statements as its statements attribute.

    Raises
    ------
    KeyError
        If the workbook has neither a 'Relations' nor a 'Causal' sheet, or
        if the 'Events' or 'Entities' sheet is missing.
    """
    # NOTE(review): a workbook opened with read_only=True keeps its file
    # handle open until book.close() is called. It is left open here because
    # the processor may hold lazy row iterators -- confirm before closing.
    book = openpyxl.load_workbook(fname, read_only=True)

    # The relations sheet is named either 'Relations' or 'Causal'. Only a
    # missing sheet (KeyError) should trigger the fallback; any other error
    # is a real problem and must propagate.
    try:
        rel_sheet = book['Relations']
    except KeyError:
        rel_sheet = book['Causal']
    event_sheet = book['Events']
    entities_sheet = book['Entities']

    sp = SofiaExcelProcessor(rel_sheet.rows, event_sheet.rows,
                             entities_sheet.rows,
                             grounding_mode=grounding_mode)
    # `.rows` is read again for each extraction call rather than reusing
    # the iterators already handed to the constructor.
    if extract_filter is None or 'influence' in extract_filter:
        sp.extract_relations(rel_sheet.rows)
    if extract_filter is None or 'event' in extract_filter:
        sp.extract_events(event_sheet.rows, rel_sheet.rows)
    return sp
def process_text(text, out_file='sofia_output.json', auth=None,
                 extract_filter=None, grounding_mode=default_grounding_mode):
    """Return processor by sending a text string to the Sofia web service.

    Parameters
    ----------
    text : str
        A string containing the text to be processed with Sofia.
    out_file : Optional[str]
        The path to a file to save the reader's output into. If falsy,
        the output is not saved. Default: sofia_output.json
    auth : Optional[list]
        A username/password pair for the Sofia web service. If not given,
        the SOFIA_USERNAME and SOFIA_PASSWORD values are loaded from either
        the INDRA config or the environment.
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence' and 'event'. If not given, all relation types are
        extracted. Default: None
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: the module-level ``default_grounding_mode``.

    Returns
    -------
    sp : SofiaJsonProcessor or None
        The processor returned by process_json for the service's JSON
        response. If the API did not process the text, None is returned.

    Raises
    ------
    ValueError
        If no username or no password is available.
    """
    # Explicit credentials win; otherwise fall back to the configuration.
    user, password = auth if auth else _get_sofia_auth()
    if not (user and password):
        raise ValueError('Could not use SOFIA web service since'
                         ' authentication information is missing. Please'
                         ' set SOFIA_USERNAME and SOFIA_PASSWORD in the'
                         ' INDRA configuration file or as environmental'
                         ' variables.')

    payload = {'text': text}
    json_response, status_code, process_status = \
        _text_processing(text_json=payload, user=user, password=password)

    # Anything other than a finished job with an HTTP 200 counts as failure
    succeeded = process_status == 'Done' and status_code == 200
    if not succeeded:
        return None

    # Cache reading output
    if out_file:
        with open(out_file, 'w') as fh:
            json.dump(json_response, fh, indent=1)

    return process_json(json_response, extract_filter=extract_filter,
                        grounding_mode=grounding_mode)
def process_json(json_obj, extract_filter=None, grounding_mode=None):
    """Return processor by processing a JSON object returned by Sofia.

    Parameters
    ----------
    json_obj : json
        A JSON object containing extractions from Sofia.
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence' and 'event'. If not given, all relation types are
        extracted. Default: None
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. If None (or otherwise falsy), the module-level
        ``default_grounding_mode`` is used. Default: None

    Returns
    -------
    sp : SofiaJsonProcessor
        A SofiaJsonProcessor object which has a list of extracted INDRA
        Statements as its statements attribute.
    """
    # Fall back to the module default when no mode was given
    mode = grounding_mode or default_grounding_mode
    sp = SofiaJsonProcessor(json_obj, grounding_mode=mode)

    extract_all = extract_filter is None
    if extract_all or 'influence' in extract_filter:
        sp.extract_relations(json_obj)
    if extract_all or 'event' in extract_filter:
        sp.extract_events(json_obj)
    return sp
def process_json_file(fname, extract_filter=None,
                      grounding_mode=default_grounding_mode):
    """Return processor by processing a JSON file produced by Sofia.

    Parameters
    ----------
    fname : str
        The name of the JSON file to process
    extract_filter : Optional[list]
        A list of relation types to extract. Valid values in the list are
        'influence' and 'event'. If not given, all relation types are
        extracted. Default: None
    grounding_mode : Optional[str]
        Selects whether 'flat' or 'compositional' groundings should be
        extracted. Default: the module-level ``default_grounding_mode``.

    Returns
    -------
    SofiaJsonProcessor
        The processor returned by process_json for the file's content.
    """
    # Load the whole file, then hand the parsed object to process_json
    with open(fname, 'r') as json_file:
        sofia_json = json.load(json_file)
    processor = process_json(sofia_json, extract_filter=extract_filter,
                             grounding_mode=grounding_mode)
    return processor
def _get_sofia_auth():
    """Return the (username, password) pair looked up via get_config.

    The values are read from the SOFIA_USERNAME and SOFIA_PASSWORD
    configuration keys.
    """
    return get_config('SOFIA_USERNAME'), get_config('SOFIA_PASSWORD')


def _sofia_api_post(api, option, json, auth):
    """POST a JSON payload to the endpoint `api + option`.

    Returns the response object produced by requests.post.
    """
    endpoint = api + option
    return requests.post(url=endpoint, json=json, auth=auth)


def _text_processing(text_json, user, password):
    """Submit text to the Sofia web service and fetch the results.

    Parameters
    ----------
    text_json : dict
        The JSON payload posted to the /process_text endpoint.
    user : str
        User name for the web service.
    password : str
        Password for the web service.

    Returns
    -------
    tuple
        The JSON content of the /results response, the HTTP status code of
        that response, and the last 'Status' value reported by /status.
    """
    assert len(text_json) > 0
    sofia_api = 'https://sofia.worldmodelers.com'
    credentials = (user, password)

    # Initialize process; the reply is the payload for the later requests
    submission = _sofia_api_post(api=sofia_api, option='/process_text',
                                 json=text_json, auth=credentials)
    job_json = submission.json()

    # Check status every two seconds until it is no longer 'Processing'
    while True:
        status_resp = _sofia_api_post(api=sofia_api, option='/status',
                                      json=job_json, auth=credentials)
        if status_resp.json()['Status'] != 'Processing':
            break
        time.sleep(2.0)

    results = _sofia_api_post(api=sofia_api, option='/results',
                              json=job_json, auth=credentials)
    status_code = results.status_code
    process_status = status_resp.json()['Status']
    return results.json(), status_code, process_status