Relations Automated Check

Discussion on this check can be found here.

Requirements

  1. The ontology must not duplicate existing RO properties.
  2. The ontology should use existing RO properties, rather than creating new properties.

Fixes

Duplicated Properties

  1. Add an owl:deprecated annotation with a boolean value of true to the problematic property in your ontology
  2. Add the word "obsolete" to the beginning of the property’s label
  3. Replace all usages of that property with the duplicated RO property

Non-RO Properties

Review your non-RO properties to see if any can be replaced with an RO property using the steps above. Often, a corresponding property will not exist in RO and that is OK.

Implementation

The labels of the object and data properties from the ontology are compared to the labels of existing RO properties. If a property's label matches an RO property's label but the property does not use an RO IRI, this is reported as an ERROR. Any non-RO properties (no label match and no RO IRI) are listed as INFO messages.

import os
import unicodedata

import dash_utils
from dash_utils import format_msg

# IRI of the owl:deprecated annotation property; annotation property IRIs are
# compared against this string to find obsolete-marking annotations.
owl_deprecated = 'http://www.w3.org/2002/07/owl#deprecated'

# Violation messages
# Templates for the 'comment' field of a result; {0} is filled with the number
# of offending properties.
ro_match = '{0} labels match RO labels.'
non_ro = '{0} non-RO properties used.'


def has_valid_relations(namespace, ontology, ro_props):
    """Run the FP7 (relations) check against a loaded ontology.

    The labelled, non-obsolete properties of the ontology are collected and
    compared with the RO properties. A property whose label matches an RO
    label but whose IRI is not an RO IRI yields ERROR (RO properties should
    not be duplicated). A property that matches neither an RO label nor an
    RO IRI yields INFO (using RO is recommended, not required). When every
    property is an RO property the result is PASS.

    Args:
        namespace (str): ontology ID
        ontology (OWLOntology): ontology object, or None if loading failed
        ro_props (dict): map of RO property label to IRI

    Return:
        dict with a 'status' key (PASS, INFO or ERROR) and, for violations,
        an explanatory 'comment'
    """
    # The missing-ontology test comes first, so it is reported even for RO.
    if ontology is None:
        return {'status': 'ERROR', 'comment': 'Unable to load ontology'}

    # RO is never checked against itself.
    if namespace == 'ro':
        return {'status': 'PASS'}

    # Collect the label -> IRI map and hand it straight to the comparison,
    # which produces the final PASS / INFO / ERROR result.
    return check_properties(
        namespace, get_properties(namespace, ontology), ro_props)


def get_properties(namespace, ontology):
    """Create a map of normalized property label to property IRI.

    Both the object properties and the data properties in the ontology's
    signature are inspected. A property is included only when it has a
    label annotation and is not marked obsolete.

    Args:
        namespace (str): ontology ID (not used by the lookup itself)
        ontology (OWLOntology): ontology object

    Return:
        Dict of normalized label to property IRI string
    """
    props = {}
    _add_labeled_properties(
        ontology, ontology.getObjectPropertiesInSignature(), props)
    _add_labeled_properties(
        ontology, ontology.getDataPropertiesInSignature(), props)
    return props


def _add_labeled_properties(ontology, entities, props):
    """Add the label and IRI of each non-obsolete entity to ``props``.

    Args:
        ontology (OWLOntology): ontology holding the annotation assertions
        entities (iterable): object or data properties to inspect
        props (dict): map of normalized label to IRI string, updated in place
    """
    for entity in entities:
        iri = entity.getIRI()
        # Reset per entity so that nothing leaks over from the previous one.
        normal = None
        obsolete = False
        for ann in ontology.getAnnotationAssertionAxioms(iri):
            ann_prop = ann.getProperty()
            if ann_prop.isLabel():
                # find the label
                val = ann.getValue().asLiteral().orNull()
                if val is None:
                    # the label value is not a literal; nothing to normalize
                    continue
                clz = val.getClass().getSimpleName().lower()
                if clz == 'owlliteralimplstring':
                    # string datatype, get the literal value
                    normal = normalize_label(val.getLiteral())
                else:
                    # other literal kinds: use the rendered form, which
                    # normalize_label strips of quotes and the '@en' tag
                    normal = normalize_label(val.toString())

            elif ann_prop.getIRI().toString() == owl_deprecated:
                # check if the property is obsolete
                obsolete = dash_utils.is_obsolete(ann)

        if normal is not None and not obsolete:
            props[normal] = iri.toString()


def big_has_valid_relations(namespace, file, ro_props):
    """Run the FP7 (relations) check on a large ontology file.

    Instead of loading an ontology object, the property labels are read by
    scanning the file line by line (see big_get_properties). The resulting
    label to IRI map is compared with the RO properties: a label that matches
    an RO label under a non-RO IRI yields ERROR, a property that matches
    neither an RO label nor an RO IRI yields INFO, and otherwise the result
    is PASS.

    Args:
        namespace (str): ontology ID
        file (str): path to ontology file
        ro_props (dict): map of RO property label to IRI

    Return:
        dict with a 'status' key (PASS, INFO or ERROR) and, for violations,
        an explanatory 'comment'
    """
    # The file must exist; this is checked before the RO exemption.
    file_exists = os.path.isfile(file)
    if not file_exists:
        return {'status': 'ERROR', 'comment': 'Unable to find ontology file'}

    # RO is never checked against itself.
    if namespace == 'ro':
        return {'status': 'PASS'}

    # Collect the label -> IRI map and hand it straight to the comparison,
    # which produces the final PASS / INFO / ERROR result.
    return check_properties(
        namespace, big_get_properties(namespace, file), ro_props)


def big_get_properties(namespace, file):
    """Create a map of normalized property label to property IRI for large
    ontologies by scanning an RDF/XML file line by line.

    Only lines containing the literal text 'owl:ObjectProperty rdf:about'
    start a property. Each following line containing 'rdfs:label' is recorded
    for that property until another line containing 'owl:ObjectProperty' is
    seen. Scanning stops at the first line containing 'owl:Class'.

    Args:
        namespace (str): ontology ID (not used by the scan itself)
        file (str): path to ontology file

    Return:
        Dict of normalized label to property IRI
    """
    # TODO: handle different prefixes
    props = {}
    with open(file, 'r') as f:
        # IRI of the property whose element is currently being read, if any
        p_iri = None
        for line in f:
            if 'owl:ObjectProperty rdf:about' in line:
                try:
                    p_iri = dash_utils.get_resource_value(line)
                except Exception:
                    # Forget the previous property so that the labels that
                    # follow are not attributed to the wrong IRI.
                    p_iri = None
                    print('Unable to get IRI from line: ' + line)
            elif p_iri and 'rdfs:label' in line:
                try:
                    label = dash_utils.get_literal_value(line)
                    props[normalize_label(label)] = p_iri
                except Exception:
                    # continue on to the next line
                    # might be a line break between (like RNAO)
                    print('Unable to get label from line: ' + line)
            elif p_iri and 'owl:ObjectProperty' in line:
                # any other mention of the element name (e.g. the closing
                # tag) ends the current property
                p_iri = None
            elif 'owl:Class' in line:
                # NOTE(review): presumably all properties are serialized
                # before the first class - confirm for the files in use
                break
    return props


def normalize_label(s):
    """Clean and normalize a string label for comparison.

    The label is lower-cased, double quotes and the '@en' language tag are
    removed, surrounding whitespace is stripped and the result is converted
    to Unicode NFC form.

    Args:
        s (str): label to normalize

    Return:
        normalized string label
    """
    clean = s.strip().lower().replace('"', '').replace('@en', '')
    # Strip again: whitespace that sat inside the quotes (e.g. '"part of "@en')
    # is only exposed once the quotes and language tag are gone.
    return unicodedata.normalize('NFC', clean.strip())


def check_properties(namespace, props, ro_props):
    """Compare the properties from an ontology to the RO properties.

    A property whose IRI is an RO IRI is always accepted. Otherwise, a
    property whose label equals an RO label is a duplicate of that RO
    property (ERROR), and any other property is a non-RO property (INFO).
    When at least one violation is found, a report is written with
    save_invalid_relations before the result is returned.

    Args:
        namespace (str): ontology ID
        props (dict): map of ontology property label to IRI
        ro_props (dict): map of RO property label to IRI

    Return:
        PASS or violation level with optional help message
    """
    # Build the IRI lookup once, instead of scanning ro_props.values() for
    # every property.
    ro_iris = set(ro_props.values())

    # properties that share an RO label
    # but have a different IRI (IRI -> label)
    same_label = {}

    # properties that do not have an RO IRI
    # and do not share a label with an RO prop (IRI -> label)
    not_ro = {}

    for label, iri in props.items():
        if iri in ro_iris:
            # uses an RO IRI, nothing to report
            continue
        if label in ro_props:
            same_label[iri] = label
        else:
            not_ro[iri] = label

    if not same_label and not not_ro:
        return {'status': 'PASS'}

    # save a report file listing every violation
    save_invalid_relations(namespace, ro_props, same_label, not_ro)

    # duplicated RO labels make the result an ERROR; non-RO properties alone
    # are only an INFO
    comments = []
    if same_label:
        comments.append(ro_match.format(len(same_label)))
    if not_ro:
        comments.append(non_ro.format(len(not_ro)))
    return {'status': 'ERROR' if same_label else 'INFO',
            'file': 'fp7',
            'comment': ' '.join(comments)}


def save_invalid_relations(namespace, ro_props, same_label, not_ro):
    """Write the FP7 violations to a TSV report.

    The report goes to build/dashboard/<namespace>/fp7.tsv and has one header
    row followed by one row per violation: first the properties that share a
    label with an RO property, then the properties that are not RO
    properties.

    Args:
        namespace (str): ontology ID
        ro_props (dict): map of RO property label to IRI
        same_label (dict): map of property IRI to label, for properties whose
                           label matches an RO label under a different IRI
        not_ro (dict): map of property IRI to label, for properties that are
                       not RO properties
    """
    report_path = 'build/dashboard/{0}/fp7.tsv'.format(namespace)
    with open(report_path, 'w+') as report:
        report.write('IRI\tLabel\tIssue\n')

        # duplicates of RO labels, with the IRI of the RO property they clash
        # with
        for prop_iri, prop_label in same_label.items():
            clashing_iri = ro_props[prop_label]
            row = '{0}\t{1}\tshares label with {2}\n'.format(
                prop_iri, prop_label, clashing_iri)
            report.write(row)

        # properties without an RO counterpart
        for prop_iri, prop_label in not_ro.items():
            row = '{0}\t{1}\tnot an RO property\n'.format(
                prop_iri, prop_label)
            report.write(row)