Versioning Automated Check

Discussion on this check can be found here.

Requirements

  1. The released ontology must have a version IRI.
  2. The version IRI should follow a dated format (NS/YYYY-MM-DD/ontology.owl)

Fixes

First, make sure you have a valid version IRI pattern. See Versioning Implementation for more details.

Adding a Version IRI in Protégé

The “Ontology Version IRI” input is located in the “Active Ontology” tab that appears when you open your ontology in Protégé.

Adding a Version IRI with ROBOT

You may use the ROBOT annotate command to add a version IRI.

Please be aware that the Ontology Development Kit comes standard with a release process that will automatically generate a dated version IRI for your ontology release.

Implementation

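The version IRI is retrieved from the ontology using OWL API. For very large ontologies, the RDF/XML ontology header is parsed to find the owl:versionIRI declaration instead. If the version IRI is not present, or if it does not resolve, this is an error. When the ontology is loaded with OWL API, the version IRI must also contain exactly one of a semantic version or a date (YYYY-MM-DD); otherwise this is an error. Finally, the IRI is compared to a regex pattern to determine if it is in the recommended format. If it is not in the recommended format, a warning is issued.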

import re
from typing import Optional
from urllib.parse import urlparse

import dash_utils
from lib import url_exists

# Regex for the recommended version IRI shape: an http (not https) OBO
# Library PURL with at least two more "/" separators after "obo/", e.g.
# http://purl.obolibrary.org/obo/<namespace>/<version>/<file>.
# The pattern is unanchored and is applied with ``search``.
pat = r'http:\/\/purl\.obolibrary\.org/obo/.*/.*/.*'
PATTERN = re.compile(pat)
#: Simplified version-number pattern. This is NOT the official semver regex from
#: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
#: It accepts one to three numeric components without leading zeros, with the
#: separating dots optional, e.g. 1.0.0, 1.1 or 11.
SEMVER_PATTERN = re.compile(r"^(0|[1-9]\d*)(\.)?(0|[1-9]\d*)?((\.)?(0|[1-9]\d*))?$")
#: Regular expression for an ISO 8601 style date in YYYY-MM-DD format.
#: Month is limited to 01-12 and day to 01-31; the day is not validated
#: against the length of the specific month.
DATE_PATTERN = re.compile(r"^([0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])$")

# Comment templates for the issues reported by the versioning check.
# ``bad_format`` takes the offending version IRI as its only format argument.
bad_format = 'Version IRI \'{0}\' is not in recommended format'
missing_version = 'Missing version IRI'


def has_versioning(ontology):
   """Run the FP 4 (versioning) check on a loaded ontology.

   The checks run in order and the first one that fails decides the result:

   1. the ontology object must not be None (ERROR),
   2. a version IRI must be present (ERROR),
   3. the version IRI must resolve according to ``url_exists`` (ERROR),
   4. ``get_iri_version_error_message`` must report no problem (ERROR),
   5. the version IRI should match ``PATTERN`` (WARN).

   Args:
       ontology (OWLOntology): ontology object, or None if loading failed

   Return:
       dict with a 'status' of PASS, WARN or ERROR and, for anything other
       than PASS, a 'comment' describing the problem
   """
   def error(comment):
       # Every hard failure of this check shares the same result shape.
       return {'status': 'ERROR', 'comment': comment}

   if ontology is None:
       return error('Unable to load ontology')

   # A missing version IRI comes back as a falsy value.
   iri = dash_utils.get_version_iri(ontology)
   if not iri:
       return error(missing_version)
   if not url_exists(iri):
       return error("Version IRI does not resolve")

   # None from the helper means the version part of the IRI is acceptable.
   problem = get_iri_version_error_message(iri)
   if problem is not None:
       return error(problem)

   # The overall IRI shape is only a recommendation, hence WARN and not ERROR.
   if PATTERN.search(iri):
       return {'status': 'PASS'}
   return {'status': 'WARN', 'comment': bad_format.format(iri)}


def big_has_versioning(file):
   """Check fp 4 - versioning, for large ontologies.

   This is suitable for large ontologies as it works from the file path
   instead of a loaded OWLOntology object: the version IRI is obtained with
   ``dash_utils.get_big_version_iri``, which looks for the owl:versionIRI
   property in the header.

   The same rules as in ``has_versioning`` are applied, in the same order:
   the version IRI must be present, must resolve, and must pass
   ``get_iri_version_error_message`` (each an ERROR otherwise); it should
   also match ``PATTERN`` (a WARN otherwise).

   Args:
       file (str): path to ontology

   Return:
       dict with a 'status' of PASS, WARN or ERROR and, for anything other
       than PASS, a 'comment' describing the problem
   """
   # may return empty string if version IRI is missing
   # or None if ontology cannot be parsed
   version_iri = dash_utils.get_big_version_iri(file)
   if version_iri is None:
       return {'status': 'ERROR', 'comment': 'Unable to parse ontology'}
   if version_iri == "":
       return {'status': 'ERROR', 'comment': missing_version}
   if not url_exists(version_iri):
       return {"status": "ERROR", "comment": "Version IRI does not resolve"}

   # Validate the version part of the IRI exactly as has_versioning does, so
   # that the result of the check does not depend on the size of the ontology.
   iri_version_error_message = get_iri_version_error_message(version_iri)
   if iri_version_error_message is not None:
       return {"status": "ERROR", "comment": iri_version_error_message}

   # compare version IRI to the regex pattern
   if not PATTERN.search(version_iri):
       return {'status': 'WARN',
               'comment': bad_format.format(version_iri)}

   return {'status': 'PASS'}

def contains_semver(iri: str) -> bool:
   """Tell whether any path segment of the IRI is a version number.

   Each "/"-separated segment of the IRI path is tested against
   ``SEMVER_PATTERN``; one matching segment is enough.

   >>> contains_semver("https://example.org/1.0.0/ontology.owl")
   True
   >>> contains_semver("https://example.org/1.0/ontology.owl")
   True
   >>> contains_semver("https://example.org/2022-01-01/1.0.0/ontology.owl")
   True
   >>> contains_semver("https://example.org/ontology.owl")
   False
   >>> contains_semver("https://example.org/2022-01-01/ontology.owl")
   False
   >>> contains_semver("http://purl.obolibrary.org/obo/chebi/223/chebi.owl")
   True
   >>> contains_semver("http://purl.obolibrary.org/obo/pr/68.0/pr.owl")
   True
   """
   has_version_segment = _match_any_part(iri, SEMVER_PATTERN)
   return has_version_segment


def contains_date(iri: str) -> bool:
   """Tell whether any path segment of the IRI is a YYYY-MM-DD date.

   Each "/"-separated segment of the IRI path is tested against
   ``DATE_PATTERN``; one matching segment is enough.

   >>> contains_date("https://example.org/1.0.0/ontology.owl")
   False
   >>> contains_date("https://example.org/1.0/ontology.owl")
   False
   >>> contains_date("https://example.org/2022-01-01/1.0.0/ontology.owl")
   True
   >>> contains_date("https://example.org/ontology.owl")
   False
   >>> contains_date("https://example.org/2022-01-01/ontology.owl")
   True
   >>> contains_date("http://purl.obolibrary.org/obo/chebi/223/chebi.owl")
   False
   >>> contains_date("http://purl.obolibrary.org/obo/pr/68.0/pr.owl")
   False
   """
   has_date_segment = _match_any_part(iri, DATE_PATTERN)
   return has_date_segment


def _match_any_part(iri, pattern):
   """Return True if any "/"-separated segment of the IRI path matches.

   Only the path component of the parsed IRI is inspected, so the host,
   query string and fragment never contribute a match.

   Args:
       iri (str): IRI to inspect
       pattern: compiled regular expression, applied to each segment
           with ``match``

   Return:
       True if at least one path segment matches, otherwise False
   """
   segments = urlparse(iri).path.split("/")
   for segment in segments:
       if pattern.match(segment):
           return True
   return False


def get_iri_version_error_message(version_iri: str) -> Optional[str]:
   """Check that a version IRI has exactly one of a semantic version or a date.

   The date is expected in ISO 8601 YYYY-MM-DD form. Returns None when the
   IRI is acceptable, otherwise a message describing the problem.

   >>> get_iri_version_error_message("https://example.org/2022-01-01/ontology.owl") is None
   True
   >>> get_iri_version_error_message("https://example.org/1.0.0/ontology.owl") is None
   True
   >>> get_iri_version_error_message("https://example.org/1.0/ontology.owl") is None
   True
   >>> get_iri_version_error_message("http://purl.obolibrary.org/obo/chebi/223/chebi.owl") is None
   True
   >>> get_iri_version_error_message("http://purl.obolibrary.org/obo/pr/68.0/pr.owl") is None
   True
   >>> get_iri_version_error_message("https://obofoundry.org")
   'Version IRI has neither a semantic version nor a date'
   >>> get_iri_version_error_message("https://example.org/2022-01-01/1.0.0/ontology.owl")
   'Version IRI should not contain both a semantic version and date'
   """
   has_date = contains_date(version_iri)
   has_semver = contains_semver(version_iri)

   # Exactly one kind of version marker is present: nothing to report.
   if has_date != has_semver:
       return None

   # From here on the two flags are equal, so either both markers are
   # present or both are absent.
   if has_date:
       return "Version IRI should not contain both a semantic version and date"
   return "Version IRI has neither a semantic version nor a date"