Module continuous_delivery_scripts.utils.third_party_licences
Third party licences.
Expand source code
#
# Copyright (C) 2020-2025 Arm Limited or its affiliates and Contributors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
"""Third party licences."""
import re
import json
from dataclasses import dataclass
from license_expression import Licensing, LicenseExpression, OR
from spdx.config import _licenses
from typing import Iterable, cast, Optional, Iterator, List, Pattern, Any
from continuous_delivery_scripts.utils.configuration import ConfigurationVariable, configuration
from continuous_delivery_scripts.utils.string_helpers import determine_similar_string_from_list
@dataclass(order=True, frozen=True)
class Licence:
"""Licence descriptor.
Maps to what is defined in licenses.json in
https://github.com/spdx/license-list-data/blob/master/json/licenses.json
"""
reference_number: str
identifier: str
name: str
is_deprecated: bool
is_osi_approved: bool
url: str
reference: str
UNKNOWN_LICENCE = Licence(
reference_number="Unknown",
identifier="Unknown",
name="Unknown",
is_deprecated=True,
is_osi_approved=False,
url="Unknown",
reference="Unknown",
)
LICENCE_LIKELIHOOD_THRESHOLD = 0.5
LICENCE_NON_ACCEPTED_CHARACTERS = r"[^\w\s\.\:\-()]"
def _parse_licence_expression(licensing: Licensing, licence_expression: str) -> LicenseExpression:
# Removing any unwanted characters so that the expression follows the laws:
# > the valid characters are: letters and numbers, underscore, dot, colon or hyphen signs and spaces
expression = re.sub(LICENCE_NON_ACCEPTED_CHARACTERS, "", licence_expression)
expression = re.sub(r"\([sS]ee [\w\s\.\-]*\)", "", expression)
return licensing.parse(expression)
def parse_licence(licence_info: dict) -> Licence:
"""Parses a licence entry in the Json file and translates it into a licence object."""
return Licence(
reference_number=licence_info.get("referenceNumber", ""),
identifier=licence_info.get("licenseId", ""),
name=licence_info.get("name", ""),
url=licence_info.get("detailsUrl", ""),
reference=licence_info.get("reference", ""),
is_deprecated=bool(licence_info.get("isDeprecatedLicenseId", False)),
is_osi_approved=bool(licence_info.get("isOsiApproved", True)),
)
def iter_licenses(licence_info: dict) -> Iterable[Licence]:
"""Gets a generator over all the licences present in licenses.json."""
licences = cast(Iterable[dict], licence_info.get("licenses", []))
for licence_info in licences:
yield parse_licence(licence_info)
def _handle_special_licence_entries(cleansed_descriptor: str) -> str:
if cleansed_descriptor in ["Python Software Foundation License"]:
return "Python"
if cleansed_descriptor in ["Apache Software License", "Apache", "apache"]:
return "Apache-2.0"
if cleansed_descriptor in ["LGPL", "UNKNOWN", "Dual License"]:
# It is not possible to find which is the actual licence to consider.
return UNKNOWN_LICENCE.identifier
return cleansed_descriptor
def cleanse_licence_descriptor(licence_descriptor: str) -> str:
"""Cleanses the licence descriptor to only keep words describing the licence."""
cleansed_descriptor = licence_descriptor.strip()
cleansed_descriptor = re.sub(r"OSI\s?[Aa]pproved[\:]*", "", cleansed_descriptor)
cleansed_descriptor = re.sub(r"[pP]ublic [dD]omain", "", cleansed_descriptor)
cleansed_descriptor = re.sub(r"BSD[-\s][Ll]ike", "BSD", cleansed_descriptor)
cleansed_descriptor = re.sub(r"BSD [lL]icen[sc]e", "BSD", cleansed_descriptor)
cleansed_descriptor = re.sub(r"MIT [lL]icen[sc]e", "MIT", cleansed_descriptor)
cleansed_descriptor = _handle_special_licence_entries(cleansed_descriptor.strip())
if cleansed_descriptor.lower().startswith("the"):
cleansed_descriptor = cleansed_descriptor[3:].strip()
return cleansed_descriptor
class OpenSourceLicences:
"""All the opensource licences known."""
def __init__(self) -> None:
"""Initialiser."""
self._licence_store: Optional[dict] = None
self._licence_list: Optional[list] = None
def load(self) -> None:
"""Loads licence data from internal Json file."""
if self._licence_list and self._licence_store:
return
self._licence_store = {UNKNOWN_LICENCE.identifier: UNKNOWN_LICENCE}
self._licence_list = [UNKNOWN_LICENCE.identifier]
with open(_licenses, "r", encoding="utf8") as f:
for licence in iter_licenses(json.load(f)):
self._licence_store[licence.identifier] = licence
self._licence_list.append(licence.identifier)
self._licence_store[licence.name] = licence
self._licence_list.append(licence.name)
def get_licences_from_pattern(self, licence_descriptor_pattern: Pattern) -> Optional[List[Licence]]:
"""Determines all the licences following a certain pattern."""
self.load()
if not self._licence_store or not self._licence_list:
return None
matching_licences = [licence for licence in self._licence_list if licence_descriptor_pattern.match(licence)]
return (
[cast(Licence, self._licence_store.get(licence)) for licence in matching_licences]
if matching_licences
else None
)
def get_licence(self, licence_descriptor: Optional[str]) -> Optional[Licence]:
"""Determines the licence based on a string descriptor e.g. Apache 2."""
self.load()
if not self._licence_store or not self._licence_list or not licence_descriptor:
return None
cleansed_descriptor = cleanse_licence_descriptor(licence_descriptor)
likelihood, licence = determine_similar_string_from_list(cleansed_descriptor, self._licence_list)
return self._licence_store.get(licence) if likelihood > LICENCE_LIKELIHOOD_THRESHOLD else None
OPENSOURCE_LICENCES = OpenSourceLicences()
def cleanse_licence_expression(licence_expression: str) -> str:
"""Cleanses a licence expression by using SPDX identifiers when possible.
A licence expression can be a combination of licences and in a lot of cases is free-form text.
The idea is to return an equivalent expression but using SPDX identifiers when possible.
"""
simplified_expression = _parse_licence_expression(Licensing(), licence_expression).simplify()
for s in simplified_expression.symbols:
corresponding_licence = OPENSOURCE_LICENCES.get_licence(s.key)
if corresponding_licence:
s.key = corresponding_licence.identifier
return simplify_licence_expression(str(simplified_expression))
def _iter_matching_licences(desc: str) -> Iterable[Licence]:
licence = OPENSOURCE_LICENCES.get_licence(desc)
if licence:
yield licence
def _iter_matching_licences_from_pattern(desc: str) -> Iterable[Licence]:
desc_pattern = re.compile(desc.replace("*", ".*"), re.IGNORECASE)
licences = OPENSOURCE_LICENCES.get_licences_from_pattern(desc_pattern)
if licences:
yield from licences
def _retrieve_licences_from_identifier_list(identifiers: Iterable[str]) -> Iterable[Licence]:
for desc in identifiers:
if "*" in desc:
yield from _iter_matching_licences_from_pattern(desc)
else:
yield from _iter_matching_licences(desc)
def determine_allowed_opensource_licences_from_string(allowed_licences: Any) -> Iterable[Licence]:
"""Determines all the third party licences allowed as set in the input parameter."""
if isinstance(allowed_licences, str):
allowed_licences = allowed_licences.split(", ")
if isinstance(allowed_licences, (list, dict, tuple, set)):
yield from _retrieve_licences_from_identifier_list(allowed_licences)
def get_allowed_opensource_licences() -> Iterable[Licence]:
"""Determines all the third party licences allowed for a given project."""
yield from determine_allowed_opensource_licences_from_string(
configuration.get_value(ConfigurationVariable.ACCEPTED_THIRD_PARTY_LICENCES)
)
def simplify_licence_expression(licence_expression: str) -> str:
"""Simplifies a licence expression."""
return str(_parse_licence_expression(Licensing(), licence_expression).simplify())
def determine_licence_compound(main_licence: str, additional_licences: List[str]) -> str:
"""Determines the overall licence based on main licence and additional licences."""
overall_licence = f"({main_licence}) AND ({') AND ('.join(additional_licences)})"
return str(_parse_licence_expression(Licensing(), overall_licence).simplify())
def determine_licences_not_in_list(licence_expression: str, licence_list: Iterator[str]) -> Iterator[str]:
"""Determines all the licences in an expression which are not in list."""
licensing_util = Licensing()
licence_keys = licensing_util.license_keys(_parse_licence_expression(licensing_util, licence_expression))
for licence in licence_keys:
if licence not in licence_list:
yield licence
def determine_whether_licence_expression_is_compliant(licence_expression: str, licence_list: list) -> bool:
"""Checks whether an expression is compliant with a list of licences."""
licensing_util = Licensing()
for licence in licence_list:
if licensing_util.contains(licence_expression, licence):
return True
return False
def _is_expression_or(licence_expression: str) -> bool:
licensing_util = Licensing()
return isinstance(_parse_licence_expression(licensing_util, licence_expression), OR)
def is_licence_accepted(licence_expression: str) -> bool:
"""Determines whether the licence expressed is valid with regards to project's accepted licences."""
authorised_licences = [licence.identifier for licence in get_allowed_opensource_licences()]
is_or = _is_expression_or(licence_expression)
if bool([licence for licence in determine_licences_not_in_list(licence_expression, iter(authorised_licences))]):
return (
determine_whether_licence_expression_is_compliant(licence_expression, authorised_licences)
if is_or
else False
)
return True
Functions
def cleanse_licence_descriptor(licence_descriptor: str) ‑> str
-
Cleanses the licence descriptor to only keep words describing the licence.
Expand source code
def cleanse_licence_descriptor(licence_descriptor: str) -> str: """Cleanses the licence descriptor to only keep words describing the licence.""" cleansed_descriptor = licence_descriptor.strip() cleansed_descriptor = re.sub(r"OSI\s?[Aa]pproved[\:]*", "", cleansed_descriptor) cleansed_descriptor = re.sub(r"[pP]ublic [dD]omain", "", cleansed_descriptor) cleansed_descriptor = re.sub(r"BSD[-\s][Ll]ike", "BSD", cleansed_descriptor) cleansed_descriptor = re.sub(r"BSD [lL]icen[sc]e", "BSD", cleansed_descriptor) cleansed_descriptor = re.sub(r"MIT [lL]icen[sc]e", "MIT", cleansed_descriptor) cleansed_descriptor = _handle_special_licence_entries(cleansed_descriptor.strip()) if cleansed_descriptor.lower().startswith("the"): cleansed_descriptor = cleansed_descriptor[3:].strip() return cleansed_descriptor
def cleanse_licence_expression(licence_expression: str) ‑> str
-
Cleanses a licence expression by using SPDX identifiers when possible.
A licence expression can be a combination of licences and in a lot of cases is free-form text. The idea is to return an equivalent expression but using SPDX identifiers when possible.
Expand source code
def cleanse_licence_expression(licence_expression: str) -> str: """Cleanses a licence expression by using SPDX identifiers when possible. A licence expression can be a combination of licences and in a lot of cases is free-form text. The idea is to return an equivalent expression but using SPDX identifiers when possible. """ simplified_expression = _parse_licence_expression(Licensing(), licence_expression).simplify() for s in simplified_expression.symbols: corresponding_licence = OPENSOURCE_LICENCES.get_licence(s.key) if corresponding_licence: s.key = corresponding_licence.identifier return simplify_licence_expression(str(simplified_expression))
def determine_allowed_opensource_licences_from_string(allowed_licences: Any) ‑> Iterable[Licence]
-
Determines all the third party licences allowed as set in the input parameter.
Expand source code
def determine_allowed_opensource_licences_from_string(allowed_licences: Any) -> Iterable[Licence]: """Determines all the third party licences allowed as set in the input parameter.""" if isinstance(allowed_licences, str): allowed_licences = allowed_licences.split(", ") if isinstance(allowed_licences, (list, dict, tuple, set)): yield from _retrieve_licences_from_identifier_list(allowed_licences)
def determine_licence_compound(main_licence: str, additional_licences: List[str]) ‑> str
-
Determines the overall licence based on main licence and additional licences.
Expand source code
def determine_licence_compound(main_licence: str, additional_licences: List[str]) -> str: """Determines the overall licence based on main licence and additional licences.""" overall_licence = f"({main_licence}) AND ({') AND ('.join(additional_licences)})" return str(_parse_licence_expression(Licensing(), overall_licence).simplify())
def determine_licences_not_in_list(licence_expression: str, licence_list: Iterator[str]) ‑> Iterator[str]
-
Determines all the licences in an expression which are not in list.
Expand source code
def determine_licences_not_in_list(licence_expression: str, licence_list: Iterator[str]) -> Iterator[str]: """Determines all the licences in an expression which are not in list.""" licensing_util = Licensing() licence_keys = licensing_util.license_keys(_parse_licence_expression(licensing_util, licence_expression)) for licence in licence_keys: if licence not in licence_list: yield licence
def determine_whether_licence_expression_is_compliant(licence_expression: str, licence_list: list) ‑> bool
-
Checks whether an expression is compliant with a list of licences.
Expand source code
def determine_whether_licence_expression_is_compliant(licence_expression: str, licence_list: list) -> bool: """Checks whether an expression is compliant with a list of licences.""" licensing_util = Licensing() for licence in licence_list: if licensing_util.contains(licence_expression, licence): return True return False
def get_allowed_opensource_licences() ‑> Iterable[Licence]
-
Determines all the third party licences allowed for a given project.
Expand source code
def get_allowed_opensource_licences() -> Iterable[Licence]: """Determines all the third party licences allowed for a given project.""" yield from determine_allowed_opensource_licences_from_string( configuration.get_value(ConfigurationVariable.ACCEPTED_THIRD_PARTY_LICENCES) )
def is_licence_accepted(licence_expression: str) ‑> bool
-
Determines whether the licence expressed is valid with regards to project's accepted licences.
Expand source code
def is_licence_accepted(licence_expression: str) -> bool: """Determines whether the licence expressed is valid with regards to project's accepted licences.""" authorised_licences = [licence.identifier for licence in get_allowed_opensource_licences()] is_or = _is_expression_or(licence_expression) if bool([licence for licence in determine_licences_not_in_list(licence_expression, iter(authorised_licences))]): return ( determine_whether_licence_expression_is_compliant(licence_expression, authorised_licences) if is_or else False ) return True
def iter_licenses(licence_info: dict) ‑> Iterable[Licence]
-
Gets a generator over all the licences present in licenses.json.
Expand source code
def iter_licenses(licence_info: dict) -> Iterable[Licence]: """Gets a generator over all the licences present in licenses.json.""" licences = cast(Iterable[dict], licence_info.get("licenses", [])) for licence_info in licences: yield parse_licence(licence_info)
def parse_licence(licence_info: dict) ‑> Licence
-
Parses a licence entry in the Json file and translates it into a licence object.
Expand source code
def parse_licence(licence_info: dict) -> Licence: """Parses a licence entry in the Json file and translates it into a licence object.""" return Licence( reference_number=licence_info.get("referenceNumber", ""), identifier=licence_info.get("licenseId", ""), name=licence_info.get("name", ""), url=licence_info.get("detailsUrl", ""), reference=licence_info.get("reference", ""), is_deprecated=bool(licence_info.get("isDeprecatedLicenseId", False)), is_osi_approved=bool(licence_info.get("isOsiApproved", True)), )
def simplify_licence_expression(licence_expression: str) ‑> str
-
Simplifies a licence expression.
Expand source code
def simplify_licence_expression(licence_expression: str) -> str: """Simplifies a licence expression.""" return str(_parse_licence_expression(Licensing(), licence_expression).simplify())
Classes
class Licence (reference_number: str, identifier: str, name: str, is_deprecated: bool, is_osi_approved: bool, url: str, reference: str)
-
Licence descriptor.
Maps to what is defined in licenses.json in https://github.com/spdx/license-list-data/blob/master/json/licenses.json
Expand source code
@dataclass(order=True, frozen=True) class Licence: """Licence descriptor. Maps to what is defined in licenses.json in https://github.com/spdx/license-list-data/blob/master/json/licenses.json """ reference_number: str identifier: str name: str is_deprecated: bool is_osi_approved: bool url: str reference: str
Class variables
var identifier : str
var is_deprecated : bool
var is_osi_approved : bool
var name : str
var reference : str
var reference_number : str
var url : str
class OpenSourceLicences
-
All the opensource licences known.
Initialiser.
Expand source code
class OpenSourceLicences: """All the opensource licences known.""" def __init__(self) -> None: """Initialiser.""" self._licence_store: Optional[dict] = None self._licence_list: Optional[list] = None def load(self) -> None: """Loads licence data from internal Json file.""" if self._licence_list and self._licence_store: return self._licence_store = {UNKNOWN_LICENCE.identifier: UNKNOWN_LICENCE} self._licence_list = [UNKNOWN_LICENCE.identifier] with open(_licenses, "r", encoding="utf8") as f: for licence in iter_licenses(json.load(f)): self._licence_store[licence.identifier] = licence self._licence_list.append(licence.identifier) self._licence_store[licence.name] = licence self._licence_list.append(licence.name) def get_licences_from_pattern(self, licence_descriptor_pattern: Pattern) -> Optional[List[Licence]]: """Determines all the licences following a certain pattern.""" self.load() if not self._licence_store or not self._licence_list: return None matching_licences = [licence for licence in self._licence_list if licence_descriptor_pattern.match(licence)] return ( [cast(Licence, self._licence_store.get(licence)) for licence in matching_licences] if matching_licences else None ) def get_licence(self, licence_descriptor: Optional[str]) -> Optional[Licence]: """Determines the licence based on a string descriptor e.g. Apache 2.""" self.load() if not self._licence_store or not self._licence_list or not licence_descriptor: return None cleansed_descriptor = cleanse_licence_descriptor(licence_descriptor) likelihood, licence = determine_similar_string_from_list(cleansed_descriptor, self._licence_list) return self._licence_store.get(licence) if likelihood > LICENCE_LIKELIHOOD_THRESHOLD else None
Methods
def get_licence(self, licence_descriptor: Optional[str]) ‑> Optional[Licence]
-
Determines the licence based on a string descriptor e.g. Apache 2.
Expand source code
def get_licence(self, licence_descriptor: Optional[str]) -> Optional[Licence]: """Determines the licence based on a string descriptor e.g. Apache 2.""" self.load() if not self._licence_store or not self._licence_list or not licence_descriptor: return None cleansed_descriptor = cleanse_licence_descriptor(licence_descriptor) likelihood, licence = determine_similar_string_from_list(cleansed_descriptor, self._licence_list) return self._licence_store.get(licence) if likelihood > LICENCE_LIKELIHOOD_THRESHOLD else None
def get_licences_from_pattern(self, licence_descriptor_pattern: Pattern) ‑> Optional[List[Licence]]
-
Determines all the licences following a certain pattern.
Expand source code
def get_licences_from_pattern(self, licence_descriptor_pattern: Pattern) -> Optional[List[Licence]]: """Determines all the licences following a certain pattern.""" self.load() if not self._licence_store or not self._licence_list: return None matching_licences = [licence for licence in self._licence_list if licence_descriptor_pattern.match(licence)] return ( [cast(Licence, self._licence_store.get(licence)) for licence in matching_licences] if matching_licences else None )
def load(self) ‑> None
-
Loads licence data from internal Json file.
Expand source code
def load(self) -> None: """Loads licence data from internal Json file.""" if self._licence_list and self._licence_store: return self._licence_store = {UNKNOWN_LICENCE.identifier: UNKNOWN_LICENCE} self._licence_list = [UNKNOWN_LICENCE.identifier] with open(_licenses, "r", encoding="utf8") as f: for licence in iter_licenses(json.load(f)): self._licence_store[licence.identifier] = licence self._licence_list.append(licence.identifier) self._licence_store[licence.name] = licence self._licence_list.append(licence.name)