Module continuous_delivery_scripts.spdx_report.spdx_helpers

Facilities regarding SPDX.

SPDX file (i.e. tag-value format). References:
- https://github.com/OpenChain-Project/curriculum/blob/master/guides/including_license_info.rst
- https://github.com/david-a-wheeler/spdx-tutorial#spdx-files
- https://github.com/OpenChain-Project/curriculum/blob/master/guides/reusing_software.md
- https://github.com/vmware/tern/blob/c9a0c83369b92df58f7f80842aa15da5f63ed983/docs/spdx-tag-value-overview.md

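Examples
- https://spdx.org/spdx-tagvalue-example
- https://github.com/spdx/tools/blob/master/Examples/SPDXTagExample-v2.1.spdx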

Expand source code
#
# Copyright (C) 2020-2025 Arm Limited or its affiliates and Contributors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
"""Facilities regarding SPDX.

SPDX file (i.e. tag-value format)
https://github.com/OpenChain-Project/curriculum/blob/master/guides/including_license_info.rst
https://github.com/david-a-wheeler/spdx-tutorial#spdx-files
https://github.com/OpenChain-Project/curriculum/blob/master/guides/reusing_software.md
https://github.com/vmware/tern/blob/c9a0c83369b92df58f7f80842aa15da5f63ed983/docs/spdx-tag-value-overview.md

Examples:
    - https://spdx.org/spdx-tagvalue-example
    - https://github.com/spdx/tools/blob/master/Examples/SPDXTagExample-v2.1.spdx
"""
import re

import logging
import toml
from pathlib import Path
from spdx.utils import SPDXNone, UnKnown
from typing import Union, Optional, Iterator, Any, Tuple

from continuous_delivery_scripts.utils.configuration import ConfigurationVariable, configuration
from continuous_delivery_scripts.utils.definitions import UNKNOWN
from continuous_delivery_scripts.utils.filesystem_helpers import (
    scan_file_for_pattern,
    should_exclude_path,
    list_all_files,
)
from continuous_delivery_scripts.utils.third_party_licences import simplify_licence_expression

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Copyright similar to the regex defined in flake8-copyright
# Matches, case-insensitively, from the word "Copyright" up to the end of the line it appears on.
COPYRIGHT_PATTERN = r"Copyright.*$"
COPYRIGHT_REGEX_PATTERN = re.compile(COPYRIGHT_PATTERN, flags=re.MULTILINE | re.IGNORECASE)
# Specification of the identifier based on https://spdx.org/spdx-specification-21-web-version#h.twlc0ztnng3b
# and https://spdx.org/ids-how
# Group 1 captures the licence expression which follows the tag; an optional trailing "*" stays out of the group.
# NOTE(review): "\s" inside the character class also matches newlines, so the capture can run over several lines
# when the following lines only use characters of the class - confirm against how the files are scanned.
SPDX_LICENCE_IDENTIFIER_PATTERN = r"SPDX-License-Identifier: ([\.\w+\-\(\)\s]+)[\*]?$"
SPDX_IDENTIFIER_REGEX_PATTERN = re.compile(SPDX_LICENCE_IDENTIFIER_PATTERN, re.MULTILINE)
# Name of the table looked up in the project configuration file by get_project_namespace().
THIRD_PARTY_CONFIG_NAMESPACE = "spdx"
# Patterns handed to should_exclude_path() when listing the files needing licensing
# (presumably glob-style patterns; verify against should_exclude_path).
PATHS_TO_EXCLUDE = [
    "*.spdx",
    "*.png",
    "*.jpg",
    "*.rdf",
    "*.pyc",
    "*.bin",
    "*.tar",
    "*.zip",
    "**/__pycache__/**",
    "**/*.egg-info/**",
]


def determine_file_licence(path: Path) -> Optional[str]:
    """Determines the licence of a file based on the SPDX identifier.

    Args:
        path: file to scan for an SPDX licence identifier
    Returns:
        the simplified licence expression, or None when no identifier is found
        or when anything goes wrong (the failure is logged as an error)
    """
    licence = None
    try:
        found = scan_file_for_pattern(path, SPDX_IDENTIFIER_REGEX_PATTERN)
        if found:
            # Kept in a local so that the error message below can report the raw identifier.
            licence = found.group(1).strip()
            return simplify_licence_expression(licence)
    except Exception as e:
        logger.error(f"Could not determine the licence of file [{path}] from identifier '{licence}'. Reason: {e}.")
    return None


def determine_file_copyright_text(path: Path) -> Optional[str]:
    """Determines the copyright text of a file.

    Args:
        path: file to scan for a copyright statement
    Returns:
        the stripped text matched by the copyright pattern, or None if the scan finds nothing
    """
    found = scan_file_for_pattern(path, COPYRIGHT_REGEX_PATTERN)
    return str(found.group(0).strip()) if found else None


def determine_spdx_value(value: Optional[str]) -> Union[str, UnKnown, SPDXNone]:
    """Determines the correct SPDX value.

    Args:
        value: a value
    Returns:
        SPDXNone for an empty or missing value, UnKnown when the value equals UNKNOWN,
        the value itself otherwise
    """
    if value:
        return UnKnown() if value == UNKNOWN else value
    return SPDXNone()


def get_project_namespace(project_config_path: Path, document_name: str) -> str:
    """Determines the project namespace from configuration.

    Args:
        project_config_path: path to the TOML project configuration file
        document_name: name of the SPDX document
    Returns:
        a URL of the form http://<CreatorWebsite>/<PathToSpdx>/<document_name>-<UUID>
        built from the entries of the SPDX table of the configuration file
    """
    with open(str(project_config_path), "r", encoding="utf8") as config_file:
        parsed_config = toml.load(config_file)
    # A missing table behaves like an empty one: every entry is then rendered as "None".
    spdx_settings = parsed_config.get(THIRD_PARTY_CONFIG_NAMESPACE, dict())
    website = spdx_settings.get("CreatorWebsite")
    spdx_location = spdx_settings.get("PathToSpdx")
    document_uuid = spdx_settings.get("UUID")
    return f"http://{website}/{spdx_location}/{document_name}-{document_uuid}"


def list_project_files_for_licensing(project_root: Path) -> Iterator[Path]:
    """Gets a generator over all the project's files needing licensing.

    Args:
        project_root: directory handed to list_all_files
    Returns:
        whatever list_all_files yields when given the predicate below as its second argument
    """

    def _is_ignored(candidate: Path) -> bool:
        # Dot-prefixed names are always ignored; anything else is checked against PATHS_TO_EXCLUDE.
        if candidate.name.startswith("."):
            return True
        return should_exclude_path(candidate, PATHS_TO_EXCLUDE)

    return list_all_files(project_root, _is_ignored)


def _convert_list_into_dict(checked_packages: Any) -> dict:
    checked_package_description = dict()
    for item in checked_packages:
        info = item.split("=" if "=" in item else ":")
        checked_package_description[info[0].strip()] = info[-1].strip() if len(info) > 1 else None
    return checked_package_description


def determine_checked_packages_from_configuration_entry(checked_packages: Any) -> dict:
    """Determines the list of packages for which the licence has been manually checked.

    Args:
        checked_packages: configuration entry; a dictionary, a list/tuple/set of entries
            or a string of entries separated by ", "
    Returns:
        the dictionary itself when one is given, the converted entries for a string or a
        collection, an empty dictionary for anything else
    """
    if isinstance(checked_packages, dict):
        return checked_packages
    entries = checked_packages.split(", ") if isinstance(checked_packages, str) else checked_packages
    if isinstance(entries, (list, tuple, set)):
        return _convert_list_into_dict(entries)
    return {}


def get_packages_with_checked_licence() -> dict:
    """Determines the list of packages for which the licence has been checked from configuration.

    Returns:
        the PACKAGES_WITH_CHECKED_LICENCE configuration value converted into a dictionary
    """
    configured_entry = configuration.get_value(ConfigurationVariable.PACKAGES_WITH_CHECKED_LICENCE)
    return determine_checked_packages_from_configuration_entry(configured_entry)


def get_package_manual_check(package_name: str) -> Tuple[bool, Optional[str]]:
    """Gets information about package licence manual check.

    Args:
        package_name: name of the package; surrounding whitespace is ignored
    Returns:
        a tuple: whether the package is listed as checked, and the value recorded for it (if any)
    """
    known_packages = get_packages_with_checked_licence()
    name = package_name.strip()
    is_listed = bool(name in known_packages)
    return is_listed, known_packages.get(name)


def is_package_licence_manually_checked(package_name: str) -> bool:
    """States whether the licence of a package has been manually checked and hence, that its licence is compliant.

    Args:
        package_name: name of the package
    Returns:
        True if the name, or the name with every "." replaced by "-", is listed as checked
    """
    found_as_given = get_package_manual_check(package_name)[0]
    if found_as_given:
        return found_as_given
    # Second attempt with dots turned into dashes.
    return get_package_manual_check(package_name.replace(".", "-"))[0]

Functions

def determine_checked_packages_from_configuration_entry(checked_packages: Any) ‑> dict

Determines the list of packages for which the licence has been manually checked.

Expand source code
def determine_checked_packages_from_configuration_entry(checked_packages: Any) -> dict:
    """Determines the list of packages for which the licence has been manually checked.

    Args:
        checked_packages: configuration entry; a dictionary, a list/tuple/set of entries
            or a string of entries separated by ", "
    Returns:
        the dictionary itself when one is given, the converted entries for a string or a
        collection, an empty dictionary for anything else
    """
    if isinstance(checked_packages, dict):
        return checked_packages
    entries = checked_packages.split(", ") if isinstance(checked_packages, str) else checked_packages
    if isinstance(entries, (list, tuple, set)):
        return _convert_list_into_dict(entries)
    return {}

def determine_file_copyright_text(path: pathlib.Path) ‑> Optional[str]

Determines the copyright text of a file.

Expand source code
def determine_file_copyright_text(path: Path) -> Optional[str]:
    """Determines the copyright text of a file.

    Args:
        path: file to scan for a copyright statement
    Returns:
        the stripped text matched by the copyright pattern, or None if the scan finds nothing
    """
    found = scan_file_for_pattern(path, COPYRIGHT_REGEX_PATTERN)
    return str(found.group(0).strip()) if found else None
def determine_file_licence(path: pathlib.Path) ‑> Optional[str]

Determines the licence of a file based on the SPDX identifier.

Expand source code
def determine_file_licence(path: Path) -> Optional[str]:
    """Determines the licence of a file based on the SPDX identifier.

    Args:
        path: file to scan for an SPDX licence identifier
    Returns:
        the simplified licence expression, or None when no identifier is found
        or when anything goes wrong (the failure is logged as an error)
    """
    licence = None
    try:
        found = scan_file_for_pattern(path, SPDX_IDENTIFIER_REGEX_PATTERN)
        if found:
            # Kept in a local so that the error message below can report the raw identifier.
            licence = found.group(1).strip()
            return simplify_licence_expression(licence)
    except Exception as e:
        logger.error(f"Could not determine the licence of file [{path}] from identifier '{licence}'. Reason: {e}.")
    return None
def determine_spdx_value(value: Optional[str]) ‑> Union[str, spdx.utils.UnKnown, spdx.utils.SPDXNone]

Determines the correct SPDX value.

Args

value
a value

Returns

correct SPDX value a string, UnKnown or SPDXNone

Expand source code
def determine_spdx_value(value: Optional[str]) -> Union[str, UnKnown, SPDXNone]:
    """Determines the correct SPDX value.

    Args:
        value: a value
    Returns:
        SPDXNone for an empty or missing value, UnKnown when the value equals UNKNOWN,
        the value itself otherwise
    """
    if value:
        return UnKnown() if value == UNKNOWN else value
    return SPDXNone()
def get_package_manual_check(package_name: str) ‑> Tuple[bool, Optional[str]]

Gets information about package licence manual check.

Expand source code
def get_package_manual_check(package_name: str) -> Tuple[bool, Optional[str]]:
    """Gets information about package licence manual check.

    Args:
        package_name: name of the package; surrounding whitespace is ignored
    Returns:
        a tuple: whether the package is listed as checked, and the value recorded for it (if any)
    """
    known_packages = get_packages_with_checked_licence()
    name = package_name.strip()
    is_listed = bool(name in known_packages)
    return is_listed, known_packages.get(name)
def get_packages_with_checked_licence() ‑> dict

Determines the list of packages for which the licence has been checked from configuration.

Expand source code
def get_packages_with_checked_licence() -> dict:
    """Determines the list of packages for which the licence has been checked from configuration.

    Returns:
        the PACKAGES_WITH_CHECKED_LICENCE configuration value converted into a dictionary
    """
    configured_entry = configuration.get_value(ConfigurationVariable.PACKAGES_WITH_CHECKED_LICENCE)
    return determine_checked_packages_from_configuration_entry(configured_entry)
def get_project_namespace(project_config_path: pathlib.Path, document_name: str) ‑> str

Determines the project namespace from configuration.

Expand source code
def get_project_namespace(project_config_path: Path, document_name: str) -> str:
    """Determines the project namespace from configuration.

    Args:
        project_config_path: path to the TOML project configuration file
        document_name: name of the SPDX document
    Returns:
        a URL of the form http://<CreatorWebsite>/<PathToSpdx>/<document_name>-<UUID>
        built from the entries of the SPDX table of the configuration file
    """
    with open(str(project_config_path), "r", encoding="utf8") as config_file:
        parsed_config = toml.load(config_file)
    # A missing table behaves like an empty one: every entry is then rendered as "None".
    spdx_settings = parsed_config.get(THIRD_PARTY_CONFIG_NAMESPACE, dict())
    website = spdx_settings.get("CreatorWebsite")
    spdx_location = spdx_settings.get("PathToSpdx")
    document_uuid = spdx_settings.get("UUID")
    return f"http://{website}/{spdx_location}/{document_name}-{document_uuid}"
def is_package_licence_manually_checked(package_name: str) ‑> bool

States whether the licence of a package has been manually checked and hence, that its licence is compliant.

Expand source code
def is_package_licence_manually_checked(package_name: str) -> bool:
    """States whether the licence of a package has been manually checked and hence, that its licence is compliant.

    Args:
        package_name: name of the package
    Returns:
        True if the name, or the name with every "." replaced by "-", is listed as checked
    """
    found_as_given = get_package_manual_check(package_name)[0]
    if found_as_given:
        return found_as_given
    # Second attempt with dots turned into dashes.
    return get_package_manual_check(package_name.replace(".", "-"))[0]
def list_project_files_for_licensing(project_root: pathlib.Path) ‑> Iterator[pathlib.Path]

Gets a generator over all the project's files needing licensing.

Expand source code
def list_project_files_for_licensing(project_root: Path) -> Iterator[Path]:
    """Gets a generator over all the project's files needing licensing.

    Args:
        project_root: directory handed to list_all_files
    Returns:
        whatever list_all_files yields when given the predicate below as its second argument
    """

    def _is_ignored(candidate: Path) -> bool:
        # Dot-prefixed names are always ignored; anything else is checked against PATHS_TO_EXCLUDE.
        if candidate.name.startswith("."):
            return True
        return should_exclude_path(candidate, PATHS_TO_EXCLUDE)

    return list_all_files(project_root, _is_ignored)