Module continuous_delivery_scripts.utils.string_helpers

Utilities regarding string handling.

Expand source code
#
# Copyright (C) 2020-2025 Arm Limited or its affiliates and Contributors. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
"""Utilities regarding string handling."""
from functools import total_ordering

import jellyfish
from dataclasses import dataclass
from typing import Iterable, Tuple


@total_ordering
@dataclass(frozen=True)
class MatchingStats:
    """Definition of matching statistics for two strings.

    Instances are ordered from the most likely match to the least likely one,
    so the first element of a sorted collection is the best candidate.

    Attributes:
        string1: a string
        string2: another string
        damerau_levenshtein_distance: the Damerau Levenshtein distance between the two strings
            (a lower value means a closer match).
        jaro_winkler_distance: the Jaro Winkler score between the two strings. Despite the name,
            `determine_matching_stats` fills it with a similarity, so a higher value means a
            closer match.
        match_rating_approach_comparison: the match rating approach comparison.
        exact_match: whether the two strings match exactly.
    """

    string1: str
    string2: str
    damerau_levenshtein_distance: int
    jaro_winkler_distance: float
    match_rating_approach_comparison: bool
    exact_match: bool

    def __lt__(self, other: "MatchingStats") -> bool:
        """Redefines Less than.

        The stats are "smaller" if the strings are more likely to match: a higher
        Jaro Winkler score wins, and the Damerau Levenshtein distance is only used
        to break ties between identical scores.

        Comparing the two criteria lexicographically keeps the ordering strict:
        `a < b` and `b < a` can never both be true, which sorting relies on.
        """
        if not isinstance(other, MatchingStats):
            # Let Python try the reflected operation or raise TypeError.
            return NotImplemented
        if self.jaro_winkler_distance != other.jaro_winkler_distance:
            return self.jaro_winkler_distance > other.jaro_winkler_distance
        return self.damerau_levenshtein_distance < other.damerau_levenshtein_distance

    def __eq__(self, other: object) -> bool:
        """Redefines equal.

        Two stats are equal when they agree on `exact_match` or on
        `match_rating_approach_comparison`; anything that is not a
        `MatchingStats` is never equal.
        """
        # NOTE(review): equality ignores the two values used by __lt__, so
        # `a == b` and `a < b` can both hold - confirm this is intended.
        if not isinstance(other, MatchingStats):
            return False
        return (self.exact_match == other.exact_match) or (
            self.match_rating_approach_comparison == other.match_rating_approach_comparison
        )


def determine_matching_stats(string1: str, string2: str) -> MatchingStats:
    """Computes every matching statistic for a pair of strings.

    Args:
        string1: first string of the pair.
        string2: second string of the pair.

    Returns:
        the statistics gathered from the jellyfish metrics, together with an
        equality check that ignores surrounding whitespace and letter case.
    """
    edit_distance = jellyfish.damerau_levenshtein_distance(string1, string2)
    similarity = jellyfish.jaro_winkler_similarity(string1, string2)
    rating_match = jellyfish.match_rating_comparison(string1, string2)
    # Only the exact-match flag works on normalised text; the metrics above
    # are computed on the strings as received.
    normalised_first = string1.strip().lower()
    normalised_second = string2.strip().lower()
    return MatchingStats(
        string1=string1,
        string2=string2,
        damerau_levenshtein_distance=edit_distance,
        jaro_winkler_distance=similarity,
        match_rating_approach_comparison=rating_match,
        exact_match=normalised_first == normalised_second,
    )


def determine_similar_string_from_list(string: str, strings: Iterable[str]) -> Tuple[float, str]:
    """Finds, among several candidates, the string closest to a given one.

    Args:
        string: the string to look for; surrounding whitespace is stripped first.
        strings: the candidate strings to compare against.

    Returns:
        the `jaro_winkler_distance` value of the best candidate and the candidate itself.

    Raises:
        IndexError: if `strings` yields no candidate.
    """
    needle = string.strip()
    candidates = [determine_matching_stats(needle, candidate) for candidate in strings]
    # MatchingStats sorts the most likely match first.
    candidates.sort()
    best = candidates[0]
    return best.jaro_winkler_distance, best.string2

Functions

def determine_matching_stats(string1: str, string2: str) ‑> MatchingStats

Determines the different distances between two strings.

Expand source code
def determine_matching_stats(string1: str, string2: str) -> MatchingStats:
    """Computes every matching statistic for a pair of strings.

    Args:
        string1: first string of the pair.
        string2: second string of the pair.

    Returns:
        the statistics gathered from the jellyfish metrics, together with an
        equality check that ignores surrounding whitespace and letter case.
    """
    edit_distance = jellyfish.damerau_levenshtein_distance(string1, string2)
    similarity = jellyfish.jaro_winkler_similarity(string1, string2)
    rating_match = jellyfish.match_rating_comparison(string1, string2)
    # Only the exact-match flag works on normalised text; the metrics above
    # are computed on the strings as received.
    normalised_first = string1.strip().lower()
    normalised_second = string2.strip().lower()
    return MatchingStats(
        string1=string1,
        string2=string2,
        damerau_levenshtein_distance=edit_distance,
        jaro_winkler_distance=similarity,
        match_rating_approach_comparison=rating_match,
        exact_match=normalised_first == normalised_second,
    )
def determine_similar_string_from_list(string: str, strings: Iterable[str]) ‑> Tuple[float, str]

Determines the closest string to a string from a list.

Expand source code
def determine_similar_string_from_list(string: str, strings: Iterable[str]) -> Tuple[float, str]:
    """Finds, among several candidates, the string closest to a given one.

    Args:
        string: the string to look for; surrounding whitespace is stripped first.
        strings: the candidate strings to compare against.

    Returns:
        the `jaro_winkler_distance` value of the best candidate and the candidate itself.

    Raises:
        IndexError: if `strings` yields no candidate.
    """
    needle = string.strip()
    candidates = [determine_matching_stats(needle, candidate) for candidate in strings]
    # MatchingStats sorts the most likely match first.
    candidates.sort()
    best = candidates[0]
    return best.jaro_winkler_distance, best.string2

Classes

class MatchingStats (string1: str, string2: str, damerau_levenshtein_distance: int, jaro_winkler_distance: float, match_rating_approach_comparison: bool, exact_match: bool)

Definition of matching statistics for two strings.

Attributes

string1
a string
string2
another string
damerau_levenshtein_distance
the Damerau Levenshtein distance between the two strings.
jaro_winkler_distance
the Jaro Winkler similarity between the two strings (computed with jellyfish.jaro_winkler_similarity, so a higher value means a closer match).
match_rating_approach_comparison
the match rating approach comparison.
exact_match
whether the two strings match exactly.
Expand source code
@total_ordering
@dataclass(frozen=True)
class MatchingStats:
    """Definition of matching statistics for two strings.

    Instances are ordered from the most likely match to the least likely one,
    so the first element of a sorted collection is the best candidate.

    Attributes:
        string1: a string
        string2: another string
        damerau_levenshtein_distance: the Damerau Levenshtein distance between the two strings
            (a lower value means a closer match).
        jaro_winkler_distance: the Jaro Winkler score between the two strings. Despite the name,
            `determine_matching_stats` fills it with a similarity, so a higher value means a
            closer match.
        match_rating_approach_comparison: the match rating approach comparison.
        exact_match: whether the two strings match exactly.
    """

    string1: str
    string2: str
    damerau_levenshtein_distance: int
    jaro_winkler_distance: float
    match_rating_approach_comparison: bool
    exact_match: bool

    def __lt__(self, other: "MatchingStats") -> bool:
        """Redefines Less than.

        The stats are "smaller" if the strings are more likely to match: a higher
        Jaro Winkler score wins, and the Damerau Levenshtein distance is only used
        to break ties between identical scores.

        Comparing the two criteria lexicographically keeps the ordering strict:
        `a < b` and `b < a` can never both be true, which sorting relies on.
        """
        if not isinstance(other, MatchingStats):
            # Let Python try the reflected operation or raise TypeError.
            return NotImplemented
        if self.jaro_winkler_distance != other.jaro_winkler_distance:
            return self.jaro_winkler_distance > other.jaro_winkler_distance
        return self.damerau_levenshtein_distance < other.damerau_levenshtein_distance

    def __eq__(self, other: object) -> bool:
        """Redefines equal.

        Two stats are equal when they agree on `exact_match` or on
        `match_rating_approach_comparison`; anything that is not a
        `MatchingStats` is never equal.
        """
        # NOTE(review): equality ignores the two values used by __lt__, so
        # `a == b` and `a < b` can both hold - confirm this is intended.
        if not isinstance(other, MatchingStats):
            return False
        return (self.exact_match == other.exact_match) or (
            self.match_rating_approach_comparison == other.match_rating_approach_comparison
        )

Class variables

var damerau_levenshtein_distance : int
var exact_match : bool
var jaro_winkler_distance : float
var match_rating_approach_comparison : bool
var string1 : str
var string2 : str