Source code for deepparse.comparer.formatted_compared_addresses_raw

from dataclasses import dataclass
from typing import Dict

from .formatted_compared_addresses import FormattedComparedAddresses


@dataclass(frozen=True)
class FormattedComparedAddressesRaw(FormattedComparedAddresses):
    """
    A formatted compared address of two raw (not parsed) addresses.

    The comparison report is built from the raw strings of ``first_address`` and ``second_address``; the
    ``identical`` and ``equivalent`` flags, ``origin`` and the colour-diff helpers are not defined in this
    class (presumably they are provided by ``FormattedComparedAddresses``).
    """

    def _get_probs(self) -> Dict:
        """
        Map each raw address to the parsed components of that address.

        A method needs to be implemented in each class because they don't use the probabilities the same way.

        Return:
            A dictionary keyed by the raw address strings, first address first. Because the raw strings are
            the keys, two addresses with the same raw string yield a single entry.
        """
        return {
            self.first_address.raw_address: self.first_address.address_parsed_components,
            self.second_address.raw_address: self.second_address.address_parsed_components,
        }

    def _get_raw_diff_color(self, verbose: bool = True) -> str:
        """
        Build the highlighted difference between the two raw addresses.

        Nothing is printed here: the formatted text is returned to the caller.

        Args:
            verbose: If ``True``, the colour legend is placed before the highlighted difference.

        Return:
            The optional legend followed by the coloured difference and a trailing newline.
        """
        legend = ""
        if verbose:
            legend = (
                "White: Shared\n"
                "Blue: Belongs only to the first address\n"
                "Yellow: Belongs only to the second address\n"
                "\n"
            )

        highlighted_diff = self._get_color_diff(
            self.first_address.raw_address,
            self.second_address.raw_address,
            highlight=True,
        )
        return legend + highlighted_diff + "\n"

    def _comparison_report_builder(self) -> str:
        """
        Builds the core of a comparison report for the various comparisons. Since the procedure to make a tags
        comparison and the raw addresses comparison are different, the comparison report is not the same for the two.
        It is then implemented in each specific class.

        Return:
            The report text: the verdict header, the raw address(es), the parsed components of each address and,
            when the addresses are not identical, either the raw differences (equivalent addresses) or the tags
            differences (non-equivalent addresses).
        """
        intro_str = "Comparison report of the two raw addresses: "
        first_address = self.first_address
        second_address = self.second_address

        # Read each flag once; ``equivalent`` is only consulted for non-identical pairs.
        identical = self.identical
        equivalent = not identical and self.equivalent

        if identical:
            report_parts = [
                f"{intro_str}Identical\n\n",
                f"Address : {first_address.raw_address}\n\n\n",
            ]
        else:
            verdict = "Equivalent" if equivalent else "Not equivalent"
            report_parts = [
                f"{intro_str}{verdict}\n\n",
                f"First address : {first_address.raw_address}\n",
                "and\n",
                f"Second address: {second_address.raw_address}\n\n\n",
            ]

        report_parts.append(f"Probabilities of parsed tags for the addresses with {self.origin[0]}: \n\n")

        probs = list(self._get_probs().values())
        report_parts.append("Parsed address: " + repr(first_address) + "\n")
        report_parts.append(str(probs[0]) + "\n")

        if not identical:
            # NOTE(review): probs[1] exists only if the two raw strings differ, since they are the keys of
            # the mapping returned by _get_probs -- confirm that non-identical implies distinct raw strings.
            report_parts.append("\nParsed address: " + repr(second_address) + "\n")
            report_parts.append(str(probs[1]) + "\n")

            if equivalent:
                report_parts.append("\n\nRaw differences between the two addresses: \n")
                report_parts.append(self._get_raw_diff_color())
            else:
                report_parts.append("\n\nAddresses tags differences between the two addresses: \n")
                report_parts.append(self._get_tags_diff_color())

        return "".join(report_parts)