Source code for goma.prioritymatch

# -*- coding: utf-8 -*-

# goma
# ----
# Generic object mapping algorithm.
# 
# Author:   sonntagsgesicht, based on a fork of Deutsche Postbank [pbrisk]
# Version:  0.2, copyright Wednesday, 18 September 2019
# Website:  https://github.com/sonntagsgesicht/goma
# License:  Apache License 2.0 (see LICENSE file)


from .basematch import BaseMatch
from .exactmatch import ExactMatch


[docs]class PriorityMatch(BaseMatch): """ Priority Match class """ def __init__(self): self.exact_match = ExactMatch()
[docs] def match(self, match_details, mapping_list, start_col=0): """ matching based on prioritizing entries in the right column of the mapping list Parameters: match_details (list): holds the information based on which the mapping should be conducted, a row entry is structured as [Detail ,Value] mapping_list (list): hold the mapping information, the first row describes the properties on which mapping should be conducted and a column named target start_col (int): starting column for the matching algorithms of the mapping_list Return: string: target value which has been matched The priority match uses a given mapping_list, e.g +-------------------------+------------+----------+ | Property1 | Property2 | Property3 | Target | +============+============+============+==========+ | Value1_1 | | Value3_1 | Target1 | +------------+------------+------------+----------+ | Value1_1 | | Value3_2 | Target2 | +------------+------------+------------+----------+ | | Value2_1 | Value3_3 | Target3 | +------------+------------+------------+----------+ | | Value2_1 | Value3_4 | Target4 | +------------+------------+------------+----------+ Given the above tables, the matching searches column by column, starting on the left hand side (highest priority columns), for the respective mapping list. For a given list of match_details, e.g. +------------+-----------+ | Property1 | Value1_2 | +------------+-----------+ | Property2 | Value2_1 | +------------+-----------+ | Property3 | Value3_3 | +------------+-----------+ the match returns in a first step a 2x4 matrix +------------+------------+------------+----------+ | | Value2_1 | Value3_3 | Target3 | +------------+------------+------------+----------+ | | Value2_1 | Value3_4 | Target4 | +------------+------------+------------+----------+ As a second step, the empty columns are removed and the exact match algorithm is applied to the remaining properties to find the target, which for the given example yields Target3. If for column no matching is found it continues recursively at the next column and searches for the respective property, i.e Property3. """ match = False rows = [] mapping_list_cols = list(mapping_list[0]) target_col = mapping_list_cols.index('Target') # todo add some description on what is going on here for j, attr in enumerate(mapping_list_cols[start_col:-1]): for i in range(1, len(mapping_list)): detail_value = None for c in range(0, len(match_details)): if match_details[c][0] == attr: detail_value = match_details[c][1] break if detail_value != '': if self._has_relation_operator(mapping_list[i][j]): if self._apply_relation_match(mapping_list[i][j], detail_value): rows.append(i) match = True else: match = False elif type(detail_value)(mapping_list[i][j]) == detail_value: rows.append(i) match = True else: match = False else: match = False new_mapping_list = list() new_mapping_list.append(mapping_list_cols[start_col:]) new_mapping_list.extend([mapping_list[i][start_col:] for i in rows]) # todo add some description on what is going on here if len(new_mapping_list[:-1]) == 1: match_obj = new_mapping_list[len(new_mapping_list) - 1][target_col] else: empty_cols = [[i for i, str in enumerate(j[:]) if str == ''] for j in new_mapping_list[1:]] unique_empty_cols = set([item for sublist in empty_cols for item in sublist]) n = 0 # todo add some description on what is going on here for i, j in enumerate(unique_empty_cols): if i == 0: n = 0 [[new_mapping_list[r].pop(j)] for r in range(0, len(new_mapping_list))] else: n += 1 [[new_mapping_list[r].pop(j - n)] for r in range(0, len(new_mapping_list))] match_obj = self.exact_match.match(match_details, new_mapping_list) if not match_obj: new_mapping_list_cols = [map_col for map_col in new_mapping_list[0]] start_col = [i for i, name in enumerate(mapping_list_cols) if name == new_mapping_list_cols[0]][0] + 1 if start_col < len(mapping_list_cols[:-1]): match_obj = self.match(match_details, mapping_list, start_col) return match_obj