Source code for cnt.rulebase.rules.interval_based_operations.interval_based_collector

"""
Collect the unicode codepoint specified by intervals.
"""
from typing import cast

from cnt.rulebase import workflow
from cnt.rulebase.rules.interval_based_operations.basic_operation import (
        IntervalBasedOperationOutputGenerator,
        BasicIntervalBasedOperation,
)


#pylint: disable=W0223
class _IntervalBasedCollectorOutputGenerator(IntervalBasedOperationOutputGenerator):

    def _result(self) -> workflow.SegmentGeneratorType:
        for interval, label in self.continuous_intervals():
            if label:
                start, end = interval
                yield self.input_sequence[start:end], interval


[docs]class IntervalBasedCollectorOutputGeneratorLazy(_IntervalBasedCollectorOutputGenerator):
[docs] def result(self) -> workflow.SegmentGeneratorType: return self._result()
[docs]class IntervalBasedCollectorOutputGenerator(_IntervalBasedCollectorOutputGenerator):
[docs] def result(self) -> workflow.SegmentListType: return list(self._result())
[docs]class IntervalBasedCollectorLazy(BasicIntervalBasedOperation):
[docs] def initialize_output_generator_class(self) -> None: self._output_generator_class = IntervalBasedCollectorOutputGeneratorLazy
[docs] def result(self, text: str) -> workflow.SegmentGeneratorType: return cast(workflow.SegmentGeneratorType, self.interval_based_workflow.result(text))
[docs]class IntervalBasedCollector(BasicIntervalBasedOperation):
[docs] def initialize_output_generator_class(self) -> None: self._output_generator_class = IntervalBasedCollectorOutputGenerator
[docs] def result(self, text: str) -> workflow.SegmentListType: return cast(workflow.SegmentListType, self.interval_based_workflow.result(text))