cnt.rulebase.rules.interval_based_operations package

Submodules

cnt.rulebase.rules.interval_based_operations.basic_operation module

Collect the Unicode code points specified by intervals.

class cnt.rulebase.rules.interval_based_operations.basic_operation.BasicIntervalBasedOperation(intervals)[source]

Bases: object

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

class cnt.rulebase.rules.interval_based_operations.basic_operation.IntervalBasedOperationLabelProcessor(input_sequence, index_labels_generator, config)[source]

Bases: cnt.rulebase.workflow.basic_workflow.BasicLabelProcessor

result()[source]

A label processor may return a value of any type. Derived classes must override this method.

Return type

Generator[Tuple[int, bool], None, None]

class cnt.rulebase.rules.interval_based_operations.basic_operation.IntervalBasedOperationOutputGenerator(input_sequence, label_processor_result, config)[source]

Bases: cnt.rulebase.workflow.basic_workflow.BasicOutputGenerator

continuous_intervals()[source]
Return type

Generator[Tuple[Tuple[int, int], bool], None, None]

class cnt.rulebase.rules.interval_based_operations.basic_operation.IntervalsCollectionBasedOperation(intervals_collection)[source]

Bases: object

initialize_label_processor_class()[source]

Derived class should override this method by initializing self._label_processor_class.

Return type

None

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

cnt.rulebase.rules.interval_based_operations.builtin_application module

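TODO: document this module. It defines BuiltInCollector and BuiltInReplacer, which expose prebuilt collectors and replacers for Chinese characters, delimiters, digits, and English characters.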

class cnt.rulebase.rules.interval_based_operations.builtin_application.BuiltInCollector[source]

Bases: object

chinese_chars = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector object>
chinese_chars_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy object>
chinese_sentence_chars = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector object>
chinese_sentence_chars_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy object>
delimiters = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector object>
delimiters_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy object>
digits = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector object>
digits_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy object>
english_chars = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector object>
english_chars_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy object>
classmethod generate_collector(intervals_collection)[source]
Return type

IntervalBasedCollector

classmethod generate_collector_lazy(intervals_collection)[source]
Return type

IntervalBasedCollectorLazy

classmethod setup_collector(name, intervals_collection)[source]
Return type

None

class cnt.rulebase.rules.interval_based_operations.builtin_application.BuiltInReplacer[source]

Bases: object

REGISTERED_REPL_KEY: Dict[str, Callable[[str], str]] = {'empty': <function BuiltInReplacer.<lambda>>, 'space': <function BuiltInReplacer.<lambda>>, 'tab': <function BuiltInReplacer.<lambda>>}
chinese_chars = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
chinese_chars_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
chinese_chars_spaced = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
chinese_chars_spaced_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
chinese_chars_spaced_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
chinese_chars_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
delimiters = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
delimiters_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
delimiters_spaced = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
delimiters_spaced_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
delimiters_spaced_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
delimiters_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
digits = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
digits_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
digits_spaced = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
digits_spaced_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
digits_spaced_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
digits_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
english_chars = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
english_chars_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
english_chars_spaced = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer object>
english_chars_spaced_lazy = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy object>
english_chars_spaced_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
english_chars_to_string = <cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString object>
classmethod generate_param(repl_with_intervals_collection)[source]
Return type

Dict[Callable[[str], str], List[Tuple[int, int]]]

classmethod generate_replacer(repl_with_intervals_collection)[source]
Return type

IntervalsCollectionBasedReplacer

classmethod generate_replacer_lazy(repl_with_intervals_collection)[source]
Return type

IntervalsCollectionBasedReplacerLazy

classmethod generate_replacer_to_string(repl_with_intervals_collection)[source]
Return type

IntervalsCollectionBasedReplacerToString

classmethod setup_replacer(name, repl, intervals_collection)[source]
Return type

None

cnt.rulebase.rules.interval_based_operations.interval_based_collector module

Collect the Unicode code points specified by intervals.

class cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollector(intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.basic_operation.BasicIntervalBasedOperation

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

result(text)[source]
Return type

List[Tuple[str, Tuple[int, int]]]

class cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorLazy(intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.basic_operation.BasicIntervalBasedOperation

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

result(text)[source]
Return type

Generator[Tuple[str, Tuple[int, int]], None, None]

class cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorOutputGenerator(input_sequence, label_processor_result, config)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_collector._IntervalBasedCollectorOutputGenerator

result()[source]

An output generator may return a value of any type. Derived classes must override this method.

Return type

List[Tuple[str, Tuple[int, int]]]

class cnt.rulebase.rules.interval_based_operations.interval_based_collector.IntervalBasedCollectorOutputGeneratorLazy(input_sequence, label_processor_result, config)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_collector._IntervalBasedCollectorOutputGenerator

result()[source]

An output generator may return a value of any type. Derived classes must override this method.

Return type

Generator[Tuple[str, Tuple[int, int]], None, None]

cnt.rulebase.rules.interval_based_operations.interval_based_replacer module

Replace the Unicode code points specified by intervals with arbitrary strings.

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacer(replacer_intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOperation

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

result(text)[source]
Return type

List[Tuple[str, Tuple[Tuple[int, int], Tuple[int, int], bool]]]

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerConfig(labeler2repl)[source]

Bases: cnt.rulebase.workflow.basic_workflow.BasicConfig

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLabelProcessor(input_sequence, index_labels_generator, config)[source]

Bases: cnt.rulebase.workflow.basic_workflow.BasicLabelProcessor

result()[source]

A label processor may return a value of any type. Derived classes must override this method.

Return type

Generator[Tuple[int, Optional[Type[IntervalLabeler]]], None, None]

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerLazy(replacer_intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOperation

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

result(text)[source]
Return type

Generator[Tuple[str, Tuple[Tuple[int, int], Tuple[int, int], bool]], None, None]

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOperation(replacer_intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.basic_operation.IntervalsCollectionBasedOperation

initialize_label_processor_class()[source]

Derived class should override this method by initializing self._label_processor_class.

Return type

None

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOutputGenerator(input_sequence, label_processor_result, config)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_replacer._IntervalsCollectionBasedReplacerOutputGenerator

result()[source]

An output generator may return a value of any type. Derived classes must override this method.

Return type

List[Tuple[str, Tuple[Tuple[int, int], Tuple[int, int], bool]]]

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOutputGeneratorLazy(input_sequence, label_processor_result, config)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_replacer._IntervalsCollectionBasedReplacerOutputGenerator

result()[source]

An output generator may return a value of any type. Derived classes must override this method.

Return type

Generator[Tuple[str, Tuple[Tuple[int, int], Tuple[int, int], bool]], None, None]

class cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerToString(replacer_intervals)[source]

Bases: cnt.rulebase.rules.interval_based_operations.interval_based_replacer.IntervalsCollectionBasedReplacerOperation

initialize_output_generator_class()[source]

Derived class should override this method by initializing self._output_generator_class.

Return type

None

result(text)[source]
Return type

str

Module contents

Collect or replace characters whose Unicode code points fall within the given intervals.