ja_sentence_segmenter.concatenate.simple_concatenator
Simple sentence concatenator for Japanese text.
"""Simple sentence concatenator for Japanese text."""
import re
from typing import Generator, Iterator, List, Optional, Union, overload


def __concatenate_matching_iter(
    texts: Iterator[str],
    former_matching_rule: Optional[str],
    latter_matching_rule: Optional[str],
    remove_former_matched: bool,
    remove_latter_matched: bool,
) -> Generator[str, None, None]:
    r"""Merge neighbouring texts whenever the configured rules match.

    The iterator is walked pairwise: ``former`` is the text accumulated so far
    and ``latter`` is the next item. When every configured rule matches, the
    two are joined and the joined text becomes the new ``former`` (so it can
    be merged again with the following item). Otherwise ``former`` is yielded
    and ``latter`` takes its place.

    Parameters
    ----------
    texts : Iterator[str]
        texts to walk through.
    former_matching_rule : Optional[str]
        regular expression applied with ``re.match`` to the former text.
        None or an empty string means "no rule".
    latter_matching_rule : Optional[str]
        regular expression applied with ``re.match`` to the latter text.
        None or an empty string means "no rule".
    remove_former_matched : bool
        if True, only the named group 'result' of the former match is kept.
    remove_latter_matched : bool
        if True, only the named group 'result' of the latter match is kept.

    Yields
    ------
    Generator[str, None, None]
        concatenated texts.
    """
    # Compile each rule once instead of handing the raw pattern to re.match
    # on every iteration.
    former_pattern = re.compile(former_matching_rule) if former_matching_rule else None
    latter_pattern = re.compile(latter_matching_rule) if latter_matching_rule else None
    has_rule = former_pattern is not None or latter_pattern is not None

    # Only the first next() can signal "empty input"; keep the try that narrow.
    try:
        former = next(texts)
    except StopIteration:
        return

    for latter in texts:
        former_match = former_pattern.match(former) if former_pattern is not None else None
        latter_match = latter_pattern.match(latter) if latter_pattern is not None else None

        # A configured rule has to match; a missing rule imposes no constraint.
        former_ok = former_pattern is None or former_match is not None
        latter_ok = latter_pattern is None or latter_match is not None

        if has_rule and former_ok and latter_ok:
            head = former
            if former_match is not None and remove_former_matched:
                head = former_match.group("result")
            tail = latter
            if latter_match is not None and remove_latter_matched:
                tail = latter_match.group("result")
            former = head + tail
        else:
            yield former
            former = latter

    yield former


@overload
def concatenate_matching(
    arg: str,
    former_matching_rule: Optional[str] = None,
    latter_matching_rule: Optional[str] = None,
    remove_former_matched: bool = True,
    remove_latter_matched: bool = True,
) -> Generator[str, None, None]:
    ...


@overload
def concatenate_matching(
    arg: List[str],
    former_matching_rule: Optional[str] = None,
    latter_matching_rule: Optional[str] = None,
    remove_former_matched: bool = True,
    remove_latter_matched: bool = True,
) -> Generator[str, None, None]:
    ...


@overload
def concatenate_matching(
    arg: Iterator[str],
    former_matching_rule: Optional[str] = None,
    latter_matching_rule: Optional[str] = None,
    remove_former_matched: bool = True,
    remove_latter_matched: bool = True,
) -> Generator[str, None, None]:
    ...


def concatenate_matching(
    arg: Union[str, List[str], Iterator[str]],
    former_matching_rule: Optional[str] = None,
    latter_matching_rule: Optional[str] = None,
    remove_former_matched: bool = True,
    remove_latter_matched: bool = True,
) -> Generator[str, None, None]:
    r"""Concatenate adjacent lines according to regular expression rules.

    Parameters
    ----------
    arg : Union[str, List[str], Iterator[str]]
        texts you want to concatenate.
        a single str has no neighbour to merge with and is yielded unchanged.
        any other type yields nothing.
    former_matching_rule : Optional[str], optional
        regular expression for former line, by default None
    latter_matching_rule : Optional[str], optional
        regular expression for latter line, by default None
    remove_former_matched : bool, optional
        whether to remove matched place of former line, by default True.
        if this is True, former_matching_rule must contain named group 'result',
        only that group remains.
        e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"
    remove_latter_matched : bool, optional
        whether to remove matched place of latter line, by default True.
        if this is True, latter_matching_rule must contain named group 'result',
        only that group remains.
        e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"

    Yields
    ------
    Generator[str, None, None]
        concatenated texts.
    """
    if isinstance(arg, str):
        # Previously a str fell through every branch and was silently dropped.
        yield arg
    elif isinstance(arg, list):
        yield from __concatenate_matching_iter(iter(arg), former_matching_rule, latter_matching_rule, remove_former_matched, remove_latter_matched)
    elif isinstance(arg, Iterator):
        yield from __concatenate_matching_iter(arg, former_matching_rule, latter_matching_rule, remove_former_matched, remove_latter_matched)
def
concatenate_matching( arg: Union[str, List[str], Iterator[str]], former_matching_rule: Optional[str] = None, latter_matching_rule: Optional[str] = None, remove_former_matched: bool = True, remove_latter_matched: bool = True) -> Generator[str, NoneType, NoneType]:
def concatenate_matching(
    arg: Union[str, List[str], Iterator[str]],
    former_matching_rule: Optional[str] = None,
    latter_matching_rule: Optional[str] = None,
    remove_former_matched: bool = True,
    remove_latter_matched: bool = True,
) -> Generator[str, None, None]:
    r"""Concatenate adjacent lines according to regular expression rules.

    Parameters
    ----------
    arg : Union[str, List[str], Iterator[str]]
        texts you want to concatenate.
        a single str has no neighbour to merge with and is yielded unchanged.
        any other type yields nothing.
    former_matching_rule : Optional[str], optional
        regular expression for former line, by default None
    latter_matching_rule : Optional[str], optional
        regular expression for latter line, by default None
    remove_former_matched : bool, optional
        whether to remove matched place of former line, by default True.
        if this is True, former_matching_rule must contain named group 'result',
        only that group remains.
        e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"
    remove_latter_matched : bool, optional
        whether to remove matched place of latter line, by default True.
        if this is True, latter_matching_rule must contain named group 'result',
        only that group remains.
        e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"

    Yields
    ------
    Generator[str, None, None]
        concatenated texts.
    """
    if isinstance(arg, str):
        # Previously a str fell through every branch and was silently dropped.
        yield arg
    elif isinstance(arg, list):
        yield from __concatenate_matching_iter(iter(arg), former_matching_rule, latter_matching_rule, remove_former_matched, remove_latter_matched)
    elif isinstance(arg, Iterator):
        yield from __concatenate_matching_iter(arg, former_matching_rule, latter_matching_rule, remove_former_matched, remove_latter_matched)
Concatenate two lines with regular expression rule.
Parameters
- arg (Union[str, List[str], Iterator[str]]): texts you want to concatenate.
- former_matching_rule (Optional[str], optional): regular expression for former line, by default None
- latter_matching_rule (Optional[str], optional): regular expression for latter line, by default None
- remove_former_matched (bool, optional):
whether to remove matched place of former line, by default True.
if this is True, former_matching_rule must contain named group 'result',
only that group remains.
e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"
- remove_latter_matched (bool, optional):
whether to remove matched place of latter line, by default True.
if this is True, latter_matching_rule must contain named group 'result',
only that group remains.
e.g. r"^(\s*[>]+\s*)(?P<result>.+)$"
Yields
- Generator[str, None, None]: concatenated texts.