Source code for pylangacq.objects

import dataclasses
from typing import Dict, List, Tuple, Union


@dataclasses.dataclass
class Gra:
    """One grammatical-relation entry attached to a word of an utterance.

    Attributes
    ----------
    dep : int
        Position, within the utterance, of the dependent -- that is,
        of the word this relation belongs to.
    head : int
        Position, within the utterance, of the head word.
    rel : str
        Label of the grammatical relation.
    """

    # Field order determines the positional order of the generated __init__.
    dep: int
    head: int
    rel: str

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = (
        "dep",
        "head",
        "rel",
    )
@dataclasses.dataclass
class Token:
    """A single token parsed out of a CHAT utterance, with its annotations.

    Attributes
    ----------
    word : str
        The word form of the token.
    pos : str or None
        Part-of-speech tag; may be ``None``.
    mor : str or None
        Morphological information; may be ``None``.
    gra : Gra or None
        Grammatical relation of the token; may be ``None``.
    """

    # Field order determines the positional order of the generated __init__.
    word: str
    pos: Union[str, None]
    mor: Union[str, None]
    gra: Union[Gra, None]

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = (
        "word",
        "pos",
        "mor",
        "gra",
    )
@dataclasses.dataclass
class Utterance:
    """A single utterance taken from CHAT transcript data.

    Attributes
    ----------
    participant : str
        Who produced the utterance, e.g., ``"CHI"`` or ``"MOT"``.
    tokens : List[Token]
        The tokens that make up the utterance.
    time_marks : Tuple[int, int] or None
        Start and end times, in milliseconds, of the matching segment
        in a digitized video or audio file, when the CHAT data provides
        them. For example, ``(0, 1073)`` is extracted from
        ``"·0_1073·"`` in the CHAT data, where ``"·"`` is ASCII code 21
        (0x15), the NAK (Negative Acknowledgment) character.
        May be ``None``.
    tiers : Dict[str, str]
        All of the original, unparsed data belonging to the utterance:
        the transcribed utterance itself (signaled by ``*CHI:``,
        ``*MOT:`` etc. in CHAT), common tiers such as %mor and %gra,
        and every other tier associated with the utterance. Use this
        dictionary to retrieve any information that this package does
        not handle directly.
    """

    # Field order determines the positional order of the generated __init__.
    participant: str
    tokens: List[Token]
    time_marks: Union[Tuple[int, int], None]
    tiers: Dict[str, str]

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = (
        "participant",
        "tokens",
        "time_marks",
        "tiers",
    )