o
    _hW&                     @   s(  d Z ddlZddlmZ ddlmZ d ZZdZi Z	e
djZe
djZd	Zi Zzed
ZW n eyA   eejd ZY nw ee D ]!Zejdkriejdd Zeee< edd Zeevrieee< qHdd eD Zdd ZG dd dZG dd de Z!G dd dZ"G dd dZ#dS )zY

Pyphen
======

Pure Python module to hyphenate text, inspired by Ruby's Text::Hyphen.

    N)	resources)Pathz0.17.2)	LANGUAGESPyphenlanguage_fallbackz\^{2}([0-9a-f]{2})z
(\d?)(\D?))%#LEFTHYPHENMINRIGHTHYPHENMINCOMPOUNDLEFTHYPHENMINCOMPOUNDRIGHTHYPHENMINzpyphen.dictionariesdictionariesz.dic   _c                 C   s   i | ]}|  |qS  )lower).0namer   r   C/var/www/html/myenv/lib/python3.10/site-packages/pyphen/__init__.py
<dictcomp>,   s    r   c                 C   sH   |  dd d}|r"d|} | tv rt|  S |  |sdS dS )a	  Get a fallback language available in our dictionaries.

    http://www.unicode.org/reports/tr35/#Locale_Inheritance

    We use the normal truncation inheritance. This function needs aliases
    including scripts for languages with multiple regions available.

    -r   N)replacer   splitjoinLANGUAGES_LOWERCASEpop)languagepartsr   r   r   r   /   s   	
r   c                   @       e Zd ZdZdd Zdd ZdS )AlternativeParserzParser of nonstandard hyphen pattern alternative.

    The instance returns a special int with data about the current position in
    the pattern when called with an odd value.

    c                 C   sP   | d}|d | _t|d | _t|d | _|dr&|  jd7  _d S d S )N,r         .)r   changeintindexcut
startswith)selfpatternalternativer   r   r   __init__G   s   


zAlternativeParser.__init__c                 C   s8   |  j d8  _ t|}|d@ rt|| j| j | jfS |S )Nr"   )r'   r&   DataIntr%   r(   )r*   valuer   r   r   __call__O   s
   zAlternativeParser.__call__N)__name__
__module____qualname____doc__r-   r0   r   r   r   r   r    @   s    r    c                   @   s   e Zd ZdZdddZdS )r.   zE``int`` with some other data can be stuck to in a ``data`` attribute.Nc                 C   s0   t | |}|rt|tr|j|_|S ||_|S )zCreate a new ``DataInt``.

        Call with ``reference=dataint_object`` to use the data from another
        ``DataInt``.

        )r&   __new__
isinstancer.   data)clsr/   r7   	referenceobjr   r   r   r5   Z   s   zDataInt.__new__)NN)r1   r2   r3   r4   r5   r   r   r   r   r.   X   s    r.   c                   @   r   )HyphDictzHyphenation patterns.c           
         sn  i | _ |d}|  }W d   n1 sw   Y  | dkr&d}||ddd D ]t}| }|r?|t	r@q2t
dd |}d	|v r]d
|v r]|d	d\}}t|| nt t fddt|D  \}}t|dkruq2dt|}}	|| s|d7 }|| r||	d  s|	d8 }	||	d  r||||	 f| j d|< q2i | _tdd | j D | _dS )zhRead a ``hyph_*.dic`` and parse its patterns.

        :param path: Path of hyph_*.dic to read

        rbNzmicrosoft-cp1251cp1251
r"   c                 S   s   t t| ddS )Nr"      )chrr&   group)matchr   r   r   <lambda>   s    z#HyphDict.__init__.<locals>.<lambda>/=c                    s    g | ]\}}| |pd fqS )0r   )r   istringfactoryr   r   
<listcomp>   s    z%HyphDict.__init__.<locals>.<listcomp>r    c                 s   s    | ]}t |V  qd S )N)len)r   keyr   r   r   	<genexpr>   s    z$HyphDict.__init__.<locals>.<genexpr>)patternsopenreadlinedecoder   	read_textr   stripr)   ignored	parse_hexr    r&   zipparsemaxrM   r   cachemaxlen)
r*   pathfdencodingr+   r,   tagsvaluesstartendr   rI   r   r-   l   s@   zHyphDict.__init__c                 C   s   |  }| j|}|du rtd| d}dgt|d  }tt|d D ]@}t|| j t|d }t|d |D ]*}| j||| }|sIq:|\}	}
t||	 ||	 t|
 }t	t
|
|| ||< q:q%dd t|D  | j|< }|S )a  Get a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        E.g. for the dutch word 'lettergrepen' this method returns ``[3, 6,
        9]``.

        Each position is a ``DataInt`` with a data attribute.

        If the data attribute is not ``None``, it contains a tuple with
        information about nonstandard hyphenation at that point: ``(change,
        index, cut)``.

        change
          a string like ``'ff=f'``, that describes how hyphenation should
          take place.

        index
          where to substitute the change, counting from the current point

        cut
          how many characters to remove while substituting the nonstandard
          hyphenation

        Nr$   r   r"   c                 S   s(   g | ]\}}|d  rt |d |dqS )r#   r"   )r9   )r.   )r   rG   r9   r   r   r   rK      s
    z&HyphDict.positions.<locals>.<listcomp>)r   r[   getrM   rangeminr\   rP   slicemaprZ   	enumerate)r*   wordpointspointed_word
referencesrG   stopjr+   offsetra   slice_r   r   r   	positions   s&   zHyphDict.positionsN)r1   r2   r3   r4   r-   rr   r   r   r   r   r;   i   s    1r;   c                   @   sB   e Zd ZdZdddZdd Zd	d
 ZdddZdddZeZ	dS )r   zEHyphenation class, with methods to hyphenate strings in various ways.Nr#   Tc                 C   sL   ||| _ | _|rt|ntt| }|r|tvrt|t|< t| | _dS )a  Create an hyphenation instance for given lang or filename.

        :param filename: filename or Path of hyph_*.dic to read
        :param lang: lang of the included dict to use if no filename is given
        :param left: minimum number of characters of the first syllabe
        :param right: minimum number of characters of the last syllabe
        :param cache: if ``True``, use cached copy of the hyphenation patterns

        N)leftrightr   r   r   hdcacher;   hd)r*   filenamelangrs   rt   r[   r]   r   r   r   r-      s
   
zPyphen.__init__c                    s*   t |j   fddj|D S )zGet a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        See also ``HyphDict.positions``. The points that are too far to the
        left or right are removed.

        c                    s*   g | ]}j |  kr krn n|qS r   )rs   )r   rG   rt   r*   r   r   rK      s   * z$Pyphen.positions.<locals>.<listcomp>)rM   rt   rv   rr   )r*   rj   r   ry   r   rr      s   	zPyphen.positionsc                 c   s    t | |D ]?}|jr:|j\}}}||7 }| r| }|d\}}|d| | |||| d  fV  q|d| ||d fV  qdS )zIterate over all hyphenation possibilities, the longest first.

        :param word: unicode string of the word to hyphenate

        rE   N)reversedrr   r7   isupperupperr   )r*   rj   positionr%   r'   r(   c1c2r   r   r   iterate   s   (zPyphen.iterater   c                 C   s@   |t |8 }| |D ]\}}t ||kr|| |f  S qdS )a  Get the longest possible first part and the last part of a word.

        :param word: unicode string of the word to hyphenate
        :param width: maximum length of the first part
        :param hyphen: unicode string used as hyphen character

        The first part has the hyphen already attached.

        Returns ``None`` if there is no hyphenation point before ``width``, or
        if the word could not be hyphenated.

        N)rM   r   )r*   rj   widthhyphenw1w2r   r   r   wrap   s   zPyphen.wrapc                 C   sv   t |}t| |D ]*}|jr/|j\}}}||7 }| r"| }|d||||| < q||| qd|S )a  Get the word as a string with all the possible hyphens inserted.

        :param word: unicode string of the word to hyphenate
        :param hyphen: unicode string used as hyphen character

        E.g. for the dutch word ``'lettergrepen'``, this method returns the
        unicode string ``'let-ter-gre-pen'``. The hyphen string to use can be
        given as the second parameter, that defaults to ``'-'``.

        rE   rL   )	listrz   rr   r7   r{   r|   r   insertr   )r*   rj   r   lettersr}   r%   r'   r(   r   r   r   inserted  s   
zPyphen.inserted)NNr#   r#   T)r   )
r1   r2   r3   r4   r-   rr   r   r   r   r0   r   r   r   r   r      s    


r   )$r4   re	importlibr   pathlibr   VERSION__version____all__ru   compilesubrW   findallrY   rV   r   filesr   	TypeError__file__parentsortediterdirr]   suffixr   r   
short_namer   r   r    r&   r.   r;   r   r   r   r   r   <module>   s<    	
d