o
    Rŀg                     @   s\   d Z ddlZdd Zdd Ze Zdd Zd	d
 Zdd Zedkr,ddl	m
Z
 e
  dS dS )z3Functions to calculate assorted sequence checksums.    Nc                 C   s2   zt | }W n ty   |  }Y nw t|S )zReturn the crc32 checksum for a sequence (string or Seq object).

    Note that the case is important:

    >>> crc32("ACGTACGTACGT")
    20049947
    >>> crc32("acgtACGTacgt")
    1688586483

    )bytes	TypeErrorencodebinasciicrc32)seqs r	   I/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqUtils/CheckSum.pyr      s   
r   c                  C   sj   g } t dD ],}|}d}t dD ]}|d@ }|dL }|d@ r"|dO }|dL }|r,|dN }q| | q| S )N   r         l        l      0 )rangeappend)_table_hipart_lpart_hjrflagr	   r	   r
   _init_table_h$   s   r   c                 C   sd   d}d}| D ]"}|d@ d> }|d? }|d? |B }|t |A d@ }|t| A }|}qd|d|dS )zReturn the crc64 checksum for a sequence (string or Seq object).

    Note that the case is important:

    >>> crc64("ACGTACGTACGT")
    'CRC-C4FBB762C4A87EBD'
    >>> crc64("acgtACGTacgt")
    'CRC-DA4509DC64A87EBD'

    r         r   zCRC-08X)ordr   )r   crclcrchcshrtemp1htemp1lidxr	   r	   r
   crc649   s   r"   c                 C   sB   d }}| D ]}|d7 }||t |  7 }|dkrd}q|d S )a  Return the GCG checksum (int) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string),
    returns the GCG checksum (int). Checksum used by GCG program.
    seq type = str.

    Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
    with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.

    All sequences are converted to uppercase.

    >>> gcg("ACGTACGTACGT")
    5688
    >>> gcg("acgtACGTacgt")
    5688

    r   r   9   i'  )r   upper)r   indexchecksumcharr	   r	   r
   gcgQ   s   r(   c                 C   sr   ddl }ddl}| }zt| } W n ty   |  } Y nw ||   ||	 }|
 dddS )a  Return the SEGUID (string) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string),
    returns the SEGUID string (A SEquence Globally Unique IDentifier).
    seq type = str.

    Note that the case is not important:

    >>> seguid("ACGTACGTACGT")
    'If6HIvcnRSQDVNiAoefAzySc6i4'
    >>> seguid("acgtACGTacgt")
    'If6HIvcnRSQDVNiAoefAzySc6i4'

    For more information about SEGUID, see:
    http://bioinformatics.anl.gov/seguid/
    https://doi.org/10.1002/pmic.200600032
    r   N
 =)base64hashlibsha1r   r   r   updater$   encodebytesdigestdecodereplacerstrip)r   r,   r-   mtmpr	   r	   r
   seguidl   s   r7   __main__)run_doctest)__doc__r   r   r   r   r"   r(   r7   __name__
Bio._utilsr9   r	   r	   r	   r
   <module>   s   !
