o
    Rŀg_/                     @   sJ   d Z ddlmZ G dd dejeZG dd deZddd	Zd
d ZdS )zParsing TRANSFAC files.    )motifsc                       s0   e Zd ZdZh dZh dZ fddZ  ZS )Motifa  Store the information for one TRANSFAC motif.

    This class inherits from the Bio.motifs.Motif base class, as well
    as from a Python dictionary. All motif information found by the parser
    is stored as attributes of the base class when possible; see the
    Bio.motifs.Motif base class for a description of these attributes. All
    other information associated with the motif is stored as (key, value)
    pairs in the dictionary, where the key is the two-letter fields as found
    in the TRANSFAC file. References are an exception: These are stored in
    the .references attribute.

    These fields are commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
               sequence; length of sequence used; number of gaps inserted;
               strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    References are stored in an .references attribute, which is a list of
    dictionaries with the following keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    >   BFBSCCDRDTHCHPOV>   RARLRTRXc                    s:   z	t  |}W |S  ty   t tj| |}Y |S w )N)super__getitem__	TypeErrorr   r   )selfkeyvalue	__class__ G/var/www/html/myenv/lib/python3.10/site-packages/Bio/motifs/transfac.pyr   F   s   zMotif.__getitem__)__name__
__module____qualname____doc__multiple_value_keysreference_keysr   __classcell__r   r   r   r   r      s
    2r   c                   @   s    e Zd ZdZdd Zdd ZdS )RecordzStore the information in a TRANSFAC matrix table.

    The record inherits from a list containing the individual motifs.

    Attributes:
     - version - The version number, corresponding to the 'VV' field
       in the TRANSFAC file;

    c                 C   s
   d| _ dS )zInitialize the class.N)versionr   r   r   r   __init__Y   s   
zRecord.__init__c                 C   s   t | S )z'Turn the TRANSFAC matrix into a string.)writer#   r   r   r   __str__]   s   zRecord.__str__N)r   r   r   r   r$   r&   r   r   r   r   r!   N   s    
r!   Tc              
   C   s  i }g }d}t  }| D ]}| }|sq|dd}|d  }|r1t|dkr1td| dt|dkrN|d  }	|rN|dd sNtd| d|d	krV|	|_n|d
v r7i }|	 dd g dkrstd| d| d}
dD ]}g ||< qw| D ]}| }|dd}|d  }t|dkr|d  }	|r|dd std| dzt|}W n
 ty   Y  nxw |
dkr|dkr|rtd| n|
d7 }
||
krtd| d|rt|dkrtd|dd|dd|ddt|dkrtd| d|	 dd }t|dkr"td| dtd|D ]\}}|| 	t
| q'q|dkr=q|dkr|	d\}}}|d dkr\td| d | d|d! d"krntd| d#| dt|dd! }t||d krtd|dd$t|d dd|dd||	i}|	| q|d%kr|durtd|d&}|| ||_|	| i }g }q|tjv r|	||< q|tjv r||vrg ||< || 	|	 q|	||< q|S )'z4Parse a transfac format handle into a Record object.N   r      zAThe key value of a TRANSFAC motif line should have 2 characters:""  zKA TRANSFAC motif line should have 2 spaces between key and value columns: "VV)P0PO   )ACGTzA TRANSFAC matrix "z(" line should be followed by "A C G T": ACGTzaA TRANSFAC matrix should start with "01" as first row of the matrix, but this matrix uses "00": "zKThe TRANSFAC matrix row number does not match the position in the matrix: "zLA TRANSFAC matrix line should have a 2 digit key at the start of the line ("02dz"), but this matrix uses "dz": "sz".z7A TRANSFAC matrix line should have a key and a value: "zQA TRANSFAC matrix line should have a value for each nucleotide (A, C, G and T): "XXRN;[zThe index "z2" in a TRANSFAC RN line should start with a "[": "]z0" in a TRANSFAC RN line should end with a "]": "zP" of the TRANSFAC RN line does not match the current number of seen references "//)alphabetcounts)r!   stripsplitlen
ValueError	partitionr"   intzipappendfloatr   update
referencesr   r   )handlestrictannotationsrJ   r?   recordline	key_valuer   r   lengthcivaluesvindex	separator	accession	referencemotifr   r   r   readb   s  












r[   c                    sF  g }z| j }W n	 ty   Y nw |durd| }|| tj}d}| D ]g }|D ]}d}|D ]}	|	dkrj}
|
dkr?q1j}tj}d	dg| }|| t
|
D ]. d	d	gd
d |D  d }|t d g fdd|D  |  g  }|| qXd}n;z|	}W n ty   d}Y nw |dur|	|v r|D ]}|	 d| }|| qn|	 d| }|| d}|	dkrzj}W n	 ty   Y q1w d}|D ]}|D ]}	||	}|du rq|	 d| }|| d}qqq1|rd}|| q+d}|| d	|d }|| q%d	|}|S )z7Write the representation of a motif in TRANSFAC format.NzVV  %s
XX
//
))ACAS)ID)r   CO)NA)DE)TY)OSOC)r
   r	   )r   )r,   )BA)r   )r   )r   )r   PVFr,   r   z       z%02.dc                 S   s   g | ]}d qS )z%6.20gr   .0_r   r   r   
<listcomp>  s    zwrite.<locals>.<listcomp>z      %sr'   c                    s   g | ]	}j |   qS r   )r?   rh   rS   rZ   r   r   rk     s    Tr*   rf   )r8   r   r   r   r   r7   r=   
 )r"   AttributeErrorrG   r   r   rQ   degenerate_consensussortedr>   joinrangetuplegetrJ   )r   blocksr"   blockr   sectionslinessectionblankr   rQ   sequencelettersrO   r   rU   rJ   keysrY   textr   rl   r   r%      s   










r%   N)T)	r   Bior   r   dictlistr!   r[   r%   r   r   r   r   <module>   s   A
 