o
    Rŀg6                     @   s  U d Z ddlZddlZddlmZ ddlmZ ddlmZ dZ	da
dai aeeef ed< i Zeeef ed< g aee ed	< d
d Zdd Zdd Zdd Zdd Zd$ddZdd Zd%ddZd&ddZdd Zed'dd Zde_ed!krdd"lm Z  e dd# dS dS )(a  Provides code to access the TogoWS integrated websevices of DBCLS, Japan.

This module aims to make the TogoWS (from DBCLS, Japan) easier to use. See:
http://togows.dbcls.jp/

The TogoWS REST service provides simple access to a range of databases, acting
as a proxy to shield you from all the different provider APIs. This works using
simple URLs (which this module will construct for you). For more details, see
http://togows.dbcls.jp/site/en/rest.html

The functionality is somewhat similar to Biopython's Bio.Entrez module which
provides access to the NCBI's Entrez Utilities (E-Utils) which also covers a
wide range of databases.

Currently TogoWS does not provide any usage guidelines (unlike the NCBI whose
requirements are reasonably clear). To avoid risking overloading the service,
Biopython will only allow three calls per second.

The TogoWS SOAP service offers a more complex API for calling web services
(essentially calling remote functions) provided by DDBJ, KEGG and PDBj. For
example, this allows you to run a remote BLAST search at the DDBJ. This is
not yet covered by this module, however there are lots of Python examples
on the TogoWS website using the SOAPpy python library. See:
http://togows.dbcls.jp/site/en/soap.html
http://soapy.sourceforge.net/
    N)quote)urlopen)function_with_previouszhttp://togows.dbcls.jp_entry_db_fields_entry_db_formats_convert_formatsc                 C   s$   t | }|   }|  |S )z=Query a TogoWS URL for a plain text list of values (PRIVATE).)_openreadstripsplitclose)urlhandlefields r   G/var/www/html/myenv/lib/python3.10/site-packages/Bio/TogoWS/__init__.py_get_fields4   s   r   c                   C   s   t td S )Nz/entryr   	_BASE_URLr   r   r   r   _get_entry_dbs<   s   r   c                 C      t td|  d S )N/entry/z?fieldsr   dbr   r   r   _get_entry_fields@      r   c                 C   r   )Nr   z?formatsr   r   r   r   r   _get_entry_formatsD   r   r   c                   C   s   dd t td D S )Nc                 S   s   g | ]}| d qS ).)r   .0pairr   r   r   
<listcomp>I   s    z(_get_convert_formats.<locals>.<listcomp>	/convert/r   r   r   r   r   _get_convert_formatsH   s   r#   c                 C   sX  t du rt a | t vrtd|  d|rVzt|  }W n ty,   t| }|t| < Y nw | dkrD|dkrDd|v rDd}ddl}|d ||vrVtd	|| d
t	|f |rzt
|  }W n tyo   t| }|t
| < Y nw ||vrtd|| d
t	|f t|trd|}td|  dt|  }|r|d| 7 }|r|d| 7 }t|S )ab  Call TogoWS 'entry' to fetch a record.

    Arguments:
     - db - database (string), see list below.
     - id - identifier (string) or a list of identifiers (either as a list of
       strings or a single string with comma separators).
     - format - return data file format (string), options depend on the database
       e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
     - field - specific field from within the database record (string)
       e.g. "au" or "authors" for pubmed.

    At the time of writing, this includes the following::

        KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
              module, pathway
        DDBj: ddbj, dad, pdb
        NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, onim,
              homologue, snp, mesh, pubmed
        EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50

    For the current list, please see http://togows.dbcls.jp/entry/

    This function is essentially equivalent to the NCBI Entrez service
    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
    does not offer field extraction.
    Nz9TogoWS entry fetch does not officially support database 'z'.pubmedtititler   zETogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead.zUTogoWS entry fetch does not explicitly support field '%s' for database '%s'. Only: %sz, zVTogoWS entry fetch does not explicitly support format '%s' for database '%s'. Only: %s,r   /r   )_entry_db_namesr   
ValueErrorr   KeyErrorr   warningswarnjoinsortedr   r   
isinstancelistr   r   r   )r   idformatfieldr   r,   formatsr   r   r   r   entryL   s\   


r6   c                 C   s   t du r
ttd a | t vrddl}|d| tf  td|  dt| d }t|}| }|  |s=t	d| zt
| W S  t	yV   t	d	| d
|dw )a  Call TogoWS search count to see how many matches a search gives.

    Arguments:
     - db - database (string), see http://togows.dbcls.jp/search
     - query - search term (string)

    You could then use the count to download a large set of search results in
    batches using the offset and limit options to Bio.TogoWS.search(). In
    general however the Bio.TogoWS.search_iter() function is simpler to use.
    N/searchr   zTTogoWS search does not officially support database '%s'. See %s/search/ for options./search/r(   z/countz!TogoWS returned no data from URL zExpected an integer from URL z, got: )_search_db_namesr   r   r,   r-   r   r   r	   r   r*   intr
   )r   queryr,   r   r   datar   r   r   search_count   s(   r=   d   c           
      c   s    t | |}|s
dS |}|durt||}d}g }|rgt||}t| |||   }t||ks=J dt||f ||krEtd|D ]}	|	|v rUtd|	 d|	V  qG||7 }||8 }|}|sdS dS )a  Call TogoWS search iterating over the results (generator function).

    Arguments:
     - db - database (string), see http://togows.dbcls.jp/search
     - query - search term (string)
     - limit - optional upper bound on number of search results
     - batch - number of search results to pull back each time talk to
       TogoWS (currently limited to 100).

    You would use this function within a for loop, e.g.

    >>> from Bio import TogoWS
    >>> for id in TogoWS.search_iter("pubmed", "diabetes+human", limit=10):
    ...     print("PubMed ID: %s" %id) # maybe fetch data with entry?
    PubMed ID: ...

    Internally this first calls the Bio.TogoWS.search_count() and then
    uses Bio.TogoWS.search() to get the results in batches.
    N   zGot %i, expected %iz'Same search results for previous offsetzResult z was in previous batch)r=   minsearchr	   r
   r   lenRuntimeError)
r   r;   limitbatchcountremainoffsetprev_idsids
identifierr   r   r   search_iter   s.   


 rL   c                 C   s   t du r
ttd a | t vrddl}|d| tf  td|  dt|  }|durx|durxzt|}W n tyD   td|dw zt|}W n tyZ   td|dw |dkretd	| |dkrotd
| |d||f 7 }n|dus|durtd|r|d| 7 }t|S )a  Call TogoWS search.

    This is a low level wrapper for the TogoWS search function, which
    can return results in a several formats. In general, the search_iter
    function is more suitable for end users.

    Arguments:
     - db - database (string), see http://togows.dbcls.jp/search/
     - query - search term (string)
     - offset, limit - optional integers specifying which result to start from
       (1 based) and the number of results to return.
     - format - return data file format (string), e.g. "json", "ttl" (RDF)
       By default plain text is returned, one result per line.

    At the time of writing, TogoWS applies a default count limit of 100
    search results, and this is an upper bound. To access more results,
    use the offset argument or the search_iter(...) function.

    TogoWS supports a long list of databases, including many from the NCBI
    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
    "ncbi-taxonomy"), EBI (e.g. "ebi-ebml" or "embl", "ebi-uniprot" or
    "uniprot, "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
    For the current list, see http://togows.dbcls.jp/search/

    The NCBI provide the Entrez Search service (ESearch) which is similar,
    available in Biopython as the Bio.Entrez.esearch() function.

    See also the function Bio.TogoWS.search_count() which returns the number
    of matches found, and the Bio.TogoWS.search_iter() function which allows
    you to iterate over the search results (taking care of batching for you).
    Nr7   r   zTTogoWS search does not explicitly support database '%s'. See %s/search/ for options.r8   r(   z0Offset should be an integer (at least one), not z/Limit should be an integer (at least one), not z%Offset should be at least one, not %iz$Count should be at least one, not %iz/%i,%iz8Expect BOTH offset AND limit to be provided (or neither)r   )	r9   r   r   r,   r-   r   r:   r*   r   )r   r;   rH   rD   r3   r,   r   r   r   r   rA      sN   !rA   c                 C   sx   t st a ||gt vrddd t D }td| td| d|  }z|  } W n	 ty5   Y nw t|| dS )a  Call TogoWS for file format conversion.

    Arguments:
     - data - string or handle containing input record(s)
     - in_format - string describing the input file format (e.g. "genbank")
     - out_format - string describing the requested output format (e.g. "fasta")

    For a list of supported conversions (e.g. "genbank" to "fasta"), see
    http://togows.dbcls.jp/convert/

    Note that Biopython has built in support for conversion of sequence and
    alignnent file formats (functions Bio.SeqIO.convert and Bio.AlignIO.convert)
    
c                 s   s    | ]	}d t | V  qdS )z%s -> %sN)tupler   r   r   r   	<genexpr>D  s    zconvert.<locals>.<genexpr>z%Unsupported conversion. Choose from:
r"   r   )post)r   r#   r.   r*   r   r	   AttributeErrorr   )r<   	in_format
out_formatmsgr   r   r   r   convert2  s   rU   c                 C   st   d}t   }tj| | }|dkrt | || t_n|t_|r)t| | }nt| }tj|dd}|j|_|S )a  Build the URL and open a handle to it (PRIVATE).

    Open a handle to TogoWS, will raise an IOError if it encounters an error.

    In the absence of clear guidelines, this function enforces a limit of
    "up to three queries per second" to avoid abusing the TogoWS servers.
    gTUU?r   zUTF-8)encoding)	timer   previoussleepr   encodeioTextIOWrapperr   )r   rP   delaycurrentwaitr   text_handler   r   r   r   Q  s   	
r   __main__)run_doctest)verbose)NN)Nr>   )NNN)N)!__doc__r[   rW   urllib.parser   urllib.requestr   
Bio._utilsr   r   r9   r)   r   dictstr__annotations__r   r   r1   r   r   r   r   r#   r6   r=   rL   rA   rU   r   rX   __name__rb   r   r   r   r   <module>   s:   
M
#
/G