<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># Copyright 2003 Yair Benita.  All rights reserved.
# Revisions copyright 2020 by Tianyi Shi.  All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculate isoelectric points of polypeptides using methods of Bjellqvist.

pK values and the methods are taken from::

    * Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
    Sanchez, J.-Ch., Frutiger, S. &amp; Hochstrasser, D.F.
    The focusing positions of polypeptides in immobilized pH gradients can be
    predicted from their amino acid sequences. Electrophoresis 1993, 14,
    1023-1031.

    * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
    Reference points for comparisons of two-dimensional maps of proteins from
    different human cell types defined in a pH scale where isoelectric points
    correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.

I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
"""

positive_pKs = {"Nterm": 7.5, "K": 10.0, "R": 12.0, "H": 5.98}
negative_pKs = {"Cterm": 3.55, "D": 4.05, "E": 4.45, "C": 9.0, "Y": 10.0}
pKcterminal = {"D": 4.55, "E": 4.75}
pKnterminal = {
    "A": 7.59,
    "M": 7.0,
    "S": 6.93,
    "P": 8.36,
    "T": 6.82,
    "V": 7.44,
    "E": 7.7,
}
charged_aas = ("K", "R", "H", "D", "E", "C", "Y")


class IsoelectricPoint:
    """A class for calculating the IEP or charge at given pH of a protein.

    Parameters
    ----------
    :protein_sequence: A ``Bio.Seq`` or string object containing a protein
                       sequence.
    :aa_content: A dictionary with amino acid letters as keys and its
                 occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
                 Default: ``None``. If ``None``, the dic will be calculated
                 from the given sequence.

    Methods
    -------
    :charge_at_pH(pH):  Calculates the charge of the protein for a given pH
    :pi():              Calculates the isoelectric point


    Examples
    --------
    The methods of this class can either be accessed from the class itself
    or from a ``ProtParam.ProteinAnalysis`` object (with partially different
    names):

    &gt;&gt;&gt; from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
    &gt;&gt;&gt; protein = IP("INGAR")
    &gt;&gt;&gt; print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
    IEP of peptide INGAR is 9.75
    &gt;&gt;&gt; print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
    Its charge at pH 7 is 0.76


    &gt;&gt;&gt; from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
    &gt;&gt;&gt; protein = PA("PETER")
    &gt;&gt;&gt; print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
    IEP of PETER: 4.53
    &gt;&gt;&gt; print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
    Charge at pH 4.53: 0.00

    """

    def __init__(self, protein_sequence, aa_content=None):
        """Initialize the class."""
        self.sequence = protein_sequence.upper()
        if not aa_content:
            from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA

            aa_content = _PA(self.sequence).count_amino_acids()
        self.charged_aas_content = self._select_charged(aa_content)

        self.pos_pKs, self.neg_pKs = self._update_pKs_tables()

    # This function creates a dictionary with the contents of each charged aa,
    # plus Cterm and Nterm.
    def _select_charged(self, aa_content):
        charged = {}
        for aa in charged_aas:
            charged[aa] = float(aa_content[aa])
        charged["Nterm"] = 1.0
        charged["Cterm"] = 1.0
        return charged

    def _update_pKs_tables(self):
        """Update pKs tables with seq specific values for N- and C-termini."""
        pos_pKs = positive_pKs.copy()
        neg_pKs = negative_pKs.copy()
        nterm, cterm = self.sequence[0], self.sequence[-1]
        if nterm in pKnterminal:
            pos_pKs["Nterm"] = pKnterminal[nterm]
        if cterm in pKcterminal:
            neg_pKs["Cterm"] = pKcterminal[cterm]
        return pos_pKs, neg_pKs

    def charge_at_pH(self, pH):
        """Calculate the charge of a protein at given pH."""
        # derivation:
        #   Henderson Hasselbalch equation: pH = pKa + log([A-]/[HA])
        #   Rearranging: [HA]/[A-] = 10 ** (pKa - pH)
        #   partial_charge =
        #       [A-]/[A]total = [A-]/([A-] + [HA]) = 1 / { ([A-] + [HA])/[A-] } =
        #       1 / (1 + [HA]/[A-]) = 1 / (1 + 10 ** (pKa - pH)) for acidic residues;
        #                             1 / (1 + 10 ** (pH - pKa)) for basic residues
        positive_charge = 0.0
        for aa, pK in self.pos_pKs.items():
            partial_charge = 1.0 / (10 ** (pH - pK) + 1.0)
            positive_charge += self.charged_aas_content[aa] * partial_charge

        negative_charge = 0.0
        for aa, pK in self.neg_pKs.items():
            partial_charge = 1.0 / (10 ** (pK - pH) + 1.0)
            negative_charge += self.charged_aas_content[aa] * partial_charge

        return positive_charge - negative_charge

    # This is the action function, it tries different pH until the charge of
    # the protein is 0 (or close).
    def pi(self, pH=7.775, min_=4.05, max_=12):
        r"""Calculate and return the isoelectric point as float.

        This is a recursive function that uses bisection method.
        Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method

        Arguments:
         - pH: the pH at which the current charge of the protein is computed.
           This pH lies at the centre of the interval (mean of `min_` and `max_`).
         - min\_: the minimum of the interval. Initial value defaults to 4.05,
           which is below the theoretical minimum, when the protein is composed
           exclusively of aspartate.
         - max\_: the maximum of the the interval. Initial value defaults to 12,
           which is above the theoretical maximum, when the protein is composed
           exclusively of arginine.
        """
        charge = self.charge_at_pH(pH)
        if max_ - min_ &gt; 0.0001:
            if charge &gt; 0.0:
                min_ = pH
            else:
                max_ = pH
            next_pH = (min_ + max_) / 2
            return self.pi(next_pH, min_, max_)
        return pH


if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()
</pre></body></html>