Source code for etfl.core.memodel

# -*- coding:utf-8 -*-
"""
.. module:: ETFL
   :platform: Unix, Windows
   :synopsis: flux balance models accounting for expression, thermodynamics, and resource allocation constraints

.. moduleauthor:: ETFL team

Core for the ME-part

"""

import numpy as np
import optlang
import pandas as pd
import sympy
from cobra import Model, Reaction, Gene
from cobra.core import Solution, DictList
from collections import defaultdict, OrderedDict
from Bio.SeqUtils import molecular_weight

from tqdm import tqdm

from .ion import Ion
from .carbohydrate import Carbohydrate
from .lipid import Lipid
from ..utils.parsing import parse_gpr
from ..utils.utils import replace_by_enzymatic_reaction, replace_by_me_gene, \
    replace_by_coding_gene
from .genes import ExpressedGene, CodingGene
from .dna import DNA
from .rna  import mRNA,rRNA, tRNA
from .enzyme import Enzyme, Peptide
from .reactions import EnzymaticReaction, ProteinComplexation, \
    TranslationReaction, TranscriptionReaction, DegradationReaction, DNAFormation
from .expression import build_trna_charging, enzymes_to_gpr_no_stoichiometry, \
    make_stoich_from_aa_sequence, make_stoich_from_nt_sequence, \
    degrade_peptide, degrade_mrna, _extract_trna_from_reaction
from ..optim.constraints import CatalyticConstraint, ForwardCatalyticConstraint,\
    BackwardCatalyticConstraint, EnzymeMassBalance, \
    rRNAMassBalance, mRNAMassBalance, tRNAMassBalance, DNAMassBalance, \
    GrowthCoupling, TotalCapacity, ExpressionCoupling, EnzymeRatio, \
    GrowthChoice, EnzymeDegradation, mRNADegradation,\
    LinearizationConstraint, SynthesisConstraint, SOS1Constraint,\
    InterpolationConstraint, RNAPAllocation, LipidMassBalance, IonMassBalance,\
    CarbohydrateMassBalance, MinimalCoupling, MinimalAllocation
from ..optim.variables import ModelVariable, GrowthActivation, \
    EnzymeVariable, LinearizationVariable, RibosomeUsage, RNAPUsage, \
    FreeEnzyme, BinaryActivator, InterpolationVariable, DNAVariable, \
    GrowthRate, GenericVariable

from .allocation import add_dummy_expression,add_dummy_mrna,add_dummy_peptide,\
    add_dummy_protein, add_interpolation_variables,\
        MRNA_WEIGHT_CONS_ID, PROT_WEIGHT_CONS_ID, \
    MRNA_WEIGHT_VAR_ID, PROT_WEIGHT_VAR_ID, \
    DNA_WEIGHT_CONS_ID, DNA_WEIGHT_VAR_ID, DNA_FORMATION_RXN_ID, \
    define_prot_weight_constraint, define_mrna_weight_constraint, \
    define_dna_weight_constraint, get_dna_synthesis_mets

from pytfa.core.model import LCSBModel
from pytfa.optim.reformulation import petersen_linearization
from pytfa.optim.utils import chunk_sum, symbol_sum
from pytfa.utils.logger import get_bistream_logger
from pytfa.utils.str import camel2underscores
from pytfa.optim.utils import copy_solver_configuration

def id_maker_rib_rnap(the_set):
        # for a set of ribosomes or RNAPs makes an id
        id_ = ''.join([x for x in the_set])
        return id_


[docs]class MEModel(LCSBModel, Model): def __init__(self, model=Model(), name = None, growth_reaction='', mu_range = None, n_mu_bins = 1, big_M = 1000, *args, **kwargs): """ :param model: The input model :type model: cobra.Model :param mu: (Facultative) Mean growth rate to constraint the model :param mu_error: (Facultative) Absolute error on mu to constraint the model :type mu_error: float > 0 :param mu_range: (Facultative) Min-Max growth rate to constraint the model :type mu_range: tuple (l,u) :param n_mu_bins: (Facultative) In how many intervals to separate the growth rate for the linearization :param args: :param kwargs: """ name = 'ETFL_' + name if name is not None else 'ETFL_model' LCSBModel.__init__(self, model, name) self.logger = get_bistream_logger('ME model' + str(self.name)) self.parent = model if model is not None: self.sanitize_varnames() self.init_etfl(big_M, growth_reaction, mu_range, n_mu_bins, name) def init_etfl(self, big_M, growth_reaction, mu_range, n_mu_bins, name): self.big_M = big_M self._var_dict = dict() self._cons_dict = dict() self.logger.info('# ETFL Model {} initialized'.format(name)) self._growth_reaction_id = growth_reaction self._mu_range = mu_range self._n_mu_bins = n_mu_bins if mu_range is not None: self._mu = self.add_variable(kind=GrowthRate, hook=self, id_='total', # Will read MU_total lb=mu_range[0], ub=mu_range[1]) self.init_mu_variables() else: # message = """ You need to supply mu_range.""" message = "Empty model initialized" # raise ValueError(message) self.logger.info(message) self.aa_dict = dict() self.rna_nucleotides = dict() self.trna_dict = dict() self.dilution_terms = dict() self.enzymes = DictList() self.mrnas = DictList() self.rrnas = DictList() self.trnas = DictList() self.peptides = DictList() self.transcription_reactions = DictList() self.translation_reactions = DictList() self.complexation_reactions = DictList() self.degradation_reactions = DictList() self.dna = None self.ribosome = OrderedDict() self.rnap = OrderedDict() self.coupling_dict = dict() @property def mu(self): return self._mu @property def mu_max(self): return self._mu_range[-1] # @mu.setter # def mu(self, val, epsilon = None): # if epsilon is None: # epsilon = self.solver.configuration.tolerances.feasibility # # self._mu.lb = val-epsilon # self._mu.ub = val+epsilon def make_mu_bins(self): from numpy import linspace bounds = linspace(self.mu.variable.lb, self.mu.variable.ub, self._n_mu_bins+1) bins = zip(bounds[:-1], bounds[1:]) self.mu_bins = tuple(((x[0]+x[1])/2, x) for x in bins) @property def n_mu_bins(self): return len(self.mu_bins) def init_mu_variables(self): """ Necessary for the zeroth order approximation of mu: .. math:: mu \in [0.1, 0.9] , nbins = 8 => mu = 0.15 OR mu = 0.25 OR ... OR mu = 0.85 Using binary expansion of the bins instead of a list of 0-1s described `here <https://orinanobworld.blogspot.ch/2013/07/integer-variables-and-quadratic-terms.html>`_ :return: """ self.make_mu_bins() ga = list() N = self.n_mu_bins n_vars = np.int(np.ceil(np.log2(N))) for e in range(n_vars): ga.append(self.add_variable(kind=GrowthActivation, hook=self, id_=str(2 ** e))) # Force that only one growth range can be chosen: # b0*2^0 + b1*2^1 + b2*2^2 + ... + bn*2^n <= n_bins choice_expr = sum(ga) self.add_constraint(kind=GrowthChoice, hook=self, expr=choice_expr, id_='growth', ub=self.n_mu_bins, lb=0) # Couple growth v_fwd = self.growth_reaction.forward_variable v_bwd = self.growth_reaction.reverse_variable # |v_net - mu| <= bin_width bin_half_width = max([(x[1] - x[0]) / 2 for _, x in self.mu_bins]) the_integer = symbol_sum([(2 ** i) * ga_i for i, ga_i in enumerate(ga)]) binarized_mu = self.mu.variable.lb + the_integer * self.mu_approx_resolution growth_coupling_expr = v_fwd - v_bwd - binarized_mu self.add_constraint(kind=GrowthCoupling, hook=self.growth_reaction, expr=growth_coupling_expr, ub=bin_half_width, lb=-1 * bin_half_width) # So that the solver spends less time looking for an ub on the objective # when optimizing for growth self.growth_reaction.upper_bound = self.mu.variable.ub + self.mu_approx_resolution # Update the variable indices self.regenerate_variables() self.regenerate_constraints() @property def mu_approx_resolution(self): return (self.mu.variable.ub - self.mu.variable.lb) / self.n_mu_bins @property def growth_reaction(self): """ Returns the growth reaction of the model. Useful because tied to the growth variable :return: """ if self._growth_reaction_id: return self.reactions.get_by_id(self._growth_reaction_id) else: return None @growth_reaction.setter def growth_reaction(self, reaction_id): """ The growth_reaction is set by supplying the id of the candidate reaction :param reaction_id: an id within the model :type reaction_id: str :return: """ rxn = self.reactions.get_by_id(reaction_id) self._growth_reaction_id = rxn.id def add_nucleotide_sequences(self, sequences): """ :param sequences: :return: """ for gene_id, seq in sequences.items(): if gene_id in self.genes: new = replace_by_me_gene(self, gene_id, seq) else: self.logger.warning('Model has no gene {}, Adding it'.format(gene_id)) new = CodingGene(id= gene_id, name = gene_id, sequence=seq) self.add_genes([new]) self._make_peptide_from_gene(gene_id) def add_transcription_by(self, transcription_dict): for gene_id, transcribed_by in transcription_dict.items(): # transcribed_by is a list of rnap(s) try: self.genes.get_by_id(gene_id).transcribed_by = transcribed_by except KeyError: # the gene is not in the model continue def add_translation_by(self, translation_dict): for gene_id, translated_by in translation_dict.items(): # translated_by is a list of rnap(s) try: self.genes.get_by_id(gene_id).translated_by = translated_by except KeyError: # the gene is not in the model continue def add_min_tcpt_activity(self, min_act_dict): for gene_id, min_tcpt_activity in min_act_dict.items(): # min_tcpt_activity is the fraction of maximal capacity that should be forced try: self.genes.get_by_id(gene_id).min_tcpt_activity = min_tcpt_activity except KeyError: # the gene is not in the model continue def add_min_tnsl_activity(self, min_act_dict): for gene_id, min_tnsl_activity in min_act_dict.items(): # min_tnsl_activity is the fraction of maximal capacity that should be forced try: self.genes.get_by_id(gene_id).min_tnsl_activity = min_tnsl_activity except KeyError: # the gene is not in the model continue def _make_peptide_from_gene(self, gene_id): free_pep = Peptide(id=gene_id, name='Peptide, {}'.format(gene_id), gene_id=gene_id) free_pep._model = self self.peptides += [free_pep] def add_peptide_sequences(self, aa_sequences): for pep_id, seq in aa_sequences.items(): if pep_id in self.peptides: self.peptides.get_by_id(pep_id).peptide = seq else: self.logger.warning('Model has no peptide {}'.format(pep_id)) continue def add_dummies(self, nt_ratios, mrna_kdeg, mrna_length, aa_ratios, enzyme_kdeg, peptide_length, transcribed_by=None, translated_by=None): """ Create dummy peptide and mrna to enforce mrna and peptide production. Can be used to account for the missing data for all mrnas and proteins. :param nt_ratios: :param mrna_kdeg: :param mrna_length: :param aa_ratios: :param enzyme_kdeg: :param peptide_length: :param gtp: :param gdp: :param h2o: :param h: :return: """ add_interpolation_variables(self) # Create a dummy gene and override the sequences with input data dummy_sequence = 'N'*mrna_length dummy_gene = CodingGene(id='dummy_gene', name='Dummy Gene', sequence=dummy_sequence) dummy_gene._rna = dummy_sequence dummy_gene._peptide = 'X'*peptide_length dummy_gene.transcribed_by = transcribed_by dummy_gene.translated_by = translated_by self.add_genes([dummy_gene]) dummy_mrna = add_dummy_mrna(self, dummy_gene, mrna_kdeg, mrna_length, nt_ratios) dummy_peptide = add_dummy_peptide(self, aa_ratios, dummy_gene, peptide_length) dummy_protein = add_dummy_protein(self, dummy_peptide, enzyme_kdeg) add_dummy_expression(self, aa_ratios, dummy_gene, dummy_peptide, dummy_protein, peptide_length) def add_essentials(self, essentials, aa_dict, rna_nucleotides, rna_nucleotides_mp): """ Marks important metabolites for expression :param essentials: A dictionary of important metabolites to met id **Example :** .. code-block:: python essentials = { 'atp': 'atp_c', 'adp': 'adp_c', 'amp': 'amp_c', ... 'h2o': 'h2o_c', 'h': 'h_c'} } :param aa_dict: A dictionary of aminoacid letter to amicoacid met id **Example :** .. code-block:: python aa_dict = { 'A':'ala__L_c', 'R':'arg__L_c', ... } :param rna_nucleotides: A dictionary of RNA nucleotide triphosphate letter to nucleotideTP met id **Example :** .. code-block:: python rna_nucleotides = { 'A':'atp_c', 'U':'utp_c', ... } :param rna_nucleotides_mp: A dictionary of RNA nucleotide monophosphate letter to nucleotideMP met id **Example :** .. code-block:: python rna_nucleotides_mp = { 'A':'amp_c', 'U':'ump_c', ... } :return: """ self.essentials = essentials self.aa_dict = aa_dict self.rna_nucleotides = rna_nucleotides self.rna_nucleotides_mp = rna_nucleotides_mp def build_expression(self): """ Given a dictionary from amino acids nucleotides to metabolite names, goes through the list of genes in the model that have sequence information to build transcription and translation reactions :return: """ aa_dict = self.aa_dict atp = self.essentials['atp'] amp = self.essentials['amp'] ppi = self.essentials['ppi'] h2o = self.essentials['h2o'] h = self.essentials['h'] self.trna_dict = build_trna_charging(self,aa_dict,atp,amp,ppi,h2o,h) self.add_trnas([item for sublist in self.trna_dict.values() for item in sublist if isinstance(item, tRNA)]) # Check that the ribosomes have been added if not self.ribosome: raise Exception( 'A ribosome has to be added with the add_ribosome method') # Check that the RNAP has been added if not self.rnap: raise Exception( 'A RNA Polymerase has to be added with the add_rnap method') self.express_genes(self.genes) def express_genes(self, gene_list): """ Adds translation and transcription reaction to the genes in the provided list :param gene_list: :type gene_list: Iterable of str or ExpressedGene :return: """ for gene in gene_list: if isinstance(gene, str): the_gene = self.genes.get_by_id(gene) else: the_gene = self.genes.get_by_id(gene.id) if issubclass(type(the_gene), ExpressedGene): # Build the transcription self._add_gene_transcription_reaction(the_gene) if issubclass(type(the_gene), CodingGene): # Build the translation self._add_gene_translation_reaction(the_gene) def _add_gene_translation_reaction(self, gene): """ :param gene: A gene of the model that has sequence data :type gene: CodingGene :return: """ gtp = self.essentials['gtp'] gdp = self.essentials['gdp'] pi = self.essentials['pi'] h2o = self.essentials['h2o'] h = self.essentials['h'] if gene.translated_by is None: translating_enzymes = self.ribosome.values() else: translating_enzymes = [self.enzymes.get_by_id(x) for x in gene.translated_by] rxn = TranslationReaction( id='{}_translation'.format(gene.id), name='Translation, {}'.format(gene.id), gene_id= gene.id, enzymes=translating_enzymes, upper_bound=1, scaled=True) self.add_reactions([rxn]) aa_stoichiometry = make_stoich_from_aa_sequence(gene.peptide, self.aa_dict, self.trna_dict, gtp, gdp, pi, h2o, h ) _extract_trna_from_reaction(aa_stoichiometry, rxn) rxn.add_metabolites(aa_stoichiometry, rescale=True) free_peptide = self.peptides.get_by_id(gene.id) rxn.add_peptide(free_peptide) # Add ribosome as necessary enzyme rxn.gene_reaction_rule = enzymes_to_gpr_no_stoichiometry(rxn) self.translation_reactions += [rxn] def _add_gene_transcription_reaction(self, gene): """ Adds the transcription reaction related to a gene :param gene: A gene of the model that has sequence data :type gene: ExpressedGene :return: """ ppi = self.essentials['ppi'] nt_stoichiometry = make_stoich_from_nt_sequence(gene.rna, self.rna_nucleotides, ppi ) if gene.transcribed_by is None: transcribing_enzymes = self.rnap.values() else: transcribing_enzymes = [self.enzymes.get_by_id(x) for x in gene.transcribed_by] rxn = TranscriptionReaction( id=self._get_transcription_name(gene.id), name='Transcription, {}'.format(gene.id), gene_id= gene.id, enzymes=transcribing_enzymes, upper_bound=1, scaled=True) self.add_reactions([rxn]) rxn.add_metabolites(nt_stoichiometry) # Add rnap as necessary enzyme rxn.gene_reaction_rule = enzymes_to_gpr_no_stoichiometry(rxn) self.transcription_reactions += [rxn] def add_trna_mass_balances(self): """ Once the tRNAs, transcription and translation reactions have been added, we need to add the constraints: d/dt [charged_tRNA] = v_charging - sum(nu_trans*v_trans) - mu*[charged_tRNA] d/dt [uncharged_tRNA] = -v_charging + sum(nu_trans*v_trans) - mu*[uncharged_tRNA] The stoichiometries are set from the reaction dict in _extract_trna_from_reaction We also need to scale the tRNAs in mRNA space and unscale the translation: .. code:: d/dt σ_m * [*charged_tRNA] = +- σ_m * v_charging -+ σ_m/σ_p*sum(nu_tsl*σ_p*v_tr) - σ_m * mu*[*charged_tRNA] d/dt [*charged_tRNA]_hat = +- σ_m * v_charging -+ σ_m/σ_p * sum( nu_tsl * v_tr_hat) - mu*[*charged_tRNA]_hat :return: """ translation_fluxes = self.translation_reactions.list_attr('net') for _, (charged_trna, uncharged_trna, charging_rxn) in self.trna_dict.items(): # Charged tRNAs are generated with the charging reaction, consumed # by translation charged_stoichs = [translation.trna_stoich[charged_trna.id] for translation in self.translation_reactions] v_tsl_c = symbol_sum( [x * y for x, y in zip(charged_stoichs, translation_fluxes)]) charged_expr = charging_rxn.forward_variable \ + v_tsl_c self.add_mass_balance_constraint(synthesis_flux=charged_expr, macromolecule=charged_trna, queue=True) # Uncharged tRNAs are generated whenever translation happens, # consumed by charging uncharged_stoichs = [translation.trna_stoich[uncharged_trna.id] for translation in self.translation_reactions] v_tsl_u = symbol_sum( [x * y for x, y in zip(uncharged_stoichs, translation_fluxes)]) uncharged_expr = -1 * charging_rxn.forward_variable \ + v_tsl_u self.add_mass_balance_constraint(synthesis_flux=uncharged_expr, macromolecule=uncharged_trna, queue=True) self._push_queue() def add_enzymatic_coupling(self, coupling_dict): """ Couples the enzymatic reactions maximal rates with the Enzyme availability The coupling dictionary looks like: .. code-block:: python coupling_dict : { 'reaction_id_1':[ enzyme_instance_1, enzyme_instance_2], 'reaction_id_2':[ enzyme_instance_3, enzyme_instance_4, enzyme_instance_5], :param coupling_dict: A dictionary of reaction ids to enzyme lists :type coupling_dict: {str:list(Enzyme)} :return: """ self.coupling_dict.update(coupling_dict) self.add_enzymes(coupling_dict.values()) # /!\ We modify the reaction list # self.add_gene_reactions() # Generic reactions <-> Enzymes coupling for rid in tqdm(self.coupling_dict, desc='cat. constraints'): r = self.reactions.get_by_id(rid) if isinstance(r, EnzymaticReaction) and r.id in coupling_dict: # This is a proper enzymatic reaction and we can directly apply # the constraint self.logger.debug('Applying catalytic constraint to {}'. \ format(rid)) r.add_enzymes(coupling_dict[r.id]) self.apply_enzyme_catalytic_constraint(r) elif not isinstance(r, EnzymaticReaction) and r.id in coupling_dict: # This reaction needs to be transformed to an EnzymaticReaction self.logger.debug('Transforming and applying catalytic constraint to {}'. \ format(rid)) #TODO : Add enzymatic_reaction dictlist ?? enzymes = coupling_dict[r.id] enz_r = replace_by_enzymatic_reaction(self, r.id, enzymes, scaled=False) self.apply_enzyme_catalytic_constraint(enz_r) else: self.logger.error('Could not find reaction {} in the coupling dictionary'.format(r.id)) # update variable and constraints attributes self._push_queue() self.regenerate_constraints() self.regenerate_variables() def apply_enzyme_catalytic_constraint(self, reaction): """ Apply a catalytic constraint using a gene-enzymes reaction rule (GPR) :param reaction: :return: """ v_max_fwd = dict() v_max_bwd = dict() # Write v_max constraint fwd_variable = reaction.forward_variable bwd_variable = reaction.reverse_variable for e, enz in enumerate(reaction.enzymes): # If the enzymes has the same kcat for both directions # v_fwd <= kcat_fwd [E] # v_fwd - kcat_fwd [E] <= 0 v_max_fwd[e] = enz.kcat_fwd * enz.concentration v_max_bwd[e] = enz.kcat_bwd * enz.concentration # Formulating the scaling factor on the max kcat k_f = max([x.kcat_fwd for x in reaction.enzymes]) k_b = max([x.kcat_bwd for x in reaction.enzymes]) # k_f = np.median([x.kcat_fwd for x in self.enzymes]) # k_b = np.median([x.kcat_bwd for x in self.enzymes]) E_m = max([x.scaling_factor for x in reaction.enzymes]) # v_fwd <= sum(kcat_i*E_i) # for all i, E_i <= E_max (= 1g/gDW) # v_fwd / sum(kcat_i*E_i^max) <= sum(kcat_i*E_i) / sum(kcat_i*E_i^max) (<= 1) enz_constraint_expr_fwd = (fwd_variable - sum(v_max_fwd.values()))/(k_f*E_m) enz_constraint_expr_bwd = (bwd_variable - sum(v_max_bwd.values()))/(k_b*E_m) self.add_constraint(kind=ForwardCatalyticConstraint, hook=reaction, expr=enz_constraint_expr_fwd, ub=0, queue=True) self.add_constraint(kind=BackwardCatalyticConstraint, hook=reaction, expr=enz_constraint_expr_bwd, ub=0, queue=True) def add_mass_balance_constraint(self, synthesis_flux, macromolecule=None, queue=False): """ Adds a mass balance constraint of the type .. math:: d[E]/dt = 0 <=> v_synthesis - k_deg*[M] - μ*[M] = 0 for a macromolecule (mRNA or enzyme) :param synthesis_flux: :param macromolecule: :return: """ kwargs = dict() try: z = self.dilution_terms[macromolecule.id] self.logger.warning('Dilution term already exists for {}'.format(macromolecule.id)) except KeyError: z = self.linearize_me(macromolecule, queue=queue) if isinstance(macromolecule, Enzyme): kind = EnzymeMassBalance hook = macromolecule # expr = synthesis_flux.scaled_net \ # - macromolecule.degradation.scaled_net \ # - 1/macromolecule.kdeg * z # - 1/self.mu_max * z expr = synthesis_flux.net \ - macromolecule.degradation.net \ - macromolecule.scaling_factor * z # expr = 1/macromolecule.degradation.scaling_factor * \ # ( synthesis_flux.net \ # - macromolecule.degradation.net \ # - macromolecule.scaling_factor * z) elif isinstance(macromolecule, mRNA): # This also includes rRNA and sRNA kind = mRNAMassBalance hook = macromolecule # sigma = 1/macromolecule.degradation.scaling_factor # expr = sigma * synthesis_flux.net \ # - macromolecule.degradation.scaled_net \ # - 1/macromolecule.kdeg * z # - 1/self.mu_max * z expr = synthesis_flux.net \ - macromolecule.degradation.net \ - macromolecule.scaling_factor * z # expr = 1/macromolecule.degradation.scaling_factor * \ # ( synthesis_flux.net \ # - macromolecule.degradation.net \ # - macromolecule.scaling_factor * z) elif isinstance(macromolecule, tRNA): kind = tRNAMassBalance kwargs['id_'] = macromolecule.id hook = self # mu_ub = self.mu_max # sigma = mu_ub * macromolecule.scaling_factor # The synthesis flux comes unscaled [flux units] # expr = 1/sigma * synthesis_flux - 1/mu_ub * z expr = synthesis_flux - macromolecule.scaling_factor * z elif isinstance(macromolecule, DNA): kind = DNAMassBalance kwargs['id_'] = 'dna' hook = self # mu_ub = self.mu_max # expr = synthesis_flux.scaled_net - 1/mu_ub *z expr = synthesis_flux.net - macromolecule.scaling_factor * z elif isinstance(macromolecule, Lipid): kind = LipidMassBalance kwargs['id_'] = 'lipid' hook = self # mass balance is of form: 0 =v_syn - [new_mass_ratio]*mu/[old_mass_ratio] # the inverse of old mass ratio is scaling factor expr = synthesis_flux.forward_variable - synthesis_flux.reverse_variable \ - macromolecule.scaling_factor * z elif isinstance(macromolecule, Carbohydrate): kind = CarbohydrateMassBalance kwargs['id_'] = 'carbohydrate' hook = self # mass balance is of form: 0 =v_syn - [new_mass_ratio]*mu/[old_mass_ratio] # the inverse of old mass ratio is scaling factor expr = synthesis_flux.forward_variable - synthesis_flux.reverse_variable \ - macromolecule.scaling_factor * z elif isinstance(macromolecule, Ion): kind = IonMassBalance kwargs['id_'] = 'ion' hook = self # mass balance is of form: 0 =v_syn - [new_mass_ratio]*mu/[old_mass_ratio] # the inverse of old mass ratio is scaling factor expr = synthesis_flux.forward_variable - synthesis_flux.reverse_variable \ - macromolecule.scaling_factor * z else: raise Exception('Macromolecule type not recognized: {}' .format(macromolecule)) self.add_constraint(kind=kind, hook=hook, expr=expr, lb=0, ub=0, queue = queue, **kwargs) def linearize_me(self, macromolecule, queue=False): """ Performs Petersen linearization on μ*E to keep a MILP problem :return: """ E = macromolecule.variable # z = lambda_i * E_hat <= 1 ub = 1 # ga_i is a binary variable for the binary expansion f the fraction on N # of the max growth rate ga_vars = self.get_ordered_ga_vars() out_expr = self.mu.variable.lb # Build z = ga_0*2^0*mu_max/N * [E] # + ga_1*2^1*mu_max/N * [E] # + ... # + ga_n*2^n*mu_max/N * [E] for i, ga_i in enumerate(ga_vars): # Linearization step for ga_i * [E] z_name = '__MUL__'.join([ga_i.name, E.name]) # Add the variables model_z_i = self.add_variable(kind=LinearizationVariable, hook=self, id_=z_name, lb=0, ub=ub, queue=False) # z_i, cons = glovers_linearization(b = ga_i, fy=E, L=E.lb, U=E.ub, z=model_z_i) z_i, new_constraints = petersen_linearization(b=ga_i, x=E, M=E.ub, z=model_z_i) # Add the constraints: for cons in new_constraints: # Do not forget to substitute the sympy symbol in the constraint # with a variable ! # new_expression = cons.expression.subs(z_i, model_z_i.variable) # EDIT: Not anymore needed if we supply the variable self.add_constraint(kind=LinearizationConstraint, hook=self, id_=cons.name, expr=cons.expression, # expr=new_expression, ub=cons.ub, lb=cons.lb, queue=queue) out_expr += (2 ** i) * self.mu_approx_resolution * model_z_i self.dilution_terms[macromolecule.id] = out_expr return out_expr def get_ordered_ga_vars(self): """ Returns in order the variables that discretize growth :return: """ # ga_i is a binary variable for the binary expansion f the fraction on N # of the max growth rate ga_vars = self.get_variables_of_type(GrowthActivation) ga_vars = sorted(ga_vars, key=lambda x: x.ix) return ga_vars def _prep_enzyme_variables(self, enzyme): """ Reads Enzyme.composition to find complexation reaction from enzyme information :param reaction: :type reaction: cobra.Reaction :return: """ #1. Complexation # Does this enzyme already have a complexation reaction ? # This happens if an enzyme is used in several reactions if enzyme.complexation is not None: complexation = enzyme.complexation else: complexation = self.make_enzyme_complexation(enzyme) enzyme.init_variable() #2. Also add degradation self._add_enzyme_degradation(enzyme, scaled=True, queue=True) #3. Finally make the mass balance # Cannot queue, if the same enzyme is to be added twice self.add_mass_balance_constraint(complexation, enzyme, queue=False) def make_enzyme_complexation(self, enzyme): """ Makes the complexation reaction and attached it to its enzyme :param enzyme: :return: """ if not enzyme.composition: self.logger.warning('Enzyme {} has no composition' .format(enzyme.id)) return None this_id = '{}_complex'.format(enzyme.id) this_name = '{} Complexation'.format(enzyme.id) complexation = ProteinComplexation(id=this_id, name=this_name, target=enzyme, # upper_bound=1, scaled=True) try: peptides = {self.peptides.get_by_id(k): -v \ for k, v in enzyme.composition.items()} except KeyError: missing_genes = '.'.join(enzyme.composition.keys()) self.logger.warning('No nucleotide sequence found for ' 'some of these genes {}'.format(missing_genes)) return None self.add_reactions([complexation]) complexation.add_peptides(peptides) # Post processing self.complexation_reactions+= [complexation] enzyme.complexation = complexation return complexation def add_enzymes(self, enzyme_list, prep = True): """ Adds an Enzyme object, or iterable of Enzyme objects, to the model :param enzyme_list: :type enzyme_list:Iterable(Enzyme) or Enzyme :param prep: whether or not to add complexation, degradation, and mass balance constraints (needs to be overridden for dummies for example) :type prep: Boolean :return: """ if not hasattr(enzyme_list, '__iter__'): enzyme_list = [enzyme_list] else: enzyme_list = list(enzyme_list) if len(enzyme_list) == 0: return None # unpacking if not isinstance(enzyme_list[0],Enzyme): enzyme_list = [x for item in enzyme_list for x in item] # First check whether the enzymes exist in the model # Also enzymes could be declared twice for different reactions, # hence turn the list into a set enzyme_list = [x for x in set(enzyme_list) if x.id not in self.enzymes] enz_ids = [x.id for x in enzyme_list] tot_ids = len(enz_ids) unique_ids = len(set(enz_ids)) if tot_ids != unique_ids: msg = '{} duplicate enzyme IDs detected'.format(tot_ids-unique_ids) self.logger.error(msg) raise KeyError(msg) for enz in enzyme_list: enz._model = self self.enzymes += enzyme_list if prep: for enz in tqdm(enzyme_list, desc='enz. vars'): self._prep_enzyme_variables(enz) def add_mrnas(self, mrna_list, add_degradation=True): """ Adds a mRNA object, or iterable of mRNA objects, to the model :param mrna_list: :type mrna_list:Iterable(mRNA) or mRNA :return: """ if not hasattr(mrna_list, '__iter__'): mrna_list = [mrna_list] if len(mrna_list) == 0: return None # First check whether the mRNAs exist in the model mrna_list = [x for x in mrna_list if x.id not in self.mrnas] for mrna in mrna_list: mrna._model = self mrna.init_variable() if add_degradation: self._add_mrna_degradation(mrna, scaled=True, queue=True) self.mrnas += mrna_list self._push_queue def add_trnas(self, trna_list): """ Adds a tRNA object, or iterable of tRNA objects, to the model :param trna_list: :type trna_list:Iterable(tRNA) or tRNA :return: """ if not hasattr(trna_list, '__iter__'): trna_list = [trna_list] if len(trna_list) == 0: return None # First check whether the tRNAs exist in the model trna_list = [x for x in trna_list if x.id not in self.trnas] for trna in trna_list: trna._model = self trna.init_variable() self.trnas += trna_list def add_dna(self, dna): """ Adds a DNA object to the model :param dna: :type dna: DNA :return: """ dna._model = self dna.init_variable() self.dna = dna def add_lipid(self, lipid): """ Adds a lipid object to the model :param lipid: :type lipid: Lipid :return: """ lipid._model = self lipid.init_variable() self.lipid = lipid def add_ion(self, ion): """ Adds a ion object to the model :param ion: :type ion: ion :return: """ ion._model = self ion.init_variable() self.ion = ion def add_carbohydrate(self, carbohydrate): """ Adds a carbohydrate object to the model :param carbohydrate: :type carbohydrate: carbohydrate :return: """ carbohydrate._model = self carbohydrate.init_variable() self.carbohydrate = carbohydrate def remove_enzymes(self, enzyme_list): """ Removes an Enzyme object, or iterable of Enzyme objects, from the model :param enzyme_list: :type enzyme_list:Iterable(Enzyme) or Enzyme :return: """ if not hasattr(enzyme_list, '__iter__'): enzyme_list = [enzyme_list] if len(enzyme_list) == 0: return None # First check whether the metabolites exist in the model enzyme_list = [x for x in enzyme_list if x.id not in self.enzymes] for enz in enzyme_list: self.remove_reactions(enz.degradation) self.remove_reactions(enz.complexation) self.enzymes.pop(enz.id) def _add_enzyme_degradation(self, enzyme, scaled=True, queue=False): """ Given an enzyme, adds the corresponding degradation reaction :param enzyme: :type enzyme: Enzyme :param scaled: Indicates whether scaling should be performed (see manuscript) :type scaled: bool :param queue: Indicates whether to add the variable directly or in the next batch :type queue: bool :return: """ h2o = self.essentials['h2o'] if enzyme.kdeg is None or np.isnan(enzyme.kdeg): return None complex_dict = enzyme.complexation.metabolites deg_stoich = defaultdict(int) for peptide, stoich in complex_dict.items(): the_pep = self.peptides.get_by_id(peptide.id) degradation_mets = degrade_peptide(the_pep, self.aa_dict, h2o) for k,v in degradation_mets.items(): deg_stoich[k]+=-1*v*stoich # stoich is negative self._make_degradation_reaction(deg_stoich, enzyme, EnzymeDegradation, scaled=scaled, queue=queue) def _add_mrna_degradation(self, mrna, scaled = True, queue=False): """ Given an mRNA, adds the corresponding degradation reaction :param mrna: :type mrna: mRNA :param scaled: Indicates whether scaling should be performed (see manuscript) :type scaled: bool :param queue: Indicates whether to add the variable directly or in the next batch :type queue: bool :return: """ h2o = self.essentials['h2o'] h = self.essentials['h'] if mrna.kdeg is None or np.isnan(mrna.kdeg): return None degradation_mets = degrade_mrna(mrna, self.rna_nucleotides_mp, h2o, h) self._make_degradation_reaction(degradation_mets,mrna,mRNADegradation, scaled=scaled, queue=queue) def _make_degradation_reaction(self, deg_stoich, macromolecule, kind, scaled, queue=False): """ given a degradation stoichiometry, makes the corresponding degradation reaction :param deg_stoich: stoichiometry of the degradation :type deg_stoich: dict({:class:`cobra.core.Species:Number`}) :param macromolecule: the macromalecule being degraded. Used for binding the degradation constraint :type macromolecule: Macromolecule :param kind: kind of constraint :type kind: mRNADegradation or EnzymeDegradation :param scaled: Indicates whether scaling should be performed (see manuscript) :type scaled: bool :param queue: Indicates whether to add the variable directly or in the next batch :type queue: bool :return: """ reaction = DegradationReaction(id='{}_degradation'.format(macromolecule.id), macromolecule=macromolecule, scaled=scaled) if scaled: reaction.upper_bound = 1 # Assignment to model must be done before since met dict kas string keys self.add_reactions([reaction]) self.degradation_reactions += [reaction] reaction.add_metabolites(deg_stoich, rescale = True) # Couple with the expression constraint v_deg = k_deg [E] # Scaled into v_deg_hat = E_hat # expr = reaction.scaled_net \ # - (macromolecule.kdeg / self.mu_max) * macromolecule.scaled_concentration # expr = reaction.scaled_net - macromolecule.scaled_concentration expr = reaction.net - macromolecule.kdeg * macromolecule.concentration self.add_constraint(kind=kind, hook=macromolecule, expr=expr, lb=0, ub=0, queue=queue) def populate_expression(self): """ Defines upper- and lower_bound for the RNAP and Ribosome binding capacities and define catalytic constraints for the RNAP and Ribosome :return: """ # This part defines catalytic constraints by iterating over transcription # and translation rxns. This already excluded ExpressedGene from translation self._populate_rnap() self._populate_ribosomes() self._push_queue() # This part defines the min and max binding capacities by iterating over # the gene names, So we need to check if the gene is expressed or not. for the_mrna in tqdm(self.mrnas, desc='populating expression'): self.add_mrna_mass_balance(the_mrna) self._constrain_polysome(the_mrna) if self.dna is not None: for the_gene in tqdm(self.genes, desc='constraining transcription'): self._constrain_polymerase(the_gene) else: self.logger.warning('RNAP allocation constraints were not added,' 'since DNA is not defined for the model. Use add' 'fix_dna_ratio or add_dna_mass_requirement.') self._push_queue() self.regenerate_variables() self.regenerate_constraints() # we should modify the mass balance constraints for rRNAs (after adding mrna mass balances) self.couple_rrna_synthesis() def add_mrna_mass_balance(self, the_mrna): # Get the synthesis_flux syn_id = self._get_transcription_name(the_mrna.id) syn = self.transcription_reactions.get_by_id(syn_id) # Add the mass balance constraint for the mrna self.add_mass_balance_constraint(syn, the_mrna, queue=True) def _constrain_polysome(self, the_mrna, basal_fraction = 0): """ Add the coupling between mRNA availability and ribosome charging The number of ribosomes assigned to a mRNA species is lower than the number of such mRNA times the max number of ribosomes that can sit on the mRNA: [RPi] <= loadmax_i*[mRNAi] loadmax is : len(peptide_chain)/size(ribo) Their distance from one another along the mRNA is at least the size of the physical footprint of a ribosome (≈20 nm, BNID 102320, 100121) which is the length of about 60 base pairs (length of nucleotide ≈0.3 nm, BNID 103777), equivalent to ≈20 aa. also 28715909 "http://book.bionumbers.org/how-many-proteins-are-made-per-mrna-molecule/" Hence: [RPi] <= L_nt/Ribo_footprint * [mRNA] In addition, it also adds a minimal binding activity for ribosome to the mRNA. We modeled it as a Fraction of the maximum loadmax and the Fraction depends on the affinity of ribosome to the mRNA: [RPi] >= Fraction * L_nt/Ribo_footprint * [mRNA] :return: """ # check if it is not CodingGene no constraint should be defined if not isinstance(the_mrna.gene, CodingGene): return ribo_footprint_size = 60 # [bp] see docstring rib_usage_vars = self.get_variables_of_type(RibosomeUsage) # Get the ribosomes assigned to this translation RPi_hat = rib_usage_vars.get_by_id(the_mrna.id) # Get the mRNA concentration mrna_hat = the_mrna.scaled_concentration polysome_size = len(the_mrna.gene.rna) / ribo_footprint_size # σ_m is the mRNA scaling factor, # σ_r is the ribosome scaling factor # [RPi] <= Lmrna/Lrib * [mRNAi] # [RPi]_hat <= Lmrna/Lrib * σ_m/σ_r * [mRNAi]_hat # # nondimensionalized: scaling_factor = the_mrna.scaling_factor / RPi_hat.scaling_factor expression_coupling = RPi_hat \ - polysome_size * scaling_factor * mrna_hat # expression_coupling = RPi_hat - polysome_size * mrna_hat # Add expression coupling self.add_constraint(kind=ExpressionCoupling, hook=the_mrna.gene, expr=expression_coupling, queue=True, ub=0) # [RPi]_hat >= basal_fraction * Lmrna/Lrib * σ_m/σ_r * [mRNAi]_hat basal_fraction = float(the_mrna.gene.min_tnsl_activity) minimal_coupling = RPi_hat \ - basal_fraction * polysome_size * scaling_factor * mrna_hat self.add_constraint(kind=MinimalCoupling, hook=the_mrna.gene, expr=minimal_coupling, queue=True, lb=0) def _constrain_polymerase(self, the_gene, basal_fraction = 0): """ Add the coupling between DNA availability and RNAP charging The number of RNAP assigned to a gene locus is lower than the number of such loci times the max number of RNAP that can sit on the locus: [RNAPi] <= loadmax_i*[# of loci]*[DNA] loadmax is : len(nucleotide chain)/size(RNAP) "The footprint of RNAP II [...] covers approximately 40 nt and is nearly symmetrical [...]." BNID 107873 Range ~40 Nucleotides Hence: [RNAPi] <= loadmax_i*[# of loci]*[DNA] In addition, it also adds a minimal binding activity for RNAP to the gene. We modeled it as a Fraction of the maximum loadmax and the Fraction depends on the affinity of RNAP to the gene, i.e. the strength of the promoter: [RNAPi] >= Fraction * L_nt/RNAP_footprint * [# of loci]*[DNA] :return: """ # check if it is not ExpressedGene no constraint should be defined if not isinstance(the_gene, ExpressedGene): return rnap_footprint_size = 40 # [bp] see docstring of ```_constrain_polymerases''' loadmax = len(the_gene.sequence) / rnap_footprint_size rnap_usage_vars = self.get_variables_of_type(RNAPUsage) if not the_gene in rnap_usage_vars: self.logger.debug('Gene {} has no RNAPUsage var. ' 'No RNAP allocation constraint added.' .format(the_gene.id)) return None # Get the RNAP assigned to this transcription RNAPi_hat = rnap_usage_vars.get_by_id(the_gene.id) # Get the number of loci n_loci = the_gene.copy_number # σ_dna is the DNA scaling factor, # σ_rp is the RNAP scaling factor # [RNAPi] <= Lgene/Lrnap * n_loci * [DNA] # [RNAPi]_hat <= Lgene/Lrnap * σ_dna/σ_rp * n_loci * [DNA]_hat # # Non-dimensionalized scaling_factor = self.dna.scaling_factor / RNAPi_hat.scaling_factor rnap_alloc = RNAPi_hat \ - loadmax * n_loci * scaling_factor * self.dna.scaled_concentration # Add expression coupling self.add_constraint(kind=RNAPAllocation, hook=the_gene, expr=rnap_alloc, queue=True, ub=0) # [RNAPi]_hat >= Lgene/Lrnap * σ_dna/σ_rp * n_loci * [DNA]_hat basal_fraction = float(the_gene.min_tcpt_activity) min_alloc = RNAPi_hat \ - basal_fraction * loadmax * n_loci * scaling_factor * \ self.dna.scaled_concentration self.add_constraint(kind=MinimalAllocation, hook=the_gene, expr=min_alloc, queue=True, lb=0) def edit_gene_copy_number(self, gene_id): """ Edits the RNAP allocation constraints if the copy number of a gene changes. :param gene_id: :return: """ the_gene = self.genes.get_by_id(gene_id) rnap_alloc = self.get_constraints_of_type(RNAPAllocation) rnap_usage = self.get_variables_of_type(RNAPUsage) if gene_id in rnap_alloc and gene_id in rnap_usage: # We need to edit the variable # Modify the polymerase constraint cons = rnap_alloc.get_by_id(gene_id) new_expr = self._get_rnap_allocation_expression(the_gene) cons.change_expr(new_expr) self.logger.debug('Changed RNAP allocation for gene {}'.format(gene_id)) else: # There is no variable, maybe the allocation constraints # have not been made yet. # self.model._add_rnap_allocation(rnap_usage, self) pass def recompute_translation(self): """ :return: """ for rib_id in self.ribosome: #1.1 Find the RNAP capacity constraint cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rib_id) #1.2 Edit the Ribosome capacity constraint new_total_capacity = self._get_rib_total_capacity() cons.change_expr(new_total_capacity) def recompute_transcription(self): """ :return: """ # TODO: No need to recompute for the one we just added. # Keep tabs ? use an except list for rnap_id in self.rnap: # 1.1 Find the RNAP capacity constraint cons = self.get_constraints_of_type(TotalCapacity).get_by_id(rnap_id) # 1.2 Edit the RNAP capacity constraint new_total_capacity = self._get_rnap_total_capacity() cons.change_expr(new_total_capacity) def recompute_allocation(self): """ :return: """ #0. Does the model have allocation constraints ? interpolation_constraints = self.get_constraints_of_type(InterpolationConstraint) interpolation_variables = self.get_variables_of_type(InterpolationVariable) if not interpolation_constraints: return None #1. Remove previous allocation constraints # mRNA mrna_weight_def_cons = interpolation_constraints.get_by_id(MRNA_WEIGHT_CONS_ID) mrna_weight_var = interpolation_variables.get_by_id(MRNA_WEIGHT_VAR_ID) # Proteins prot_weight_def_cons = interpolation_constraints.get_by_id(PROT_WEIGHT_CONS_ID) prot_weight_var = interpolation_variables.get_by_id(PROT_WEIGHT_VAR_ID) #DNA dna_weight_def_cons = interpolation_constraints.get_by_id(DNA_WEIGHT_CONS_ID) dna_weight_var = interpolation_variables.get_by_id(DNA_WEIGHT_VAR_ID) for the_cons in [mrna_weight_def_cons, prot_weight_def_cons, dna_weight_def_cons]: self.remove_constraint(the_cons) #2. Apply new allocation constraints define_prot_weight_constraint(self,prot_weight_var) define_mrna_weight_constraint(self,mrna_weight_var) self.recalculate_dna(dna_weight_var) def _get_transcription_name(self, the_mrna_id): """ Given an mrna_id, gives the id of the corresponding transcription reaction :param the_mrna_id: :type the_mrna_id: str :return: str """ return '{}_transcription'.format(the_mrna_id) def _get_translation_name(self, the_peptide_id): """ Given an mrna_id, gives the id of the corresponding translation reaction :param the_peptide_id: :type the_peptide_id: str :return: str """ return '{}_translation'.format(the_peptide_id) def get_translation(self, the_peptide_id): """ Given an peptide_id, gives the translation reaction :param the_peptide_id: :type the_peptide_id: str :return: TranslationReaction """ return self.reactions.get_by_id(self._get_translation_name(the_peptide_id)) def get_transcription(self, the_peptide_id): """ Given an mrna_id, gives corresponding transcription reaction :param the_mrna_id: :type the_mrna_id: str :return: TranscriptionReaction """ return self.reactions.get_by_id(self._get_transcription_name(the_peptide_id)) def add_rnap(self, rnap, free_ratio=0): """ Adds the RNA Polymerase used by the model. :param rnap: :type rnap: Ribosome :return: """ if rnap.id in self.rnap: raise KeyError('A RNAP with this id ({}) alread exists in ' 'the model'.format(rnap.id)) else: self.rnap[rnap.id] = rnap self.add_enzymes(rnap) if free_ratio > 0: self._add_free_enzyme_ratio(rnap, free_ratio) def _populate_rnap(self): """ Once RNAP have been assigned to the model, we still need to link them to the rest of the variables and constraints. This function creates the mass balance constraint on the RNAP, as well as the total RNAP capacity constraint :return: """ # v_complexation = complexation.forward_variable \ # - complexation.reverse_variable # Create the mass balance constraint # 1 -> Write the RNAP mass balance # for this_rnap in self.rnap.values(): # self.add_mass_balance_constraint(this_rnap.complexation, this_rnap) # UPDATE: We now do this in add_rnap :) # 2 -> Parametrize all the transcription reactions with RNAP vmax for trans_rxn in self.transcription_reactions: self.apply_rnap_catalytic_constraint(trans_rxn, queue=True) self._push_queue() # 3 -> Add RNAP capacity constraint self.regenerate_variables() transcription_dict = self._sort_rnap_assignment() gene_list_tot = [] # we need to have a list of all genes for the_combination, gene_list in transcription_dict.items(): # CATCH : This is summing ~1500+ variable objects, and for a reason # sympy does not like it. Let's cut it in smaller chunks and sum # afterwards # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) for gene_id in gene_list: if gene_id not in gene_list_tot: gene_list_tot.append(gene_id) if len(the_combination) != len(self.rnap.keys()): # at least one of the rnaps isn't assigned to these genes # per each such combination adds an inequality to make sure # that at least we have enough rnap for these genes # For nondimensionalization # usage = (sum_RMs + self.RNAPf[this_rnap.id].unscaled - this_rnap.concentration) / \ # this_rnap.scaling_factor # Create the capacity constraint usage = self._get_rnap_total_capacity(rnap_ids = the_combination, genes = gene_list) self.add_constraint(kind=TotalCapacity, hook=self, id_ = id_maker_rib_rnap(the_combination), expr=usage, lb = -self.big_M, ub = 0, ) # Finally add an equlaity constraint to make sure that rnaps # assigned to all the genes is equal to sum of all rnap usages rnap_set = frozenset([x for x in self.rnap.keys()]) # a set of rnap ids usage = self._get_rnap_total_capacity(rnap_ids = rnap_set, genes = gene_list_tot) self.add_constraint(kind=TotalCapacity, hook=self, id_=id_maker_rib_rnap(rnap_set), expr=usage, lb = 0, ub = 0, ) # update variable and constraints attributes self.regenerate_constraints() self.regenerate_variables() def _sort_rnap_assignment(self): # a function to sort genes based on their type of rnap assignment # 2 types exist: None or at least one rnap assigned to this gene all_rnap_usage = self.get_variables_of_type(RNAPUsage) rnap_set = frozenset([x for x in self.rnap.keys()]) # a set of rnap ids transcription_dict = dict() # a dict to keep different types; the keys are rnap sets and values are genes for var in all_rnap_usage: if var.hook.transcribed_by is None: # the 1st type which is handeled by assigning all rnaps to this gene this_type = rnap_set try: transcription_dict[this_type].append(var.hook.id) except KeyError: # the first time encountering this type transcription_dict[this_type] = [var.hook.id] else: # the 2nd type this_type = frozenset(var.hook.transcribed_by) try: transcription_dict[this_type].append(var.hook.id) except KeyError: # the first time encountering this type transcription_dict[this_type] = [var.hook.id] return transcription_dict def _get_rnap_total_capacity(self, rnap_ids, genes): all_rnap_usage = self.get_variables_of_type(RNAPUsage) # only for genes trascribed by thiscombination of rnaps sum_RMs = symbol_sum([x.unscaled for x in all_rnap_usage \ if x.hook.id in genes]) free_rnap = [self.get_variables_of_type(FreeEnzyme).get_by_id(rnap_id) \ for rnap_id in rnap_ids] # The total RNAP capacity constraint looks like # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) usage = sum_RMs \ + sum([x.unscaled for x in free_rnap]) \ - sum([self.rnap[rnap_id].concentration for rnap_id in rnap_ids]) usage /= min([self.rnap[rnap_id].scaling_factor for rnap_id in rnap_ids]) return usage def apply_rnap_catalytic_constraint(self, reaction, queue): """ Given a translation reaction, apply the constraint that links it with RNAP usage :param reaction: a TranscriptionReaction :type reaction: TranscriptionReaction :return: """ # Check that we indeed have a transcription reaction assert(isinstance(reaction, TranscriptionReaction)) the_scaling_factor = min([x.scaling_factor for x in self.rnap.values()]) RMi = self.add_variable(RNAPUsage, reaction.gene, scaling_factor = the_scaling_factor, lb =0, ub=1, queue=False) # fwd_variable = reaction.forward_variable # bwd_variable = reaction.reverse_variable # v_fwd - v_bwd = ktrans/length_aa [RNAPi] # v_fwd - v_bwd - ktrans/length_aa [RNAPi] = 0 # Scaled in protein concentrations (eg mmol.gDW^-1) # σ_m is mRNA scaling factor, σ_p is protein scaling factor # v = k/L [RNAP] # σ_m * V = k/L * σ_m/σ_p * σ_p[RNAP] # v_hat = k/L * σ_m/σ_p * [RNAP]_hat # v_max = self.rnap.ktrans \ # / reaction.nucleotide_length \ # * (self._mrna_scaling/self._prot_scaling) \ # * RMi # rnap_constraint_expr = fwd_variable - bwd_variable - v_max k = sum([this_rnap.ktrans / reaction.nucleotide_length for this_rnap in self.rnap.values()]) # Scaled version: # rnap_constraint_expr = reaction.scaled_net - RMi # scaled rnap_constraint_expr = reaction.net - k * RMi.unscaled self.add_constraint(kind=SynthesisConstraint, hook=reaction, expr=rnap_constraint_expr, lb=0, ub=0,queue=queue) def _add_free_enzyme_ratio(self, enzyme, free_ratio): """ Adds free enzyme variables to the models /!\ A total capacity constraint still needs to be added # TODO: Make that more user friendly :return: """ # Safety_check: if isinstance(enzyme, Enzyme): enzyme = self.enzymes.get_by_id(enzyme.id) elif isinstance(enzyme, str): enzyme = self.enzymes.get_by_id(enzyme) else: raise TypeError("`enzyme' should be an enzyme object or an ID") # Free enzyme free_enz = \ self.add_variable(kind=FreeEnzyme, hook=enzyme, lb=0, ub=1, scaling_factor = enzyme.scaling_factor) # Add constraint on availability of free enzymes expr = free_enz - free_ratio * enzyme.variable #scaled self.add_constraint(kind=EnzymeRatio, hook=enzyme, expr=expr, lb=0, ub=0) def add_ribosome(self, ribosome, free_ratio): """ Adds the ribosome used by the model. :param ribosome: :type ribosome: Ribosome :return: """ if ribosome.id in self.ribosome: raise KeyError('A ribosome with this id ({}) already exists in ' 'the model'.format(ribosome.id)) else: self.ribosome[ribosome.id] = ribosome self.add_enzymes(ribosome) if free_ratio > 0: self._add_free_enzyme_ratio(ribosome, free_ratio) # moved here to convert rRNA genes to ExpressedGene earlier self.add_rrnas_to_rib_assembly(ribosome) # v_complexation = complexation.forward_variable \ # - complexation.reverse_variable # 1 -> Write the ribosome mass balance # Total amount of ribosome is in: # mass_balance_expr = v_complexation \ # - self.ribosome.kdeg * Rt \ # - self.mu * Rt # Create the mass balance constraint # UPDATE: We now do this in add_rnap :) # self.add_mass_balance_constraint(this_rib.complexation, # this_rib) def add_rrnas_to_rib_assembly(self, ribosome): """ Adds the ribosomal RMAs to the composition of the ribosome. This has to be done after the transcription reactions have been added, so that the rRNAs synthesis reactions exist for the mass balance :return: """ # rRNA rrnas = [] # f = ribosome.kdeg * ribosome.scaling_factor # f = self.mu_max * self.ribosome.scaling_factor for the_rrna_id in ribosome.rrna_composition: try: # it is possible to have ribosomes with the same rrnas the_rrna = self.rrnas.get_by_id('rrna_' + the_rrna_id) except KeyError: the_rrna = rRNA(id = 'rrna_' + the_rrna_id, name = 'rRNA {}'.format(the_rrna_id)) the_rrna._model = self # If it is new, make sure its gene is ExpressedGene and not CodingGene if the_rrna_id in self.genes: # If it has already added, then it is CodingGene and should be replaced new = replace_by_coding_gene(self, the_rrna_id) else: self.logger.warning('Model has no gene for rRNA {}, Adding it without any sequence'.format(the_rrna_id)) new = ExpressedGene(id= the_rrna_id, name = the_rrna_id, sequence='') self.add_genes([new]) the_rrna.ribosomes = the_rrna.ribosomes + [ribosome] rrnas.append(the_rrna) # nothing to do here! later ribosome complexation will be added to rRNA mass balance (macromolecule) # ribosome.complexation.add_metabolites( # {x:-1 for x in rrnas}, rescale=True) # {x:-1*f for x in rrnas}, rescale=False) # it is possible to have different ribosomes with the same rrnas self.rrnas += [r for r in rrnas if r not in self.rrnas] @property def Rt(self): return self.ribosome def _populate_ribosomes(self): """ Once ribosomes have been assigned to the model, we still need to link them to the rest of the variables and constraints. This function creates the mass balance constraint on the ribosomes, as well as the total ribosome capacity constraint :return: """ # 2 -> Parametrize all the translation reactions with ribosomal vmax for trans_rxn in self.translation_reactions: self.apply_ribosomal_catalytic_constraint(trans_rxn) # 3 -> Add ribosomal capacity constraint self.regenerate_variables() translation_dict = self._sort_rib_assignment() gene_list_tot = [] # we need to have a list of all genes for the_combination, gene_list in translation_dict.items(): # CATCH : This is summing ~1500+ variable objects, and for a reason # sympy does not like it. Let's cut it in smaller chunks and sum # afterwards # sum_RPs = sum(self.get_variables_of_type(RibosomeUsage)) for gene_id in gene_list: if gene_id not in gene_list_tot: gene_list_tot.append(gene_id) if len(the_combination) != len(self.ribosome.keys()): # at least one of the ribosomes isn't assigned to these genes # per each such combination adds an inequality to make sure # that at least we have enough ribosome for these genes # For nondimensionalization # ribo_usage = (sum_RPs + self.Rf.unscaled - self.ribosome.concentration) \ # / self.ribosome.scaling_factor # Create the capacity constraint ribo_usage = self._get_rib_total_capacity(rib_ids = the_combination, genes = gene_list) self.add_constraint(kind=TotalCapacity, hook=self, id_=id_maker_rib_rnap(the_combination), expr=ribo_usage, lb = -self.big_M, ub = 0, ) # Finally add an equlaity constraint to make sure that ribosomes # assigned to all the genes is equal to sum of all ribosome usages rib_set = frozenset([x for x in self.ribosome.keys()]) # a set of rib ids ribo_usage = self._get_rib_total_capacity(rib_ids = rib_set, genes = gene_list_tot) self.add_constraint(kind=TotalCapacity, hook=self, id_=id_maker_rib_rnap(rib_set), expr=ribo_usage, lb = 0, ub = 0, ) # update variable and constraints attributes self.regenerate_constraints() self.regenerate_variables() def couple_rrna_synthesis(self): mass_balance_cstrs = self.get_constraints_of_type(mRNAMassBalance) for the_rrna in self.rrnas: the_rrna_id = the_rrna.id.replace('rrna_','') # until here the rRNA mass balance is of macromolecule form: # d[rRNA]/dt = Vsyn - Vdeg - u[rRNA] # Now we should add the terms for ribosome complexation, i.e : # d[rRNA]/dt = Vsyn - Vdeg - u[rRNA] - Vcomp the_constr = mass_balance_cstrs.get_by_id(the_rrna_id) complexation_terms = symbol_sum([rib.complexation.net for rib in \ the_rrna.ribosomes]) new_expr = the_constr.constraint.expression - complexation_terms lb = the_constr.constraint.lb ub = the_constr.constraint.ub self.remove_constraint(the_constr) self.add_constraint(kind = mRNAMassBalance, hook = the_constr.hook, expr = new_expr, lb = lb, ub = ub) def _sort_rib_assignment(self): # a function to sort genes based on their type of ribosome assignment # 2 types exist: None or at least one ribosome assigned to this gene all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) rib_set = frozenset([x for x in self.ribosome.keys()]) # a set of rib ids translation_dict = dict() # a dict to keep different types; the keys are ribosome sets and values are genes for var in all_ribosome_usage: if var.hook.translated_by is None: # the 1st type which is handeled by assigning all ribosomes to this gene this_type = rib_set try: translation_dict[this_type].append(var.hook.id) except KeyError: # the first time encountering this type translation_dict[this_type] = [var.hook.id] else: # the 2nd type this_type = frozenset(var.hook.translated_by) try: translation_dict[this_type].append(var.hook.id) except KeyError: # the first time encountering this type translation_dict[this_type] = [var.hook.id] return translation_dict def _get_rib_total_capacity(self, rib_ids, genes): all_ribosome_usage = self.get_variables_of_type(RibosomeUsage) # only for genes traslated by this combination of ribosomes sum_RPs = symbol_sum([x.unscaled for x in all_ribosome_usage \ if x.hook.id in genes]) free_ribosome = [self.get_variables_of_type(FreeEnzyme).get_by_id(rib_id) \ for rib_id in rib_ids] # The total RNAP capacity constraint looks like # ΣRMi + Σ(free RNAPj) = Σ(Total RNAPj) usage = sum_RPs \ + sum([x.unscaled for x in free_ribosome]) \ - sum([self.ribosome[rib_id].concentration for rib_id in rib_ids]) usage /= min([self.ribosome[rib_id].scaling_factor for rib_id in rib_ids]) return usage def apply_ribosomal_catalytic_constraint(self, reaction): """ Given a translation reaction, apply the constraint that links it with ribosome usage :param reaction: a TranslationReaction :type reaction: TranslationReaction :return: """ # Check that we indeed have a translation reaction assert(isinstance(reaction, TranslationReaction)) the_scaling_factor = min([x.scaling_factor for x in self.ribosome.values()]) RPi = self.add_variable(RibosomeUsage, reaction.gene, scaling_factor=the_scaling_factor, ub=1, lb=0) # v_fwd - v_bwd = kribo/length_aa [Ri] # v_fwd - v_bwd - kribo/length_aa [Ri] = 0 # No scaling : Flux is in protein scale, and so is the ribosome concentration # v_max = self.ribosome.kribo \ # / reaction.aminoacid_length \ # * RPi # ribo_constraint_expr = fwd_variable - bwd_variable - v_max k = sum([rib.kribo / reaction.aminoacid_length for rib in self.ribosome.values()]) # Scaled version # ribo_constraint_expr = reaction.scaled_net - RPi ribo_constraint_expr = reaction.net - k * RPi.unscaled self.add_constraint(kind=SynthesisConstraint, hook=reaction, expr=ribo_constraint_expr, lb=0, ub=0) def add_genes(self, genes): """ Oddly I could not find this method in cobra. Adds one or several genes to the model. :param genes: :type genes: Iterable(Gene) or Gene :return: """ if not hasattr(genes,'__iter__'): genes = [genes] new_genes = [x for x in genes if x.id not in self.genes] modified_genes = [x for x in genes if x.id in self.genes] for g in new_genes: self._add_gene(g) for g in modified_genes: raise NotImplementedError() def _add_gene(self, gene): gene._model = self self.genes.append(gene) #-------------------------------------------------------------------------# def sanitize_varnames(self): """ Makes variable name safe for the solvers. In particular, variables whose name start with :return: """ for met in self.metabolites: if met.id[0].isdigit(): met.id = '_'+met.id self.logger.info('Sanitized variable name {}'.format(met.id)) for rxn in self.reactions: if rxn.id[0].isdigit(): rxn.id = '_'+rxn.id self.logger.info('Sanitized variable name {}'.format(rxn.id)) Model.repair(self) def print_info(self, specific = False): """ Print information and counts for the cobra_model :return: """ if not specific: LCSBModel.print_info(self) n_reactions = len(self.reactions) n_enzymes = len(self.enzymes) n_enzymatic_reactions = len([x for x in self.reactions \ if isinstance(x, EnzymaticReaction)]) info = pd.DataFrame(columns = ['value']) info.loc['num enzymes'] = n_enzymes info.loc['num enzymatic_reactions'] = n_enzymatic_reactions info.loc['pct enzymatic_reactions'] = n_enzymatic_reactions/n_reactions*100 info.index.name = 'key' print(info) def __deepcopy__(self, memo): """ Calls self.copy() to return an independant copy of the model :param memo: :return: """ return self.copy() def copy(self): """ Pseudo-smart copy of the model using dict serialization. This builds a new model from the ground up, with independwnt variables, solver, etc. :return: """ from ..io.dict import model_from_dict, model_to_dict dictmodel = model_to_dict(self) new = model_from_dict(dictmodel) copy_solver_configuration(self, new) return new