"""Contains class Model; main class user interacts with."""
import numpy as np
import pandas as pd
import textdistance as td
import itertools as it
import copy as cp
import seaborn as sns
import joblib as jl
from opqua.internal.host import Host
from opqua.internal.vector import Vector
from opqua.internal.population import Population
from opqua.internal.setup import Setup
from opqua.internal.intervention import Intervention
from opqua.internal.gillespie import Gillespie
from opqua.internal.data import saveToDf, getPathogens, getProtections, \
getPathogenDistanceHistoryDf
from opqua.internal.plot import populationsPlot, compartmentPlot, \
compositionPlot, clustermap
[docs]
class Model(object):
"""Class defines a Model.
This is the main class that the user interacts with.
The Model class contains populations, setups, and interventions to be used
in simulation. Also contains groups of hosts/vectors for manipulations and
stores model history as snapshots for each time point.
**CONSTANTS:**
- `CB_PALETTE`: a colorblind-friendly 8-color color scheme.
- `DEF_CMAP`: a colormap object for Seaborn plots.
Attributes:
populations: dictionary with keys=population IDs, values=Population
objects.
setups: dictionary with keys=setup IDs, values=Setup objects.
interventions: contains model interventions in the order they will occur.
groups: dictionary with keys=group IDs, values=lists of hosts/vectors.
history: dictionary with keys=time values, values=Model objects that
are snapshots of Model at that timepoint.
t_var: variable that tracks time in simulations.
"""
### CONSTANTS ###
### Color scheme constants ###
CB_PALETTE = ["#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999"]
# www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
# http://jfly.iam.u-tokyo.ac.jp/color/
DEF_CMAP = sns.cubehelix_palette(
start=.5, rot=-.75, as_cmap=True, reverse=True
)
### CLASS CONSTRUCTOR ###
def __init__(self):
"""Create a new Model object."""
super(Model, self).__init__()
self.populations = {}
# dictionary with keys=population IDs, values=Population objects
self.setups = {}
# dictionary with keys=setup IDs, values=Setup objects
self.interventions = []
# contains model interventions in the order they will occur
self.groups = {}
# dictionary with keys=group IDs, values=lists of hosts/vectors
self.history = {}
# dictionary with keys=time values, values=Model objects that are
# snapshots of Model at that timepoint
self.global_trackers = {
# dictionary keeping track of some global indicators over all
# the course of the simulation
'num_events' : { id:0 for id in Gillespie.EVENT_IDS.values() },
# tracks the number of each kind of event in the simulation
'last_event_time' : 0,
# time point at which the last event in the simulation happened
'genomes_seen' : [],
# list of all unique genomes that have appeared in the
# simulation
'custom_conditions' : {}
# dictionary with keys=ID of custom condition, values=lists of
# times; every time True is returned by a function in
# custom_condition_trackers, the simulation time will be stored
# under the corresponding ID inside
# global_trackers['custom_condition']
}
self.custom_condition_trackers = {}
# dictionary with keys=ID of custom condition, values=functions that
# take a Model object as argument and return True or False; every
# time True is returned by a function in custom_condition_trackers,
# the simulation time will be stored under the corresponding ID
# inside global_trackers['custom_condition']
self.t_var = 0 # used as time variable during simulation
### MODEL METHODS ###
### Model initialization and simulation: ###
[docs]
def setRandomSeed(self, seed):
"""Set random seed for numpy random number generator.
Arguments:
seed (int): int for the random seed to be passed to numpy.
"""
np.random.seed(seed)
[docs]
def newSetup(
self, name, preset=None,
num_loci=None, possible_alleles=None,
fitnessHost=None, contactHost=None, receiveContactHost=None,
mortalityHost=None, natalityHost=None,
recoveryHost=None, migrationHost=None,
populationContactHost=None, receivePopulationContactHost=None,
mutationHost=None,
recombinationHost=None, fitnessVector=None,
contactVector=None, receiveContactVector=None, mortalityVector=None,
natalityVector=None, recoveryVector=None,
migrationVector=None, populationContactVector=None,
receivePopulationContactVector=None,
mutationVector=None, recombinationVector=None,
contact_rate_host_vector=None,
transmission_efficiency_host_vector=None,
transmission_efficiency_vector_host=None,
contact_rate_host_host=None,
transmission_efficiency_host_host=None,
mean_inoculum_host=None, mean_inoculum_vector=None,
recovery_rate_host=None, recovery_rate_vector=None,
mortality_rate_host=None, mortality_rate_vector=None,
recombine_in_host=None, recombine_in_vector=None,
num_crossover_host=None, num_crossover_vector=None,
mutate_in_host=None, mutate_in_vector=None,
death_rate_host=None, death_rate_vector=None,
birth_rate_host=None, birth_rate_vector=None,
vertical_transmission_host=None, vertical_transmission_vector=None,
inherit_protection_host=None, inherit_protection_vector=None,
protection_upon_recovery_host=None,
protection_upon_recovery_vector=None):
"""Create a new `Setup`, save it in setups dict under given name.
Two preset setups exist: "vector-borne" and "host-host". You may select
one of the preset setups with the preset keyword argument and then
modify individual parameters with additional keyword arguments, without
having to specify all of them.
**"host-host":**
- `num_loci` = 10
- `possible_alleles` = 'ATCG'
- `fitnessHost` = (lambda g: 1)
- `contactHost` = (lambda g: 1)
- `receiveContactHost` = (lambda g: 1)
- `mortalityHost` = (lambda g: 1)
- `natalityHost` = (lambda g: 1)
- `recoveryHost` = (lambda g: 1)
- `migrationHost` = (lambda g: 1)
- `populationContactHost` = (lambda g: 1)
- `receivePopulationContactHost` = (lambda g: 1)
- `mutationHost` = (lambda g: 1)
- `recombinationHost` = (lambda g: 1)
- `fitnessVector` = (lambda g: 1)
- `contactVector` = (lambda g: 1)
- `receiveContactVector` = (lambda g: 1)
- `mortalityVector` = (lambda g: 1)
- `natalityVector` = (lambda g: 1)
- `recoveryVector` = (lambda g: 1)
- `migrationVector` = (lambda g: 1)
- `populationContactVector` = (lambda g: 1)
- `receivePopulationContactVector` = (lambda g: 1)
- `mutationVector` = (lambda g: 1)
- `recombinationVector` = (lambda g: 1)
- `contact_rate_host_vector` = 0
- `transmission_efficiency_host_vector` = 0
- `transmission_efficiency_vector_host` = 0
- `contact_rate_host_host` = 2e-1
- `transmission_efficiency_host_host` = 1
- `mean_inoculum_host` = 1e1
- `mean_inoculum_vector` = 0
- `recovery_rate_host` = 1e-1
- `recovery_rate_vector` = 0
- `mortality_rate_host` = 0
- `mortality_rate_vector` = 0
- `recombine_in_host` = 1e-4
- `recombine_in_vector` = 0
- `num_crossover_host` = 1
- `num_crossover_vector` = 0
- `mutate_in_host` = 1e-6
- `mutate_in_vector` = 0
- `death_rate_host` = 0
- `death_rate_vector` = 0
- `birth_rate_host` = 0
- `birth_rate_vector` = 0
- `vertical_transmission_host` = 0
- `vertical_transmission_vector` = 0
- `inherit_protection_host` = 0
- `inherit_protection_vector` = 0
- `protection_upon_recovery_host` = None
- `protection_upon_recovery_vector` = None
**"vector-borne":**
- `num_loci` = 10
- `possible_alleles` = 'ATCG'
- `fitnessHost` = (lambda g: 1)
- `contactHost` = (lambda g: 1)
- `receiveContactHost` = (lambda g: 1)
- `mortalityHost` = (lambda g: 1)
- `natalityHost` = (lambda g: 1)
- `recoveryHost` = (lambda g: 1)
- `migrationHost` = (lambda g: 1)
- `populationContactHost` = (lambda g: 1)
- `receivePopulationContactHost` = (lambda g: 1)
- `mutationHost` = (lambda g: 1)
- `recombinationHost` = (lambda g: 1)
- `fitnessVector` = (lambda g: 1)
- `contactVector` = (lambda g: 1)
- `receiveContactVector` = (lambda g: 1)
- `mortalityVector` = (lambda g: 1)
- `natalityVector` = (lambda g: 1)
- `recoveryVector` = (lambda g: 1)
- `migrationVector` = (lambda g: 1)
- `populationContactVector` = (lambda g: 1)
- `receivePopulationContactVector` = (lambda g: 1)
- `mutationVector` = (lambda g: 1)
- `recombinationVector` = (lambda g: 1)
- `contact_rate_host_vector` = 2e-1
- `transmission_efficiency_host_vector` = 1
- `transmission_efficiency_vector_host` = 1
- `contact_rate_host_host` = 0
- `transmission_efficiency_host_host` = 0
- `mean_inoculum_host` = 1e2
- `mean_inoculum_vector` = 1e0
- `recovery_rate_host` = 1e-1
- `recovery_rate_vector` = 1e-1
- `mortality_rate_host` = 0
- `mortality_rate_vector` = 0
- `recombine_in_host` = 0
- `recombine_in_vector` = 1e-4
- `num_crossover_host` = 0
- `num_crossover_vector` = 1
- `mutate_in_host` = 1e-6
- `mutate_in_vector` = 0
- `death_rate_host` = 0
- `death_rate_vector` = 0
- `birth_rate_host` = 0
- `birth_rate_vector` = 0
- `vertical_transmission_host` = 0
- `vertical_transmission_vector` = 0
- `inherit_protection_host` = 0
- `inherit_protection_vector` = 0
- `protection_upon_recovery_host` = None
- `protection_upon_recovery_vector` = None
Arguments:
name (String): name of setup to be used as a key in model setups dictionary.
Keyword arguments:
preset (None or String): preset setup to be used: "vector-borne" or "host-host", if
None, must define all other keyword arguments. Defaults to None.
num_loci (int>0): length of each pathogen genome string.
possible_alleles (String or list of Strings with num_loci elements): set of possible
characters in all genome string, or at each position in genome string.
fitnessHost (callable, takes a String argument and returns a number >= 0): function
that evaluates relative fitness in head-to-head competition for different genomes
within the same host.
contactHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying probability of a given host being chosen to be the
infector in a contact event, based on genome sequence of pathogen.
receiveContactHost (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying probability of a given host being chosen to be
the infected in a contact event, based on genome sequence of pathogen.
mortalityHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying death rate for a given host, based on genome sequence
of pathogen.
natalityHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying birth rate for a given host, based on genome sequence
of pathogen.
recoveryHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying recovery rate for a given host based on genome sequence
of pathogen.
migrationHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying migration rate for a given host based on genome sequence
of pathogen.
populationContactHost (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying population contact rate for a given host based on
genome sequence of pathogen.
mutationHost (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying mutation rate for a given host based on genome sequence
of pathogen.
recombinationHost (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying recombination rate for a given host based on genome
sequence of pathogen.
fitnessVector (callable, takes a String argument and returns a number >=0): function that
evaluates relative fitness in head-to-head competition for different genomes within
the same vector.
contactVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying probability of a given vector being chosen to be the
infector in a contact event, based on genome sequence of pathogen.
receiveContactVector (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying probability of a given vector being chosen to be the
infected in a contact event, based on genome sequence of pathogen.
mortalityVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying death rate for a given vector, based on genome sequence
of pathogen.
natalityVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying birth rate for a given vector, based on genome sequence
of pathogen.
recoveryVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying recovery rate for a given vector based on genome sequence
of pathogen.
migrationVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying migration rate for a given vector based on genome sequence
of pathogen.
populationContactVector (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying population contact rate for a given vector based on
genome sequence of pathogen.
mutationVector (callable, takes a String argument and returns a number 0-1): function that
returns coefficient modifying mutation rate for a given vector based on genome sequence
of pathogen.
recombinationVector (callable, takes a String argument and returns a number 0-1): function
that returns coefficient modifying recombination rate for a given vector based on genome
sequence of pathogen.
contact_rate_host_vector (number >= 0): rate of host-vector contact events, not necessarily
transmission, assumes constant population density; evts/time.
transmission_efficiency_host_vector (float): fraction of host-vector contacts
that result in successful transmission.
transmission_efficiency_vector_host (float): fraction of vector-host contacts
that result in successful transmission.
contact_rate_host_host (number >= 0): rate of host-host contact events, not
necessarily transmission, assumes constant population density; evts/time.
transmission_efficiency_host_host (float): fraction of host-host contacts
that result in successful transmission.
mean_inoculum_host (int >= 0): mean number of pathogens that are transmitted from
a vector or host into a new host during a contact event.
mean_inoculum_vector (int >= 0) mean number of pathogens that are transmitted
from a host to a vector during a contact event.
recovery_rate_host (number >= 0): rate at which hosts clear all pathogens;
1/time.
recovery_rate_vector (number >= 0): rate at which vectors clear all pathogens
1/time.
recovery_rate_vector (number >= 0): rate at which vectors clear all pathogens
1/time.
mortality_rate_host (number 0-1): rate at which infected hosts die from disease.
mortality_rate_vector (number 0-1): rate at which infected vectors die from
disease.
recombine_in_host (number >= 0): rate at which recombination occurs in host;
evts/time.
recombine_in_vector (number >= 0): rate at which recombination occurs in vector;
evts/time.
num_crossover_host (number >= 0): mean of a Poisson distribution modeling the number
of crossover events of host recombination events.
num_crossover_vector (number >= 0): mean of a Poisson distribution modeling the
number of crossover events of vector recombination events.
mutate_in_host (number >= 0): rate at which mutation occurs in host; evts/time.
mutate_in_vector (number >= 0): rate at which mutation occurs in vector; evts/time.
death_rate_host (number >= 0): natural host death rate; 1/time.
death_rate_vector (number >= 0): natural vector death rate; 1/time.
birth_rate_host (number >= 0): infected host birth rate; 1/time.
birth_rate_vector (number >= 0): infected vector birth rate; 1/time.
vertical_transmission_host (number 0-1): probability that a host is infected by its
parent at birth.
vertical_transmission_vector (number 0-1): probability that a vector is infected by
its parent at birth.
inherit_protection_host (number 0-1): probability that a host inherits all
protection sequences from its parent.
inherit_protection_vector (number 0-1): probability that a vector inherits all
protection sequences from its parent.
protection_upon_recovery_host (None or array-like of length 2 with int 0-num_loci): defines
indexes in genome string that define substring to be added to host protection sequences
after recovery.
protection_upon_recovery_vector (None or array-like of length 2 with int 0-num_loci): defines
indexes in genome string that define substring to be added to vector protection sequences
after recovery.
"""
if preset == "vector-borne":
num_loci = 10 if num_loci is None else num_loci
possible_alleles = \
'ATCG' if possible_alleles is None else possible_alleles
fitnessHost = (lambda g: 1) if fitnessHost is None else fitnessHost
contactHost = (lambda g: 1) if contactHost is None else contactHost
receiveContactHost = \
(lambda g: 1) if receiveContactHost is None else receiveContactHost
mortalityHost = \
(lambda g: 1) if mortalityHost is None else mortalityHost
natalityHost = \
(lambda g: 1) if natalityHost is None else natalityHost
recoveryHost = \
(lambda g: 1) if recoveryHost is None else recoveryHost
migrationHost = \
(lambda g: 1) if migrationHost is None else migrationHost
populationContactHost = \
(lambda g: 1) if populationContactHost is None else populationContactHost
receivePopulationContactHost = \
(lambda g: 1) if receivePopulationContactHost is None else receivePopulationContactHost
mutationHost = \
(lambda g: 1) if mutationHost is None else mutationHost
recombinationHost = \
(lambda g: 1) if recombinationHost is None else recombinationHost
fitnessVector = \
(lambda g: 1) if fitnessVector is None else fitnessVector
contactVector = \
(lambda g: 1) if contactVector is None else contactVector
receiveContactVector = \
(lambda g: 1) if receiveContactVector is None else receiveContactVector
mortalityVector = \
(lambda g: 1) if mortalityVector is None else mortalityVector
natalityVector = \
(lambda g: 1) if natalityVector is None else natalityVector
recoveryVector = \
(lambda g: 1) if recoveryVector is None else recoveryVector
migrationVector = \
(lambda g: 1) if migrationVector is None else migrationVector
populationContactVector = \
(lambda g: 1) if populationContactVector is None else populationContactVector
receivePopulationContactVector = \
(lambda g: 1) if receivePopulationContactVector is None else receivePopulationContactVector
mutationVector = \
(lambda g: 1) if mutationVector is None else mutationVector
recombinationVector = \
(lambda g: 1) if recombinationVector is None else recombinationVector
contact_rate_host_vector = \
2e-1 if contact_rate_host_vector is None \
else contact_rate_host_vector
transmission_efficiency_host_vector = \
1 if transmission_efficiency_host_vector is None \
else transmission_efficiency_host_vector
transmission_efficiency_vector_host = \
1 if transmission_efficiency_vector_host is None \
else transmission_efficiency_vector_host
contact_rate_host_host = \
0 if contact_rate_host_host is None else contact_rate_host_host
transmission_efficiency_host_host = \
0 if transmission_efficiency_host_host is None \
else transmission_efficiency_host_host
mean_inoculum_host = \
1e2 if mean_inoculum_host is None else mean_inoculum_host
mean_inoculum_vector = \
1 if mean_inoculum_vector is None else mean_inoculum_vector
recovery_rate_host = \
1e-1 if recovery_rate_host is None else recovery_rate_host
recovery_rate_vector = \
1e-1 if recovery_rate_vector is None else recovery_rate_vector
mortality_rate_host = \
0 if mortality_rate_host is None else mortality_rate_host
mortality_rate_vector = \
0 if mortality_rate_vector is None else mortality_rate_vector
recombine_in_host = \
0 if recombine_in_host is None else recombine_in_host
recombine_in_vector = \
1e-4 if recombine_in_vector is None else recombine_in_vector
num_crossover_host = 0 \
if num_crossover_host is None else num_crossover_host
num_crossover_vector = \
1 if num_crossover_vector is None else num_crossover_vector
mutate_in_host = 1e-6 if mutate_in_host is None else mutate_in_host
mutate_in_vector = \
0 if mutate_in_vector is None else mutate_in_vector
death_rate_host = 0 if death_rate_host is None else death_rate_host
death_rate_vector = \
0 if death_rate_vector is None else death_rate_vector
birth_rate_host = 0 if birth_rate_host is None else birth_rate_host
birth_rate_vector = \
0 if birth_rate_vector is None else birth_rate_vector
vertical_transmission_host = \
0 if vertical_transmission_host is None \
else vertical_transmission_host
vertical_transmission_vector = \
0 if vertical_transmission_vector is None \
else vertical_transmission_vector
inherit_protection_host = \
0 if inherit_protection_host is None \
else inherit_protection_host
inherit_protection_vector = \
0 if inherit_protection_vector is None \
else inherit_protection_vector
protection_upon_recovery_host = protection_upon_recovery_host
protection_upon_recovery_vector = protection_upon_recovery_vector
elif preset == "host-host":
num_loci = 10 if num_loci is None else num_loci
possible_alleles = \
'ATCG' if possible_alleles is None else possible_alleles
fitnessHost = (lambda g: 1) if fitnessHost is None else fitnessHost
contactHost = (lambda g: 1) if contactHost is None else contactHost
receiveContactHost = \
(lambda g: 1) if receiveContactHost is None else receiveContactHost
mortalityHost = \
(lambda g: 1) if mortalityHost is None else mortalityHost
natalityHost = \
(lambda g: 1) if natalityHost is None else natalityHost
recoveryHost = \
(lambda g: 1) if recoveryHost is None else recoveryHost
migrationHost = \
(lambda g: 1) if migrationHost is None else migrationHost
populationContactHost = \
(lambda g: 1) if populationContactHost is None else populationContactHost
receivePopulationContactHost = \
(lambda g: 1) if receivePopulationContactHost is None else receivePopulationContactHost
mutationHost = \
(lambda g: 1) if mutationHost is None else mutationHost
recombinationHost = \
(lambda g: 1) if recombinationHost is None else recombinationHost
fitnessVector = \
(lambda g: 1) if fitnessVector is None else fitnessVector
contactVector = \
(lambda g: 1) if contactVector is None else contactVector
receiveContactVector = \
(lambda g: 1) if receiveContactVector is None else receiveContactVector
mortalityVector = \
(lambda g: 1) if mortalityVector is None else mortalityVector
natalityVector = \
(lambda g: 1) if natalityVector is None else natalityVector
recoveryVector = \
(lambda g: 1) if recoveryVector is None else recoveryVector
mortality_rate_host = \
0 if mortality_rate_host is None else mortality_rate_host
mortality_rate_vector = \
0 if mortality_rate_vector is None else mortality_rate_vector
migrationVector = \
(lambda g: 1) if migrationVector is None else migrationVector
populationContactVector = \
(lambda g: 1) if populationContactVector is None else populationContactVector
receivePopulationContactVector = \
(lambda g: 1) if receivePopulationContactVector is None else receivePopulationContactVector
mutationVector = \
(lambda g: 1) if mutationVector is None else mutationVector
recombinationVector = \
(lambda g: 1) if recombinationVector is None else recombinationVector
contact_rate_host_vector = \
0 if contact_rate_host_vector is None \
else contact_rate_host_vector
transmission_efficiency_host_vector = \
0 if transmission_efficiency_host_vector is None \
else transmission_efficiency_host_vector
transmission_efficiency_vector_host = \
0 if transmission_efficiency_vector_host is None \
else transmission_efficiency_vector_host
contact_rate_host_host = \
2e-1 if contact_rate_host_host is None \
else contact_rate_host_host
transmission_efficiency_host_host = \
1 if transmission_efficiency_host_host is None \
else transmission_efficiency_host_host
mean_inoculum_host = \
1e1 if mean_inoculum_host is None else mean_inoculum_host
mean_inoculum_vector = \
0 if mean_inoculum_vector is None else mean_inoculum_vector
recovery_rate_host = \
1e-1 if recovery_rate_host is None else recovery_rate_host
recovery_rate_vector = \
0 if recovery_rate_vector is None else recovery_rate_vector
recombine_in_host = \
1e-4 if recombine_in_host is None else recombine_in_host
recombine_in_vector = \
0 if recombine_in_vector is None else recombine_in_vector
num_crossover_host = 1 \
if num_crossover_host is None else num_crossover_host
num_crossover_vector = \
0 if num_crossover_vector is None else num_crossover_vector
mutate_in_host = \
1e-6 if mutate_in_host is None else mutate_in_host
mutate_in_vector = \
0 if mutate_in_vector is None else mutate_in_vector
death_rate_host = \
0 if death_rate_host is None else death_rate_host
death_rate_vector = \
0 if death_rate_vector is None else death_rate_vector
birth_rate_host = 0 if birth_rate_host is None else birth_rate_host
birth_rate_vector = \
0 if birth_rate_vector is None else birth_rate_vector
vertical_transmission_host = \
0 if vertical_transmission_host is None \
else vertical_transmission_host
vertical_transmission_vector = \
0 if vertical_transmission_vector is None \
else vertical_transmission_vector
inherit_protection_host = \
0 if inherit_protection_host is None \
else inherit_protection_host
inherit_protection_vector = \
0 if inherit_protection_vector is None \
else inherit_protection_vector
protection_upon_recovery_host = protection_upon_recovery_host
protection_upon_recovery_vector = protection_upon_recovery_vector
self.setups[name] = Setup(
name,
num_loci, possible_alleles,
fitnessHost, contactHost, receiveContactHost, mortalityHost,
natalityHost, recoveryHost, migrationHost,
populationContactHost, receivePopulationContactHost,
mutationHost, recombinationHost,
fitnessVector, contactVector, receiveContactVector, mortalityVector,
natalityVector,recoveryVector, migrationVector,
populationContactVector, receivePopulationContactVector,
mutationVector, recombinationVector,
contact_rate_host_vector,
transmission_efficiency_host_vector,
transmission_efficiency_vector_host,
contact_rate_host_host,
transmission_efficiency_host_host,
mean_inoculum_host, mean_inoculum_vector,
recovery_rate_host, recovery_rate_vector,
mortality_rate_host,mortality_rate_vector,
recombine_in_host, recombine_in_vector,
num_crossover_host, num_crossover_vector,
mutate_in_host, mutate_in_vector, death_rate_host,death_rate_vector,
birth_rate_host, birth_rate_vector,
vertical_transmission_host, vertical_transmission_vector,
inherit_protection_host, inherit_protection_vector,
protection_upon_recovery_host, protection_upon_recovery_vector
)
[docs]
def newIntervention(self, time, method_name, args):
"""Create a new intervention to be carried out at a specific time.
Arguments:
time (number >= 0): time at which intervention will take place.
method_name (String): intervention to be carried out, must correspond to the
name of a method of the `Model` object.
args (array-like): contains arguments for function in positinal order.
"""
self.interventions.append( Intervention(time, method_name, args, self) )
[docs]
def addCustomConditionTracker(self, condition_id, trackerFunction):
"""Add a function to track occurrences of custom events in simulation.
Adds function `trackerFunction` to dictionary `custom_condition_trackers`
under key `condition_id`. Function `trackerFunction` will be executed at
every event in the simulation. Every time True is returned,
the simulation time will be stored under the corresponding `condition_id`
key inside `global_trackers['custom_condition']`.
Arguments:
condition_id (String): ID of this specific condition-
trackerFunction (callable): function that take a `Model` object as argument
and returns True or False.
"""
self.custom_condition_trackers['condition_id'] = trackerFunction
self.global_trackers['custom_conditions']['condition_id'] = []
[docs]
def run(self,t0,tf,time_sampling=0,host_sampling=0,vector_sampling=0):
"""Simulate model for a specified time between two time points.
Simulates a time series using the Gillespie algorithm.
Saves a dictionary containing model state history, with `keys=times` and
`values=Model` objects with model snapshot at that time point under this
model's history attribute.
Arguments:
t0 (number >= 0): initial time point to start simulation at.
tf (number >= 0): initial time point to end simulation at.
Keyword arguments:
time_sampling (int): how many events to skip before saving a snapshot of the
system state (saves all by default), if <0, saves only final state. Defaults to 0.
host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot
of the system state (saves all by default). Defaults to 0.
vector_sampling (int >= 0): how many vectors to skip before saving one in a
snapshot of the system state (saves all by default). Defaults to 0.
"""
sim = Gillespie(self)
self.history = sim.run(
t0, tf, time_sampling, host_sampling, vector_sampling
)
[docs]
def runReplicates(
self,t0,tf,replicates,host_sampling=0,vector_sampling=0,n_cores=0,
**kwargs):
"""Simulate replicates of a model, save only end results.
Simulates replicates of a time series using the Gillespie algorithm.
Saves a dictionary containing model end state state, with `keys=times` and
`values=Model` objects with model snapshot. The time is the final timepoint.
Arguments:
t0 (number >= 0): initial time point to start simulation at.
tf (number >= 0): initial time point to end simulation at.
replicates (int >= 1): how many replicates to simulate.
Keyword arguments:
host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot
of the system state (saves all by default). Defaults to 0.
vector_sampling (int >= 0): how many vectors to skip before saving one in a
snapshot of the system state (saves all by default). Defaults to 0.
n_cores (int >= 0): number of cores to parallelize file export across, if 0, all
cores available are used. Defaults to 0.
**kwargs: additional arguents for joblib multiprocessing.
Returns:
List of `Model` objects with the final snapshots.
"""
if not n_cores:
n_cores = jl.cpu_count()
print('Starting parallel simulations...')
def run(sim_num):
model = self.deepCopy()
sim = Gillespie(model)
mod = sim.run(
t0,tf,time_sampling=-1,
host_sampling=host_sampling,vector_sampling=vector_sampling
)[tf]
mod.history = { tf:mod }
return mod
return jl.Parallel(n_jobs=n_cores, verbose=10, **kwargs) (
jl.delayed( run ) (_) for _ in range(replicates)
)
[docs]
def runParamSweep(
self,t0,tf,setup_id,
param_sweep_dic={},
host_population_size_sweep={}, vector_population_size_sweep={},
host_migration_sweep_dic={}, vector_migration_sweep_dic={},
host_host_population_contact_sweep_dic={},
host_vector_population_contact_sweep_dic={},
vector_host_population_contact_sweep_dic={},
replicates=1,host_sampling=0,vector_sampling=0,n_cores=0,
**kwargs):
"""Simulate a parameter sweep with a model, save only end results.
Simulates variations of a time series using the Gillespie algorithm.
Saves a dictionary containing model end state state, with `keys=times` and
`values=Model` objects with model snapshot. The time is the final
timepoint.
Arguments:
t0 (number >= 0): initial time point to start simulation at.
tf (number >= 0): initial time point to end simulation at.
setup_id (String): ID of setup to be assigned.
Keyword arguments:
param_sweep_dic -- dictionary with keys=parameter names (attributes of
Setup), values=list of values for parameter (list, class of elements
depends on parameter)
host_population_size_sweep -- dictionary with keys=population IDs
(Strings), values=list of values with host population sizes
(must be greater than original size set for each population, list of
numbers)
vector_population_size_sweep -- dictionary with keys=population IDs
(Strings), values=list of values with vector population sizes
(must be greater than original size set for each population, list of
numbers)
host_migration_sweep_dic -- dictionary with keys=population IDs of
origin and destination, separated by a colon ';' (Strings),
values=list of values (list of numbers)
vector_migration_sweep_dic -- dictionary with keys=population IDs of
origin and destination, separated by a colon ';' (Strings),
values=list of values (list of numbers)
host_host_population_contact_sweep_dic -- dictionary with
keys=population IDs of origin and destination, separated by a colon
';' (Strings), values=list of values (list of numbers)
host_vector_population_contact_sweep_dic -- dictionary with
keys=population IDs of origin and destination, separated by a colon
';' (Strings), values=list of values (list of numbers)
vector_host_population_contact_sweep_dic -- dictionary with
keys=population IDs of origin and destination, separated by a colon
';' (Strings), values=list of values (list of numbers)
replicates -- how many replicates to simulate (int >= 1)
host_sampling -- how many hosts to skip before saving one in a snapshot
of the system state (saves all by default) (int >= 0, default 0)
vector_sampling -- how many vectors to skip before saving one in a
snapshot of the system state (saves all by default)
(int >= 0, default 0)
n_cores -- number of cores to parallelize file export across, if 0, all
cores available are used (default 0; int >= 0)
**kwargs -- additional arguents for joblib multiprocessing
Returns:
DataFrame with parameter combinations, list of Model objects with the
final snapshots.
"""
if not n_cores:
n_cores = jl.cpu_count()
for p in host_population_size_sweep:
param_sweep_dic['pop_size_host:'+p] = host_population_size_sweep[p]
for p in vector_population_size_sweep:
param_sweep_dic['pop_size_vector:'+p] = vector_population_size_sweep[p]
for p in host_migration_sweep_dic:
param_sweep_dic['migrate_host:'+p] = host_migration_sweep_dic[p]
for p in vector_migration_sweep_dic:
param_sweep_dic['migrate_vector:'+p] = vector_migration_sweep_dic[p]
for p in host_host_population_contact_sweep_dic:
param_sweep_dic['population_contact_host_host:'+p] \
= host_host_population_contact_sweep_dic[p]
for p in host_vector_population_contact_sweep_dic:
param_sweep_dic['population_contact_host_vector:'+p] \
= host_vector_population_contact_sweep_dic[p]
for p in vector_host_population_contact_sweep_dic:
param_sweep_dic['population_contact_vector_host:'+p] \
= vector_host_population_contact_sweep_dic[p]
if len(param_sweep_dic) == 0:
raise ValueError(
'param_sweep_dic, host_migration_sweep_dic, vector_migration_sweep_dic, host_host_population_contact_sweep_dic, host_vector_population_contact_sweep_dic, and vector_host_population_contact_sweep_dic cannot all be empty in runParamSweep()'
)
params = param_sweep_dic.keys()
value_lists = [ param_sweep_dic[param] for param in params ]
combinations = list( it.product( *value_lists ) ) * replicates
param_df = pd.DataFrame(combinations)
param_df.columns = params
results = {}
print('Starting parallel simulations...')
def run(param_values):
model = self.deepCopy()
for i,param_name in enumerate(params):
if ':' in param_name:
pops = param_name.split(':')[1].split(';')
if 'pop_size_host:' in param_name:
pop = cp.deepcopy( model.populations[pops[0]] )
add_hosts = param_values[i] - len(pop.hosts)
if add_hosts < 0:
raise ValueError(
'Value ' + str(param_values[i]) + ' assigned to ' + pops[0] + ' in host_population_size_sweep must be greater or equal to the population\'s original number of hosts.'
)
else:
pop.addHosts(add_hosts)
model.populations[pops[0]] = pop
pop.model = model
elif 'pop_size_vector:' in param_name:
pop = cp.deepcopy( model.populations[pops[0]] )
add_vectors = param_values[i] - len(pop.vectors)
if add_vectors < 0:
raise ValueError(
'Values ' + str(param_values[i]) + ' assigned to ' + pops[0] + ' in vector_population_size_sweep must be greater or equal to the population\'s original number of vectors.'
)
else:
pop.addVectors(add_vectors)
model.populations[pops[0]] = pop
pop.model = model
elif 'migrate_host:' in param_name:
new_pops = [
cp.deepcopy( model.populations[pops[0]] ),
cp.deepcopy( model.populations[pops[1]] )
]
new_pops[0].model = model
new_pops[1].model = model
model.populations[pops[0]] = new_pops[0]
model.populations[pops[1]] = new_pops[1]
model.linkPopulationsHostMigration(
new_pops[0],new_pops[1],param_values[i]
)
elif 'migrate_vector:' in param_name:
new_pops = [
cp.deepcopy( model.populations[pops[0]] ),
cp.deepcopy( model.populations[pops[1]] )
]
new_pops[0].model = model
new_pops[1].model = model
model.populations[pops[0]] = new_pops[0]
model.populations[pops[1]] = new_pops[1]
model.linkPopulationsVectorMigration(
pops[0],pops[1],param_values[i]
)
elif 'population_contact_host_host:' in param_name:
new_pops = [
cp.deepcopy( model.populations[pops[0]] ),
cp.deepcopy( model.populations[pops[1]] )
]
new_pops[0].model = model
new_pops[1].model = model
model.populations[pops[0]] = new_pops[0]
model.populations[pops[1]] = new_pops[1]
model.linkPopulationsHostHostContact(
pops[0],pops[1],param_values[i]
)
model.linkPopulationsHostHostContact(
pops[1],pops[0],param_values[i]
)
elif 'population_contact_host_vector:' in param_name:
new_pops = [
cp.deepcopy( model.populations[pops[0]] ),
cp.deepcopy( model.populations[pops[1]] )
]
new_pops[0].model = model
new_pops[1].model = model
model.populations[pops[0]] = new_pops[0]
model.populations[pops[1]] = new_pops[1]
model.linkPopulationsHostVectorContact(
pops[0],pops[1],param_values[i]
)
model.linkPopulationsHostVectorContact(
pops[1],pops[0],param_values[i]
)
elif 'population_contact_vector_host:' in param_name:
new_pops = [
cp.deepcopy( model.populations[pops[0]] ),
cp.deepcopy( model.populations[pops[1]] )
]
new_pops[0].model = model
new_pops[1].model = model
model.populations[pops[0]] = new_pops[0]
model.populations[pops[1]] = new_pops[1]
model.linkPopulationsVectorHostContact(
pops[0],pops[1],param_values[i]
)
model.linkPopulationsVectorHostContact(
pops[1],pops[0],param_values[i]
)
else:
setattr(model.setups[setup_id],param_name,param_values[i])
for name,pop in model.populations.items():
pop.setSetup( model.setups[pop.setup.id] )
sim = Gillespie(model)
mod = sim.run(
t0,tf,time_sampling=-1,
host_sampling=host_sampling,vector_sampling=vector_sampling
)[tf]
mod.history = { tf:mod }
return mod
return (
param_df,
jl.Parallel(n_jobs=n_cores, verbose=10, **kwargs) (
jl.delayed( run ) (param_values)
for param_values in combinations
)
)
[docs]
def copyState(self,host_sampling=0,vector_sampling=0):
"""Returns a slimmed-down representation of the current model state.
Keyword arguments:
host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot
of the system state (saves all by default). Defaults to 0.
vector_sampling (int >= 0): how many vectors to skip before saving one in a
snapshot of the system state (saves all by default). Defaults to 0.
Returns:
Model object with current population host and vector lists.
"""
copy = Model()
copy.populations = {
id: p.copyState(host_sampling,vector_sampling)
for id,p in self.populations.items()
}
return copy
[docs]
def deepCopy(self):
"""Returns a full copy of the current model with inner references.
Returns:
Copied Model object.
"""
model = cp.deepcopy(self)
for intervention in model.interventions:
intervention.model = model
for pop in model.populations:
model.populations[pop].model = model
for h in model.populations[pop].hosts:
h.population = model.populations[pop]
for v in model.populations[pop].vectors:
v.population = model.populations[pop]
return model
### Output and Plots: ###
[docs]
def saveToDataFrame(self,save_to_file,n_cores=0,**kwargs):
"""Save status of model to dataframe, write to file location given.
Creates a pandas Dataframe in long format with the given model history,
with one host or vector per simulation time in each row, and columns:
- Time - simulation time of entry
- Population - ID of this host/vector's population
- Organism - host/vector
- ID - ID of host/vector
- Pathogens - all genomes present in this host/vector separated by ';'
- Protection - all genomes present in this host/vector separated by ';'
- Alive - whether host/vector is alive at this time, True/False
Arguments:
save_to_file (String): file path and name to save model data under.
Keyword arguments:
n_cores (int >= 0): number of cores to parallelize file export across, if 0, all
cores available are used. Defaults to 0.
**kwargs: additional arguents for joblib multiprocessing.
Returns:
`pandas dataframe` with model history as described above.
"""
data = saveToDf(
self.history,save_to_file,n_cores,**kwargs
)
return data
[docs]
def getPathogens(self, dat, save_to_file=""):
"""Create Dataframe with counts for all pathogen genomes in data.
Returns sorted pandas Dataframe with counts for occurrences of all
pathogen genomes in data passed.
Arguments:
data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function.
Keyword arguments:
save_to_file (String): file path and name to save model data under, no saving
occurs if empty string. Defaults to "".
Returns:
`pandas dataframe` with Series as described above.
"""
return getPathogens(dat, save_to_file=save_to_file)
[docs]
def getProtections(self, dat, save_to_file=""):
"""Create Dataframe with counts for all protection sequences in data.
Returns sorted `pandas Dataframe` with counts for occurrences of all
protection sequences in data passed.
Arguments:
data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function.
Keyword arguments:
save_to_file (String): file path and name to save model data under, no saving
occurs if empty string. Defaults to "".
Returns:
pandas DataFrame with Series as described above.
"""
return getProtections(dat, save_to_file=save_to_file)
[docs]
def populationsPlot(
self, file_name, data, compartment='Infected',
hosts=True, vectors=False, num_top_populations=7,
track_specific_populations=[], save_data_to_file="",
x_label='Time', y_label='Hosts', figsize=(8, 4), dpi=200,
palette=CB_PALETTE, stacked=False):
"""Create plot with aggregated totals per population across time.
Creates a line or stacked line plot with dynamics of a compartment
across populations in the model, with one line for each population.
A host or vector is considered part of the recovered compartment
if it has protection sequences of any kind and is not infected.
Arguments:
file_name (String): file path, name, and extension to save plot under.
data (pandas DataFrame): dataframe with model history as produced by saveToDf function.
Keyword arguments:
compartment (String): subset of hosts/vectors to count totals of, can be either
'Naive','Infected','Recovered', or 'Dead'. Defaults to 'Infected'.
hosts (Boolean): whether to count hosts. Defaults to True.
vectors (Boolean) whether to count vectors. Defaults to False.
num_top_populations (int): how many populations to count separately and
include as columns, remainder will be counted under column "Other";
if <0, includes all populations in model. Defaults to 7.
track_specific_populations (list of Strings): contains IDs of specific populations to
have as a separate column if not part of the top num_top_populations
populations. Defaults to [].
save_data_to_file (String): file path and name to save model plot data under,
no saving occurs if empty string. Defaults to "".
x_label (String): X axis title. Defaults to 'Time'.
y_label (String): Y axis title. Defaults to 'Hosts'.
legend_title (String): legend title. Defaults to 'Population'.
legend_values (list of Strings): labels for each trace, if empty list, uses population
IDs. Defaults to [].
figsize (array-like of two ints): dimensions of figure. Defaults to (8,4).
dpi (int): figure resolution. Defaults to 200.
palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`.
stacked (Boolean): whether to draw a regular line plot instead of a stacked one. Defaults to False.
Returns:
`axis object` for plot with model population dynamics as described above.
"""
return populationsPlot(
file_name, data, compartment=compartment, hosts=hosts,
vectors=vectors, num_top_populations=num_top_populations,
track_specific_populations=track_specific_populations,
save_data_to_file=save_data_to_file,
x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi,
palette=palette, stacked=stacked
)
[docs]
def compartmentPlot(
self, file_name, data, populations=[], hosts=True, vectors=False,
save_data_to_file="", x_label='Time', y_label='Hosts',
figsize=(8, 4), dpi=200, palette=CB_PALETTE, stacked=False):
"""Create plot with number of naive,inf,rec,dead hosts/vectors vs. time.
Creates a line or stacked line plot with dynamics of all compartments
(naive, infected, recovered, dead) across selected populations in the
model, with one line for each compartment.
A host or vector is considered part of the recovered compartment
if it has protection sequences of any kind and is not infected.
Arguments:
file_name (String): file path, name, and extension to save plot under.
data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function.
Keyword arguments:
populations (list of Strings): IDs of populations to include in analysis; if empty, uses
all populations in model. Defaults to [].
hosts (Boolean): whether to count hosts. Defaults to True.
vectors (Boolean): whether to count vectors. Defaults to False.
save_data_to_file (String): file path and name to save model data under, no
saving occurs if empty string. Defaults to "".
x_label (String): X axis title. Defaults to 'Time'.
y_label (String): Y axis title. Defaults to 'Hosts'.
legend_title (String): legend title. Defaults to 'Population'.
legend_values (list of Strings): labels for each trace, if empty list, uses population
IDs. Defaults to [].
figsize (array-like of two ints): dimensions of figure. Defaults to (8,4).
dpi (int)): figure resolution. Defaults to 200.
palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`.
stacked -- whether to draw a regular line plot instead of a stacked one
(default False, Boolean)
Returns:
axis object for plot with model compartment dynamics as described above
"""
return compartmentPlot(
file_name, data, populations=populations, hosts=hosts,
vectors=vectors, save_data_to_file=save_data_to_file,
x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi,
palette=palette, stacked=stacked
)
[docs]
def compositionPlot(
self, file_name, data, composition_dataframe=None, populations=[],
type_of_composition='Pathogens', hosts=True, vectors=False,
num_top_sequences=7, track_specific_sequences=[],
save_data_to_file="", x_label='Time', y_label='Infections',
figsize=(8, 4), dpi=200, palette=CB_PALETTE, stacked=True,
remove_legend=False, genomic_positions=[],population_fraction=False,
count_individuals_based_on_model=None,
legend_title='Genotype', legend_values=[], **kwargs):
"""Create plot with counts for pathogen genomes or resistance vs. time.
Creates a line or stacked line plot with dynamics of the pathogen
strains or protection sequences across selected populations in the
model, with one line for each pathogen genome or protection sequence
being shown.
Of note: sum of totals for all sequences in one time point does not
necessarily equal the number of infected hosts and/or vectors, given
multiple infections in the same host/vector are counted separately.
Arguments:
file_name (String): file path, name, and extension to save plot under.
data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function.
Keyword arguments:
composition_dataframe (pandas DataFrame): output of compositionDf() if already computed
Defaults to None.
populations (list of Strings): IDs of populations to include in analysis; if empty, uses
all populations in model. Defaults to [].
type_of_composition (String) field of data to count totals of, can be either
'Pathogens' or 'Protection'. Defaults to 'Pathogens'.
hosts (Boolean) whether to count hosts. Defaults to True.
vectors (Boolean): whether to count vectors. Defaults to False.
num_top_sequences (int): how many sequences to count separately and include
as columns, remainder will be counted under column "Other"; if <0,
includes all genomes in model. Defaults to 7.
track_specific_sequences (list of Strings): contains specific sequences to have
as a separate column if not part of the top num_top_sequences
sequences. Defaults to [].
genomic_positions (list of lists of int): list in which each element is a list with loci
positions to extract (e.g. genomic_positions=[ [0,3], [5,6] ]
extracts positions 0, 1, 2, and 5 from each genome); if empty, takes
full genomes. Defaults to [].
count_individuals_based_on_model (None or Model): `Model` object with populations and
fitness functions used to evaluate the most fit pathogen genome in
each host/vector in order to count only a single pathogen per
host/vector, as opposed to all pathogens within each host/vector; if
None, counts all pathogens. Defaults to None.
save_data_to_file (String): file path and name to save model data under, no
saving occurs if empty string. Defaults to "".
x_label (String): X axis title. Defaults to 'Time'.
y_label (String): Y axis title. Defaults to 'Hosts'.
legend_title (String): legend title. Defaults to 'Population'.
legend_values (list of Strings): labels for each trace, if empty list, uses population
IDs. Defaults to [].
figsize (array-like of two ints): dimensions of figure. Defaults to (8,4).
dpi (int): figure resolution. Defaults to 200.
palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`.
stacked (Boolean): whether to draw a regular line plot instead of a stacked one. Defaults to False.
remove_legend (Boolean): whether to print the sequences on the figure legend
instead of printing them on a separate csv file. Defaults to True.
**kwargs: additional arguents for joblib multiprocessing.
Returns:
axis object for plot with model sequence composition dynamics as described.
"""
return compositionPlot(
file_name, data,
composition_dataframe=composition_dataframe,populations=populations,
type_of_composition=type_of_composition, hosts=hosts,
vectors=vectors, num_top_sequences=num_top_sequences,
track_specific_sequences=track_specific_sequences,
save_data_to_file=save_data_to_file,
x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi,
palette=palette, stacked=stacked, remove_legend=remove_legend,
genomic_positions=genomic_positions,
count_individuals_based_on_model=count_individuals_based_on_model,
population_fraction=population_fraction, legend_title=legend_title,
legend_values=legend_values,
**kwargs
)
[docs]
def clustermap(
self,
file_name, data, num_top_sequences=-1, track_specific_sequences=[],
seq_names=[], n_cores=0, method='weighted', metric='euclidean',
save_data_to_file="", legend_title='Distance', legend_values=[],
figsize=(10,10), dpi=200, color_map=DEF_CMAP):
"""Create a heatmap and dendrogram for pathogen genomes in data passed.
Arguments:
file_name (String): file path, name, and extension to save plot under.
data (pandas DataFrame): dataframe with model history as produced by `saveToDf`
function.
Keyword arguments:
num_top_sequences (int): how many sequences to include in matrix; if <0,
includes all genomes in data passed. Defaults to -1.
track_specific_sequences (list of Strings): contains specific sequences to include
in matrix if not part of the top num_top_sequences sequences. Defaults to [].
seq_names (list ofStrings): list with names to be used for sequence labels in matrix
must be of same length as number of sequences to be displayed; if empty, uses sequences
themselves. Defaults to [].
n_cores (int >= 0): number of cores to parallelize distance compute across, if 0,
all cores available are used. Defaults to 0.
method (String): clustering algorithm to use with seaborn clustermap. Defaults to 'weighted'.
metric (String): distance metric to use with seaborn clustermap. Defaults to 'euclidean'.
save_data_to_file (String): file path and name to save model data under, no
saving occurs if empty string. Defaults to "".
legend_title (String): legend title. Defaults to 'Distance'.
figsize (array-like of two ints): dimensions of figure. Defaults to (8,4).
dpi (int): figure resolution. Defaults to 200.
color_map (matplotlib cmap object): color map to use for traces. Defaults to `DEF_CMAP`.
Returns:
figure object for plot with heatmap and dendrogram as described.
"""
return clustermap(
file_name, data, num_top_sequences=num_top_sequences,
track_specific_sequences=track_specific_sequences,
seq_names=seq_names, n_cores=n_cores, method=method,
metric=metric, save_data_to_file=save_data_to_file,
legend_title=legend_title, legend_values=legend_values,
figsize=figsize, dpi=dpi, color_map=color_map
)
[docs]
def pathogenDistanceHistory(
self,
data, samples=-1, num_top_sequences=-1, track_specific_sequences=[],
seq_names=[], n_cores=0, save_to_file=''):
"""Create DataFrame with pairwise Hamming distances for pathogen
sequences in data.
DataFrame has indexes and columns named according to genomes or argument
seq_names, if passed. Distance is measured as percent Hamming distance
from an optimal genome sequence.
Arguments:
data (pandas DataFrame): dataframe with model history as produced by `saveToDf`
function.
Keyword arguments:
samples (int): how many timepoints to uniformly sample from the total
timecourse; if <0, takes all timepoints. Defaults to 1.
num_top_sequences (int) how many sequences to include in matrix; if <0,
includes all genomes in data passed. Defaults to -1.
track_specific_sequences (list of Strings): contains specific sequences to include in
matrix if not part of the top num_top_sequences sequences. Defaults to [].
seq_names (list of Strings): list with names to be used for sequence labels in matrix
must be of same length as number of sequences to be displayed; if
empty, uses sequences themselves. Defaults to [].
n_cores (int >= 0): number of cores to parallelize distance compute across, if 0,
all cores available are used. Defaults to 0.
save_to_file (String): file path and name to save model data under, no saving
occurs if empty string. Defaults to "".
Returns:
pandas DataFrame with distance matrix as described above.
"""
return getPathogenDistanceHistoryDf(data,
samples=samples, num_top_sequences=num_top_sequences,
track_specific_sequences=track_specific_sequences,
seq_names=seq_names, n_cores=n_cores, save_to_file=save_to_file)
[docs]
def getGenomeTimes(
self,
data, samples=-1, num_top_sequences=-1, track_specific_sequences=[],
seq_names=[], n_cores=0, save_to_file=''):
"""Create DataFrame with times genomes first appeared during simulation.
Arguments:
data (pandas DataFrame): dataframe with model history as produced by `saveToDf`
function.
Keyword arguments:
samples (int): how many timepoints to uniformly sample from the total
timecourse; if <0, takes all timepoints. Defaults to 1.
save_to_file (String): file path and name to save model data under, no saving
occurs if empty string. Defaults to "".
n_cores (int): number of cores to parallelize across, if 0, all cores
available are used. Defaults to 0.
Returns:
pandas DataFrame with genomes and times as described above.
"""
return getGenomeTimesDf(data,
samples=samples, num_top_sequences=num_top_sequences,
track_specific_sequences=track_specific_sequences,
seq_names=seq_names, n_cores=n_cores, save_to_file=save_to_file)
[docs]
def getCompositionData(
self, data=None, populations=[], type_of_composition='Pathogens',
hosts=True, vectors=False, num_top_sequences=-1,
track_specific_sequences=[], genomic_positions=[],
count_individuals_based_on_model=None, save_data_to_file="",
n_cores=0, **kwargs):
"""Create dataframe with counts for pathogen genomes or resistance.
Creates a pandas Dataframe with dynamics of the pathogen strains or
protection sequences across selected populations in the model,
with one time point in each row and columns for pathogen genomes or
protection sequences.
Of note: sum of totals for all sequences in one time point does not
necessarily equal the number of infected hosts and/or vectors, given
multiple infections in the same host/vector are counted separately.
Keyword arguments:
data (pandas DataFrame): dataframe with model history as produced by `saveToDf`
function; if None, computes this dataframe and saves it under 'raw_data_'+'save_data_to_file'.
Defaults to None.
populations (list of Strings): IDs of populations to include in analysis; if empty, uses
all populations in model. Defaults to [].
type_of_composition (String): field of data to count totals of, can be either
'Pathogens' or 'Protection'. Defaults to 'Pathogens'.
hosts (Boolean): whether to count hosts. Defaults to True.
vectors (Boolean): whether to count vectors. Defaults to False.
num_top_sequences (int): how many sequences to count separately and include
as columns, remainder will be counted under column "Other"; if <0,
includes all genomes in model. Defaults to -1.
track_specific_sequences (list of Strings): contains specific sequences to have
as a separate column if not part of the top num_top_sequences
sequences. Defaults to [].
genomic_positions (list of lists of int): list in which each element is a list with
loci positions to extract (e.g. genomic_positions=[ [0,3], [5,6] ]
extracts positions 0, 1, 2, and 5 from each genome); if empty, takes
full genomes. Defaults to [].
count_individuals_based_on_model (None or Model object): Model object with populations and
fitness functions used to evaluate the most fit pathogen genome in
each host/vector in order to count only a single pathogen per
host/vector, asopposed to all pathogens within each host/vector; if
None, counts all pathogens. Defaults to None.
save_data_to_file (String): file path and name to save model data under, no
saving occurs if empty string. Defaults to "".
n_cores (int): number of cores to parallelize processing across, if 0, all
cores available are used. Defaults to 0.
**kwargs: additional arguents for joblib multiprocessing.
Returns:
pandas DataFrame with model sequence composition dynamics as described
above.
"""
if data is None:
data = saveToDf(
self.history,'raw_data_'+save_to_file,n_cores,verbose=verbose,
**kwargs
)
return compositionDf(
data, populations=populations,
type_of_composition=type_of_composition,
hosts=hosts, vectors=vectors, num_top_sequences=num_top_sequences,
track_specific_sequences=track_specific_sequences,
genomic_positions=genomic_positions,
count_individuals_based_on_model=count_individuals_based_on_model,
save_to_file=save_data_to_file, n_cores=n_cores, **kwargs
)
### Model interventions: ###
[docs]
def newPopulation(self, id, setup_name, num_hosts=0, num_vectors=0):
"""Create a new Population object with setup parameters.
If population ID is already in use, appends _2 to it
Arguments:
id (String): unique identifier for this population in the model.
setup_name (Setup object): setup object with parameters for this population.
Keyword arguments:
num_hosts (int >= 0): number of hosts to initialize population with. Defaults to 100.
num_vectors (int >= 0): number of vectors to initialize population with. Defaults to 100.
"""
if id in self.populations.keys():
id = id+'_2'
self.populations[id] = Population(
self, id, self.setups[setup_name], num_hosts, num_vectors
)
for p in self.populations:
self.populations[id].setHostMigrationNeighbor(self.populations[p],0)
self.populations[id].setVectorMigrationNeighbor(
self.populations[p],0
)
self.populations[id].setHostHostPopulationContactNeighbor(
self.populations[p],0
)
self.populations[id].setVectorHostPopulationContactNeighbor(
self.populations[p],0
)
self.populations[id].setHostVectorPopulationContactNeighbor(
self.populations[p],0
)
self.populations[p].setHostMigrationNeighbor(
self.populations[id],0
)
self.populations[p].setVectorMigrationNeighbor(
self.populations[id],0
)
self.populations[p].setHostHostPopulationContactNeighbor(
self.populations[id],0
)
self.populations[p].setVectorHostPopulationContactNeighbor(
self.populations[id],0
)
self.populations[p].setHostVectorPopulationContactNeighbor(
self.populations[id],0
)
[docs]
def linkPopulationsHostMigration(self, pop1_id, pop2_id, rate):
"""Set host migration rate from one population towards another.
Arguments:
pop1_id (String): origin population for which migration rate will be specified.
pop1_id (String): destination population for which migration rate will be
specified.
rate (number >= 0): migration rate from one population to the neighbor; evts/time.
"""
self.populations[pop1_id].setHostMigrationNeighbor(
self.populations[pop2_id], rate
)
[docs]
def linkPopulationsVectorMigration(self, pop1_id, pop2_id, rate):
"""Set vector migration rate from one population towards another.
Arguments:
pop1_id (String): origin population for which migration rate will be specified.
pop1_id (String): destination population for which migration rate will be
specified.
rate (number >= 0): migration rate from one population to the neighbor; evts/time.
"""
self.populations[pop1_id].setVectorMigrationNeighbor(
self.populations[pop2_id], rate
)
[docs]
def createInterconnectedPopulations(
self, num_populations, id_prefix, setup_name,
host_migration_rate=0, vector_migration_rate=0,
host_host_contact_rate=0,
host_vector_contact_rate=0, vector_host_contact_rate=0,
num_hosts=100, num_vectors=100):
"""Create new populations, link all of them to each other.
All populations in this cluster are linked with the same migration rate,
starting number of hosts and vectors, and setup parameters. Their IDs
are numbered onto prefix given as 'id_prefix_0', 'id_prefix_1',
'id_prefix_2', etc.
Arguments:
num_populations (int): number of populations to be created.
id_prefix (String): prefix for IDs to be used for this population in the model.
setup_name (Setup object): setup object with parameters for all populations.
Keyword arguments:
host_migration_rate (number >= 0): host migration rate between populations;
evts/time. Defaults to 0.
vector_migration_rate (number >= 0): vector migration rate between populations;
evts/time. Defaults to 0.
host_host_contact_rate (number >= 0): host-host inter-population contact rate
between populations; evts/time. Defaults to 0.
host_vector_contact_rate (number >= 0): host-vector inter-population contact rate
between populations; evts/time. Defaults to 0.
vector_host_contact_rate (number >= 0): vector-host inter-population contact rate
between populations; evts/time. Defaults to 0.
num_hosts (int): number of hosts to initialize population with. Defaults to 100.
num_vectors (int): number of hosts to initialize population with. Defaults to 100.
"""
new_pops = [
Population(
self, str(id_prefix) + str(i), self.setups[setup_name],
num_hosts, num_vectors
) for i in range(num_populations)
]
new_pop_ids = []
for pop in new_pops:
if pop.id in self.populations.keys():
pop.id = pop.id+'_2'
self.populations[pop.id] = pop
new_pop_ids.append(pop.id)
for p in self.populations:
pop.setHostMigrationNeighbor(self.populations[p],0)
pop.setVectorMigrationNeighbor(self.populations[p],0)
pop.setHostHostPopulationContactNeighbor(self.populations[p],0)
pop.setHostVectorPopulationContactNeighbor(self.populations[p],0)
pop.setVectorHostPopulationContactNeighbor(self.populations[p],0)
self.populations[p].setHostMigrationNeighbor(pop,0)
self.populations[p].setVectorMigrationNeighbor(pop,0)
self.populations[p].setHostHostPopulationContactNeighbor(pop,0)
self.populations[p].setHostVectorPopulationContactNeighbor(pop,0)
self.populations[p].setVectorHostPopulationContactNeighbor(pop,0)
for p1_id in new_pop_ids:
for p2_id in new_pop_ids:
self.linkPopulationsHostMigration(
p1_id,p2_id,host_migration_rate
)
self.linkPopulationsVectorMigration(
p1_id,p2_id,vector_migration_rate
)
self.linkPopulationsHostHostContact(
p1_id,p2_id,host_host_contact_rate
)
self.linkPopulationsHostVectorContact(
p1_id,p2_id,host_vector_contact_rate
)
self.linkPopulationsVectorHostContact(
p1_id,p2_id,vector_host_contact_rate
)
[docs]
def newHostGroup(self, pop_id, group_id, hosts=-1, type='any'):
"""Return a list of random hosts in population.
Arguments:
pop_id (String): ID of population to be sampled from.
group_id (String): ID to name group with.
Keyword arguments:
hosts (number): number of hosts to be sampled randomly: if <0, samples from
whole population; if <1, takes that fraction of population; if >=1,
samples that integer number of hosts. Defaults to -1.
type (String = {'healthy', 'infected', 'any'}): whether to sample healthy hosts only,
infected hosts only, or any hosts. Defaults to 'any'.
Returns:
list containing sampled hosts.
"""
self.groups[group_id] = self.populations[pop_id].newHostGroup(
hosts, type
)
[docs]
def newVectorGroup(self, pop_id, group_id, vectors=-1, type='any'):
"""Return a list of random vectors in population.
Arguments:
pop_id (String): ID of population to be sampled from.
group_id (String): ID to name group with.
Keyword arguments:
vectors (number): number of vectors to be sampled randomly: if <0, samples from
whole population; if <1, takes that fraction of population; if >=1,
samples that integer number of vectors. Defaults to -1.
type (String = {'healthy', 'infected', 'any'}): whether to sample healthy vectors only, infected vectors
only, or any vectors. Defaults to 'any'.
Returns:
list containing sampled vectors.
"""
self.groups[group_id] = self.populations[pop_id].newVectorGroup(
vectors, type
)
[docs]
def addHosts(self, pop_id, num_hosts):
"""Add a number of healthy hosts to population, return list with them.
Arguments:
pop_id (String): ID of population to be modified.
num_hosts (int): number of hosts to be added.
Returns:
list containing new hosts.
"""
self.populations[pop_id].addHosts(num_hosts)
[docs]
def addVectors(self, pop_id, num_vectors):
"""Add a number of healthy vectors to population, return list with them.
Arguments:
pop_id (String): ID of population to be modified.
num_vectors (int): number of vectors to be added.
Returns:
list containing new vectors.
"""
self.populations[pop_id].addVectors(num_vectors)
[docs]
def removeHosts(self, pop_id, num_hosts_or_list):
"""Remove a number of specified or random hosts from population.
Arguments:
pop_id (String): ID of population to be modified.
num_hosts_or_list (int or list of Hosts): number of hosts to be sampled randomly for removal
or list of hosts to be removed, must be hosts in this population.
"""
self.populations[pop_id].removeHosts(num_hosts_or_list)
[docs]
def removeVectors(self, pop_id, num_vectors_or_list):
"""Remove a number of specified or random vectors from population.
Arguments:
pop_id (String): ID of population to be modified.
num_vectors_or_list (int or list of Vectors): number of vectors to be sampled randomly for
removal or list of vectors to be removed, must be vectors in this
population.
"""
self.populations[pop_id].removeVectors(num_vectors_or_list)
[docs]
def addPathogensToHosts(self, pop_id, genomes_numbers, group_id=""):
"""Add specified pathogens to random hosts, optionally from a list.
Arguments:
pop_id (String): ID of population to be modified.
genomes_numbers (dict with keys=Strings, values=int) dictionary containing pathogen
genomes to add as keys and number of hosts each one will be added to as values.
Keyword arguments:
group_id (String): ID of specific hosts to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
hosts = self.populations[pop_id].hosts
else:
hosts = self.groups[group_id]
self.populations[pop_id].addPathogensToHosts(genomes_numbers,hosts)
[docs]
def addPathogensToVectors(self, pop_id, genomes_numbers, group_id=""):
"""Add specified pathogens to random vectors, optionally from a list.
Arguments:
pop_id (String): ID of population to be modified.
genomes_numbers (dict with keys=Strings, values=int): dictionary containing pathogen
genomes to add as keys and number of vectors each one will be added to as values.
Keyword arguments:
group_id (String): ID of specific vectors to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
vectors = self.populations[pop_id].vectors
else:
vectors = self.groups[group_id]
self.populations[pop_id].addPathogensToVectors(genomes_numbers,vectors)
[docs]
def treatHosts(self, pop_id, frac_hosts, resistance_seqs, group_id=""):
"""Treat random fraction of infected hosts against some infection.
Removes all infections with genotypes susceptible to given treatment.
Pathogens are removed if they are missing at least one of the sequences
in resistance_seqs from their genome. Removes this organism from
population infected list and adds to healthy list if appropriate.
Arguments:
pop_id (String): ID of population to be modified.
frac_hosts (number between 0 and 1): fraction of hosts considered to be randomly selected.
resistance_seqs (list of Strings): contains sequences required for treatment resistance.
Keyword arguments:
group_id (String): ID of specific hosts to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
hosts = self.populations[pop_id].hosts
else:
hosts = self.groups[group_id]
self.populations[pop_id].treatHosts(frac_hosts,resistance_seqs,hosts)
[docs]
def treatVectors(self, pop_id, frac_vectors, resistance_seqs, group_id=""):
"""Treat random fraction of infected vectors agains some infection.
Removes all infections with genotypes susceptible to given treatment.
Pathogens are removed if they are missing at least one of the sequences
in resistance_seqs from their genome. Removes this organism from
population infected list and adds to healthy list if appropriate.
Arguments:
pop_id (String): ID of population to be modified.
frac_vectors (number between 0 and 1): fraction of vectors considered to be
randomly selected.
resistance_seqs (list of Strings): contains sequences required for treatment resistance.
Keyword arguments:
group_id (String): ID of specific vectors to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
vectors = self.populations[pop_id].vectors
else:
vectors = self.groups[group_id]
self.populations[pop_id].treatVectors(
frac_vectors,resistance_seqs,vectors
)
[docs]
def protectHosts(
self, pop_id, frac_hosts, protection_sequence, group_id=""):
"""Protect a random fraction of infected hosts against some infection.
Adds protection sequence specified to a random fraction of the hosts
specified. Does not cure them if they are already infected.
Arguments:
pop_id (String): ID of population to be modified.
frac_hosts (number between 0 and 1): fraction of hosts considered to be
randomly selected.
protection_sequence (String): sequence against which to protect.
Keyword arguments:
group_id (String): ID of specific hosts to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
hosts = self.populations[pop_id].hosts
else:
hosts = self.groups[group_id]
self.populations[pop_id].protectHosts(
frac_hosts,protection_sequence,hosts
)
[docs]
def protectVectors(
self, pop_id, frac_vectors, protection_sequence, group_id=""):
"""Protect a random fraction of infected vectors against some infection.
Adds protection sequence specified to a random fraction of the vectors
specified. Does not cure them if they are already infected.
Arguments:
pop_id (String): ID of population to be modified.
frac_vectors (number between 0 and 1): fraction of vectors considered to be randomly selected.
protection_sequence (String): sequence against which to protect.
Keyword arguments:
group_id (String): ID of specific vectors to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
vectors = self.populations[pop_id].vectors
else:
vectors = self.groups[group_id]
self.populations[pop_id].protectVectors(
frac_vectors,protection_sequence,vectors
)
[docs]
def wipeProtectionHosts(self, pop_id, group_id=""):
"""Removes all protection sequences from hosts.
Arguments:
pop_id (String): ID of population to be modified.
Keyword arguments:
group_id (String): ID of specific hosts to sample from, if empty, samples from
whole from whole population. Defaults to "".
"""
if group_id == "":
hosts = self.populations[pop_id].hosts
else:
hosts = self.groups[group_id]
self.populations[pop_id].wipeProtectionHosts(hosts)
[docs]
def wipeProtectionVectors(self, pop_id, group_id=""):
"""Removes all protection sequences from vectors.
Arguments:
pop_id (String): ID of population to be modified.
Keyword arguments:
group_id (String): ID of specific vectors to sample from, if empty, samples
from whole population. Defaults to "".
"""
if group_id == "":
vectors = self.populations[pop_id].vectors
else:
vectors = self.groups[group_id]
self.populations[pop_id].wipeProtectionVectors(vectors)
### Modify population parameters: ###
[docs]
def setSetup(self, pop_id, setup_id):
"""Assign parameters stored in Setup object to this population.
Arguments:
pop_id (String): ID of population to be modified.
setup_id (String): ID of setup to be assigned.
"""
self.populations[pop_id].setSetup( self.setups[setup_id] )
### Utility: ###
[docs]
def customModelFunction(self, function):
"""Returns output of given function, passing this model as a parameter.
Arguments:
function (callable): function to be evaluated; must take a Model object as the
only parameter.
Returns:
output of function passed as parameter.
"""
return function(self)
### Preset fitness functions: ###
[docs]
@staticmethod
def peakLandscape(genome, peak_genome, min_value):
"""Return genome phenotype by decreasing with distance from optimal seq.
A purifying selection fitness function based on exponential decay of
fitness as genomes move away from the optimal sequence. Distance is
measured as percent Hamming distance from an optimal genome sequence.
Arguments:
genome (String): the genome to be evaluated.
peak_genome (String): the genome sequence to measure distance against, has
value of 1.
min_value (number 0-1): minimum value at maximum distance from optimal
genome.
Return:
value of genome (number).
"""
distance = td.hamming(genome, peak_genome) / len(genome)
value = np.exp( np.log( min_value ) * distance )
return value
[docs]
@staticmethod
def valleyLandscape(genome, valley_genome, min_value):
"""Return genome phenotype by increasing with distance from worst seq.
A disruptive selection fitness function based on exponential decay of
fitness as genomes move closer to the worst possible sequence. Distance
is measured as percent Hamming distance from the worst possible genome
sequence.
Arguments:
genome (String): the genome to be evaluated.
valley_genome (String): the genome sequence to measure distance against, has
value of min_value.
min_value (number 0-1): fitness value of worst possible genome.
Return:
value of genome (number).
"""
distance = td.hamming(genome, valley_genome) / len(genome)
value = np.exp( np.log( min_value ) * ( 1 - distance ) )
return value