Source code for opqua.model


"""Contains class Model; main class user interacts with."""

import numpy as np
import pandas as pd
import textdistance as td
import itertools as it
import copy as cp
import seaborn as sns
import joblib as jl

from opqua.internal.host import Host
from opqua.internal.vector import Vector
from opqua.internal.population import Population
from opqua.internal.setup import Setup
from opqua.internal.intervention import Intervention
from opqua.internal.gillespie import Gillespie
from opqua.internal.data import saveToDf, getPathogens, getProtections, \
    getPathogenDistanceHistoryDf
from opqua.internal.plot import populationsPlot, compartmentPlot, \
    compositionPlot, clustermap

[docs] class Model(object): """Class defines a Model. This is the main class that the user interacts with. The Model class contains populations, setups, and interventions to be used in simulation. Also contains groups of hosts/vectors for manipulations and stores model history as snapshots for each time point. **CONSTANTS:** - `CB_PALETTE`: a colorblind-friendly 8-color color scheme. - `DEF_CMAP`: a colormap object for Seaborn plots. Attributes: populations: dictionary with keys=population IDs, values=Population objects. setups: dictionary with keys=setup IDs, values=Setup objects. interventions: contains model interventions in the order they will occur. groups: dictionary with keys=group IDs, values=lists of hosts/vectors. history: dictionary with keys=time values, values=Model objects that are snapshots of Model at that timepoint. t_var: variable that tracks time in simulations. """ ### CONSTANTS ### ### Color scheme constants ### CB_PALETTE = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999"] # www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette # http://jfly.iam.u-tokyo.ac.jp/color/ DEF_CMAP = sns.cubehelix_palette( start=.5, rot=-.75, as_cmap=True, reverse=True ) ### CLASS CONSTRUCTOR ### def __init__(self): """Create a new Model object.""" super(Model, self).__init__() self.populations = {} # dictionary with keys=population IDs, values=Population objects self.setups = {} # dictionary with keys=setup IDs, values=Setup objects self.interventions = [] # contains model interventions in the order they will occur self.groups = {} # dictionary with keys=group IDs, values=lists of hosts/vectors self.history = {} # dictionary with keys=time values, values=Model objects that are # snapshots of Model at that timepoint self.global_trackers = { # dictionary keeping track of some global indicators over all # the course of the simulation 'num_events' : { id:0 for id in Gillespie.EVENT_IDS.values() }, # tracks the number of each kind of event in the simulation 'last_event_time' : 0, # time point at which the last event in the simulation happened 'genomes_seen' : [], # list of all unique genomes that have appeared in the # simulation 'custom_conditions' : {} # dictionary with keys=ID of custom condition, values=lists of # times; every time True is returned by a function in # custom_condition_trackers, the simulation time will be stored # under the corresponding ID inside # global_trackers['custom_condition'] } self.custom_condition_trackers = {} # dictionary with keys=ID of custom condition, values=functions that # take a Model object as argument and return True or False; every # time True is returned by a function in custom_condition_trackers, # the simulation time will be stored under the corresponding ID # inside global_trackers['custom_condition'] self.t_var = 0 # used as time variable during simulation ### MODEL METHODS ### ### Model initialization and simulation: ###
[docs] def setRandomSeed(self, seed): """Set random seed for numpy random number generator. Arguments: seed (int): int for the random seed to be passed to numpy. """ np.random.seed(seed)
[docs] def newSetup( self, name, preset=None, num_loci=None, possible_alleles=None, fitnessHost=None, contactHost=None, receiveContactHost=None, mortalityHost=None, natalityHost=None, recoveryHost=None, migrationHost=None, populationContactHost=None, receivePopulationContactHost=None, mutationHost=None, recombinationHost=None, fitnessVector=None, contactVector=None, receiveContactVector=None, mortalityVector=None, natalityVector=None, recoveryVector=None, migrationVector=None, populationContactVector=None, receivePopulationContactVector=None, mutationVector=None, recombinationVector=None, contact_rate_host_vector=None, transmission_efficiency_host_vector=None, transmission_efficiency_vector_host=None, contact_rate_host_host=None, transmission_efficiency_host_host=None, mean_inoculum_host=None, mean_inoculum_vector=None, recovery_rate_host=None, recovery_rate_vector=None, mortality_rate_host=None, mortality_rate_vector=None, recombine_in_host=None, recombine_in_vector=None, num_crossover_host=None, num_crossover_vector=None, mutate_in_host=None, mutate_in_vector=None, death_rate_host=None, death_rate_vector=None, birth_rate_host=None, birth_rate_vector=None, vertical_transmission_host=None, vertical_transmission_vector=None, inherit_protection_host=None, inherit_protection_vector=None, protection_upon_recovery_host=None, protection_upon_recovery_vector=None): """Create a new `Setup`, save it in setups dict under given name. Two preset setups exist: "vector-borne" and "host-host". You may select one of the preset setups with the preset keyword argument and then modify individual parameters with additional keyword arguments, without having to specify all of them. **"host-host":** - `num_loci` = 10 - `possible_alleles` = 'ATCG' - `fitnessHost` = (lambda g: 1) - `contactHost` = (lambda g: 1) - `receiveContactHost` = (lambda g: 1) - `mortalityHost` = (lambda g: 1) - `natalityHost` = (lambda g: 1) - `recoveryHost` = (lambda g: 1) - `migrationHost` = (lambda g: 1) - `populationContactHost` = (lambda g: 1) - `receivePopulationContactHost` = (lambda g: 1) - `mutationHost` = (lambda g: 1) - `recombinationHost` = (lambda g: 1) - `fitnessVector` = (lambda g: 1) - `contactVector` = (lambda g: 1) - `receiveContactVector` = (lambda g: 1) - `mortalityVector` = (lambda g: 1) - `natalityVector` = (lambda g: 1) - `recoveryVector` = (lambda g: 1) - `migrationVector` = (lambda g: 1) - `populationContactVector` = (lambda g: 1) - `receivePopulationContactVector` = (lambda g: 1) - `mutationVector` = (lambda g: 1) - `recombinationVector` = (lambda g: 1) - `contact_rate_host_vector` = 0 - `transmission_efficiency_host_vector` = 0 - `transmission_efficiency_vector_host` = 0 - `contact_rate_host_host` = 2e-1 - `transmission_efficiency_host_host` = 1 - `mean_inoculum_host` = 1e1 - `mean_inoculum_vector` = 0 - `recovery_rate_host` = 1e-1 - `recovery_rate_vector` = 0 - `mortality_rate_host` = 0 - `mortality_rate_vector` = 0 - `recombine_in_host` = 1e-4 - `recombine_in_vector` = 0 - `num_crossover_host` = 1 - `num_crossover_vector` = 0 - `mutate_in_host` = 1e-6 - `mutate_in_vector` = 0 - `death_rate_host` = 0 - `death_rate_vector` = 0 - `birth_rate_host` = 0 - `birth_rate_vector` = 0 - `vertical_transmission_host` = 0 - `vertical_transmission_vector` = 0 - `inherit_protection_host` = 0 - `inherit_protection_vector` = 0 - `protection_upon_recovery_host` = None - `protection_upon_recovery_vector` = None **"vector-borne":** - `num_loci` = 10 - `possible_alleles` = 'ATCG' - `fitnessHost` = (lambda g: 1) - `contactHost` = (lambda g: 1) - `receiveContactHost` = (lambda g: 1) - `mortalityHost` = (lambda g: 1) - `natalityHost` = (lambda g: 1) - `recoveryHost` = (lambda g: 1) - `migrationHost` = (lambda g: 1) - `populationContactHost` = (lambda g: 1) - `receivePopulationContactHost` = (lambda g: 1) - `mutationHost` = (lambda g: 1) - `recombinationHost` = (lambda g: 1) - `fitnessVector` = (lambda g: 1) - `contactVector` = (lambda g: 1) - `receiveContactVector` = (lambda g: 1) - `mortalityVector` = (lambda g: 1) - `natalityVector` = (lambda g: 1) - `recoveryVector` = (lambda g: 1) - `migrationVector` = (lambda g: 1) - `populationContactVector` = (lambda g: 1) - `receivePopulationContactVector` = (lambda g: 1) - `mutationVector` = (lambda g: 1) - `recombinationVector` = (lambda g: 1) - `contact_rate_host_vector` = 2e-1 - `transmission_efficiency_host_vector` = 1 - `transmission_efficiency_vector_host` = 1 - `contact_rate_host_host` = 0 - `transmission_efficiency_host_host` = 0 - `mean_inoculum_host` = 1e2 - `mean_inoculum_vector` = 1e0 - `recovery_rate_host` = 1e-1 - `recovery_rate_vector` = 1e-1 - `mortality_rate_host` = 0 - `mortality_rate_vector` = 0 - `recombine_in_host` = 0 - `recombine_in_vector` = 1e-4 - `num_crossover_host` = 0 - `num_crossover_vector` = 1 - `mutate_in_host` = 1e-6 - `mutate_in_vector` = 0 - `death_rate_host` = 0 - `death_rate_vector` = 0 - `birth_rate_host` = 0 - `birth_rate_vector` = 0 - `vertical_transmission_host` = 0 - `vertical_transmission_vector` = 0 - `inherit_protection_host` = 0 - `inherit_protection_vector` = 0 - `protection_upon_recovery_host` = None - `protection_upon_recovery_vector` = None Arguments: name (String): name of setup to be used as a key in model setups dictionary. Keyword arguments: preset (None or String): preset setup to be used: "vector-borne" or "host-host", if None, must define all other keyword arguments. Defaults to None. num_loci (int>0): length of each pathogen genome string. possible_alleles (String or list of Strings with num_loci elements): set of possible characters in all genome string, or at each position in genome string. fitnessHost (callable, takes a String argument and returns a number >= 0): function that evaluates relative fitness in head-to-head competition for different genomes within the same host. contactHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying probability of a given host being chosen to be the infector in a contact event, based on genome sequence of pathogen. receiveContactHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying probability of a given host being chosen to be the infected in a contact event, based on genome sequence of pathogen. mortalityHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying death rate for a given host, based on genome sequence of pathogen. natalityHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying birth rate for a given host, based on genome sequence of pathogen. recoveryHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying recovery rate for a given host based on genome sequence of pathogen. migrationHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying migration rate for a given host based on genome sequence of pathogen. populationContactHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying population contact rate for a given host based on genome sequence of pathogen. mutationHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying mutation rate for a given host based on genome sequence of pathogen. recombinationHost (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying recombination rate for a given host based on genome sequence of pathogen. fitnessVector (callable, takes a String argument and returns a number >=0): function that evaluates relative fitness in head-to-head competition for different genomes within the same vector. contactVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying probability of a given vector being chosen to be the infector in a contact event, based on genome sequence of pathogen. receiveContactVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying probability of a given vector being chosen to be the infected in a contact event, based on genome sequence of pathogen. mortalityVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying death rate for a given vector, based on genome sequence of pathogen. natalityVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying birth rate for a given vector, based on genome sequence of pathogen. recoveryVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying recovery rate for a given vector based on genome sequence of pathogen. migrationVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying migration rate for a given vector based on genome sequence of pathogen. populationContactVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying population contact rate for a given vector based on genome sequence of pathogen. mutationVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying mutation rate for a given vector based on genome sequence of pathogen. recombinationVector (callable, takes a String argument and returns a number 0-1): function that returns coefficient modifying recombination rate for a given vector based on genome sequence of pathogen. contact_rate_host_vector (number >= 0): rate of host-vector contact events, not necessarily transmission, assumes constant population density; evts/time. transmission_efficiency_host_vector (float): fraction of host-vector contacts that result in successful transmission. transmission_efficiency_vector_host (float): fraction of vector-host contacts that result in successful transmission. contact_rate_host_host (number >= 0): rate of host-host contact events, not necessarily transmission, assumes constant population density; evts/time. transmission_efficiency_host_host (float): fraction of host-host contacts that result in successful transmission. mean_inoculum_host (int >= 0): mean number of pathogens that are transmitted from a vector or host into a new host during a contact event. mean_inoculum_vector (int >= 0) mean number of pathogens that are transmitted from a host to a vector during a contact event. recovery_rate_host (number >= 0): rate at which hosts clear all pathogens; 1/time. recovery_rate_vector (number >= 0): rate at which vectors clear all pathogens 1/time. recovery_rate_vector (number >= 0): rate at which vectors clear all pathogens 1/time. mortality_rate_host (number 0-1): rate at which infected hosts die from disease. mortality_rate_vector (number 0-1): rate at which infected vectors die from disease. recombine_in_host (number >= 0): rate at which recombination occurs in host; evts/time. recombine_in_vector (number >= 0): rate at which recombination occurs in vector; evts/time. num_crossover_host (number >= 0): mean of a Poisson distribution modeling the number of crossover events of host recombination events. num_crossover_vector (number >= 0): mean of a Poisson distribution modeling the number of crossover events of vector recombination events. mutate_in_host (number >= 0): rate at which mutation occurs in host; evts/time. mutate_in_vector (number >= 0): rate at which mutation occurs in vector; evts/time. death_rate_host (number >= 0): natural host death rate; 1/time. death_rate_vector (number >= 0): natural vector death rate; 1/time. birth_rate_host (number >= 0): infected host birth rate; 1/time. birth_rate_vector (number >= 0): infected vector birth rate; 1/time. vertical_transmission_host (number 0-1): probability that a host is infected by its parent at birth. vertical_transmission_vector (number 0-1): probability that a vector is infected by its parent at birth. inherit_protection_host (number 0-1): probability that a host inherits all protection sequences from its parent. inherit_protection_vector (number 0-1): probability that a vector inherits all protection sequences from its parent. protection_upon_recovery_host (None or array-like of length 2 with int 0-num_loci): defines indexes in genome string that define substring to be added to host protection sequences after recovery. protection_upon_recovery_vector (None or array-like of length 2 with int 0-num_loci): defines indexes in genome string that define substring to be added to vector protection sequences after recovery. """ if preset == "vector-borne": num_loci = 10 if num_loci is None else num_loci possible_alleles = \ 'ATCG' if possible_alleles is None else possible_alleles fitnessHost = (lambda g: 1) if fitnessHost is None else fitnessHost contactHost = (lambda g: 1) if contactHost is None else contactHost receiveContactHost = \ (lambda g: 1) if receiveContactHost is None else receiveContactHost mortalityHost = \ (lambda g: 1) if mortalityHost is None else mortalityHost natalityHost = \ (lambda g: 1) if natalityHost is None else natalityHost recoveryHost = \ (lambda g: 1) if recoveryHost is None else recoveryHost migrationHost = \ (lambda g: 1) if migrationHost is None else migrationHost populationContactHost = \ (lambda g: 1) if populationContactHost is None else populationContactHost receivePopulationContactHost = \ (lambda g: 1) if receivePopulationContactHost is None else receivePopulationContactHost mutationHost = \ (lambda g: 1) if mutationHost is None else mutationHost recombinationHost = \ (lambda g: 1) if recombinationHost is None else recombinationHost fitnessVector = \ (lambda g: 1) if fitnessVector is None else fitnessVector contactVector = \ (lambda g: 1) if contactVector is None else contactVector receiveContactVector = \ (lambda g: 1) if receiveContactVector is None else receiveContactVector mortalityVector = \ (lambda g: 1) if mortalityVector is None else mortalityVector natalityVector = \ (lambda g: 1) if natalityVector is None else natalityVector recoveryVector = \ (lambda g: 1) if recoveryVector is None else recoveryVector migrationVector = \ (lambda g: 1) if migrationVector is None else migrationVector populationContactVector = \ (lambda g: 1) if populationContactVector is None else populationContactVector receivePopulationContactVector = \ (lambda g: 1) if receivePopulationContactVector is None else receivePopulationContactVector mutationVector = \ (lambda g: 1) if mutationVector is None else mutationVector recombinationVector = \ (lambda g: 1) if recombinationVector is None else recombinationVector contact_rate_host_vector = \ 2e-1 if contact_rate_host_vector is None \ else contact_rate_host_vector transmission_efficiency_host_vector = \ 1 if transmission_efficiency_host_vector is None \ else transmission_efficiency_host_vector transmission_efficiency_vector_host = \ 1 if transmission_efficiency_vector_host is None \ else transmission_efficiency_vector_host contact_rate_host_host = \ 0 if contact_rate_host_host is None else contact_rate_host_host transmission_efficiency_host_host = \ 0 if transmission_efficiency_host_host is None \ else transmission_efficiency_host_host mean_inoculum_host = \ 1e2 if mean_inoculum_host is None else mean_inoculum_host mean_inoculum_vector = \ 1 if mean_inoculum_vector is None else mean_inoculum_vector recovery_rate_host = \ 1e-1 if recovery_rate_host is None else recovery_rate_host recovery_rate_vector = \ 1e-1 if recovery_rate_vector is None else recovery_rate_vector mortality_rate_host = \ 0 if mortality_rate_host is None else mortality_rate_host mortality_rate_vector = \ 0 if mortality_rate_vector is None else mortality_rate_vector recombine_in_host = \ 0 if recombine_in_host is None else recombine_in_host recombine_in_vector = \ 1e-4 if recombine_in_vector is None else recombine_in_vector num_crossover_host = 0 \ if num_crossover_host is None else num_crossover_host num_crossover_vector = \ 1 if num_crossover_vector is None else num_crossover_vector mutate_in_host = 1e-6 if mutate_in_host is None else mutate_in_host mutate_in_vector = \ 0 if mutate_in_vector is None else mutate_in_vector death_rate_host = 0 if death_rate_host is None else death_rate_host death_rate_vector = \ 0 if death_rate_vector is None else death_rate_vector birth_rate_host = 0 if birth_rate_host is None else birth_rate_host birth_rate_vector = \ 0 if birth_rate_vector is None else birth_rate_vector vertical_transmission_host = \ 0 if vertical_transmission_host is None \ else vertical_transmission_host vertical_transmission_vector = \ 0 if vertical_transmission_vector is None \ else vertical_transmission_vector inherit_protection_host = \ 0 if inherit_protection_host is None \ else inherit_protection_host inherit_protection_vector = \ 0 if inherit_protection_vector is None \ else inherit_protection_vector protection_upon_recovery_host = protection_upon_recovery_host protection_upon_recovery_vector = protection_upon_recovery_vector elif preset == "host-host": num_loci = 10 if num_loci is None else num_loci possible_alleles = \ 'ATCG' if possible_alleles is None else possible_alleles fitnessHost = (lambda g: 1) if fitnessHost is None else fitnessHost contactHost = (lambda g: 1) if contactHost is None else contactHost receiveContactHost = \ (lambda g: 1) if receiveContactHost is None else receiveContactHost mortalityHost = \ (lambda g: 1) if mortalityHost is None else mortalityHost natalityHost = \ (lambda g: 1) if natalityHost is None else natalityHost recoveryHost = \ (lambda g: 1) if recoveryHost is None else recoveryHost migrationHost = \ (lambda g: 1) if migrationHost is None else migrationHost populationContactHost = \ (lambda g: 1) if populationContactHost is None else populationContactHost receivePopulationContactHost = \ (lambda g: 1) if receivePopulationContactHost is None else receivePopulationContactHost mutationHost = \ (lambda g: 1) if mutationHost is None else mutationHost recombinationHost = \ (lambda g: 1) if recombinationHost is None else recombinationHost fitnessVector = \ (lambda g: 1) if fitnessVector is None else fitnessVector contactVector = \ (lambda g: 1) if contactVector is None else contactVector receiveContactVector = \ (lambda g: 1) if receiveContactVector is None else receiveContactVector mortalityVector = \ (lambda g: 1) if mortalityVector is None else mortalityVector natalityVector = \ (lambda g: 1) if natalityVector is None else natalityVector recoveryVector = \ (lambda g: 1) if recoveryVector is None else recoveryVector mortality_rate_host = \ 0 if mortality_rate_host is None else mortality_rate_host mortality_rate_vector = \ 0 if mortality_rate_vector is None else mortality_rate_vector migrationVector = \ (lambda g: 1) if migrationVector is None else migrationVector populationContactVector = \ (lambda g: 1) if populationContactVector is None else populationContactVector receivePopulationContactVector = \ (lambda g: 1) if receivePopulationContactVector is None else receivePopulationContactVector mutationVector = \ (lambda g: 1) if mutationVector is None else mutationVector recombinationVector = \ (lambda g: 1) if recombinationVector is None else recombinationVector contact_rate_host_vector = \ 0 if contact_rate_host_vector is None \ else contact_rate_host_vector transmission_efficiency_host_vector = \ 0 if transmission_efficiency_host_vector is None \ else transmission_efficiency_host_vector transmission_efficiency_vector_host = \ 0 if transmission_efficiency_vector_host is None \ else transmission_efficiency_vector_host contact_rate_host_host = \ 2e-1 if contact_rate_host_host is None \ else contact_rate_host_host transmission_efficiency_host_host = \ 1 if transmission_efficiency_host_host is None \ else transmission_efficiency_host_host mean_inoculum_host = \ 1e1 if mean_inoculum_host is None else mean_inoculum_host mean_inoculum_vector = \ 0 if mean_inoculum_vector is None else mean_inoculum_vector recovery_rate_host = \ 1e-1 if recovery_rate_host is None else recovery_rate_host recovery_rate_vector = \ 0 if recovery_rate_vector is None else recovery_rate_vector recombine_in_host = \ 1e-4 if recombine_in_host is None else recombine_in_host recombine_in_vector = \ 0 if recombine_in_vector is None else recombine_in_vector num_crossover_host = 1 \ if num_crossover_host is None else num_crossover_host num_crossover_vector = \ 0 if num_crossover_vector is None else num_crossover_vector mutate_in_host = \ 1e-6 if mutate_in_host is None else mutate_in_host mutate_in_vector = \ 0 if mutate_in_vector is None else mutate_in_vector death_rate_host = \ 0 if death_rate_host is None else death_rate_host death_rate_vector = \ 0 if death_rate_vector is None else death_rate_vector birth_rate_host = 0 if birth_rate_host is None else birth_rate_host birth_rate_vector = \ 0 if birth_rate_vector is None else birth_rate_vector vertical_transmission_host = \ 0 if vertical_transmission_host is None \ else vertical_transmission_host vertical_transmission_vector = \ 0 if vertical_transmission_vector is None \ else vertical_transmission_vector inherit_protection_host = \ 0 if inherit_protection_host is None \ else inherit_protection_host inherit_protection_vector = \ 0 if inherit_protection_vector is None \ else inherit_protection_vector protection_upon_recovery_host = protection_upon_recovery_host protection_upon_recovery_vector = protection_upon_recovery_vector self.setups[name] = Setup( name, num_loci, possible_alleles, fitnessHost, contactHost, receiveContactHost, mortalityHost, natalityHost, recoveryHost, migrationHost, populationContactHost, receivePopulationContactHost, mutationHost, recombinationHost, fitnessVector, contactVector, receiveContactVector, mortalityVector, natalityVector,recoveryVector, migrationVector, populationContactVector, receivePopulationContactVector, mutationVector, recombinationVector, contact_rate_host_vector, transmission_efficiency_host_vector, transmission_efficiency_vector_host, contact_rate_host_host, transmission_efficiency_host_host, mean_inoculum_host, mean_inoculum_vector, recovery_rate_host, recovery_rate_vector, mortality_rate_host,mortality_rate_vector, recombine_in_host, recombine_in_vector, num_crossover_host, num_crossover_vector, mutate_in_host, mutate_in_vector, death_rate_host,death_rate_vector, birth_rate_host, birth_rate_vector, vertical_transmission_host, vertical_transmission_vector, inherit_protection_host, inherit_protection_vector, protection_upon_recovery_host, protection_upon_recovery_vector )
[docs] def newIntervention(self, time, method_name, args): """Create a new intervention to be carried out at a specific time. Arguments: time (number >= 0): time at which intervention will take place. method_name (String): intervention to be carried out, must correspond to the name of a method of the `Model` object. args (array-like): contains arguments for function in positinal order. """ self.interventions.append( Intervention(time, method_name, args, self) )
[docs] def addCustomConditionTracker(self, condition_id, trackerFunction): """Add a function to track occurrences of custom events in simulation. Adds function `trackerFunction` to dictionary `custom_condition_trackers` under key `condition_id`. Function `trackerFunction` will be executed at every event in the simulation. Every time True is returned, the simulation time will be stored under the corresponding `condition_id` key inside `global_trackers['custom_condition']`. Arguments: condition_id (String): ID of this specific condition- trackerFunction (callable): function that take a `Model` object as argument and returns True or False. """ self.custom_condition_trackers['condition_id'] = trackerFunction self.global_trackers['custom_conditions']['condition_id'] = []
[docs] def run(self,t0,tf,time_sampling=0,host_sampling=0,vector_sampling=0): """Simulate model for a specified time between two time points. Simulates a time series using the Gillespie algorithm. Saves a dictionary containing model state history, with `keys=times` and `values=Model` objects with model snapshot at that time point under this model's history attribute. Arguments: t0 (number >= 0): initial time point to start simulation at. tf (number >= 0): initial time point to end simulation at. Keyword arguments: time_sampling (int): how many events to skip before saving a snapshot of the system state (saves all by default), if <0, saves only final state. Defaults to 0. host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. vector_sampling (int >= 0): how many vectors to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. """ sim = Gillespie(self) self.history = sim.run( t0, tf, time_sampling, host_sampling, vector_sampling )
[docs] def runReplicates( self,t0,tf,replicates,host_sampling=0,vector_sampling=0,n_cores=0, **kwargs): """Simulate replicates of a model, save only end results. Simulates replicates of a time series using the Gillespie algorithm. Saves a dictionary containing model end state state, with `keys=times` and `values=Model` objects with model snapshot. The time is the final timepoint. Arguments: t0 (number >= 0): initial time point to start simulation at. tf (number >= 0): initial time point to end simulation at. replicates (int >= 1): how many replicates to simulate. Keyword arguments: host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. vector_sampling (int >= 0): how many vectors to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. n_cores (int >= 0): number of cores to parallelize file export across, if 0, all cores available are used. Defaults to 0. **kwargs: additional arguents for joblib multiprocessing. Returns: List of `Model` objects with the final snapshots. """ if not n_cores: n_cores = jl.cpu_count() print('Starting parallel simulations...') def run(sim_num): model = self.deepCopy() sim = Gillespie(model) mod = sim.run( t0,tf,time_sampling=-1, host_sampling=host_sampling,vector_sampling=vector_sampling )[tf] mod.history = { tf:mod } return mod return jl.Parallel(n_jobs=n_cores, verbose=10, **kwargs) ( jl.delayed( run ) (_) for _ in range(replicates) )
[docs] def runParamSweep( self,t0,tf,setup_id, param_sweep_dic={}, host_population_size_sweep={}, vector_population_size_sweep={}, host_migration_sweep_dic={}, vector_migration_sweep_dic={}, host_host_population_contact_sweep_dic={}, host_vector_population_contact_sweep_dic={}, vector_host_population_contact_sweep_dic={}, replicates=1,host_sampling=0,vector_sampling=0,n_cores=0, **kwargs): """Simulate a parameter sweep with a model, save only end results. Simulates variations of a time series using the Gillespie algorithm. Saves a dictionary containing model end state state, with `keys=times` and `values=Model` objects with model snapshot. The time is the final timepoint. Arguments: t0 (number >= 0): initial time point to start simulation at. tf (number >= 0): initial time point to end simulation at. setup_id (String): ID of setup to be assigned. Keyword arguments: param_sweep_dic -- dictionary with keys=parameter names (attributes of Setup), values=list of values for parameter (list, class of elements depends on parameter) host_population_size_sweep -- dictionary with keys=population IDs (Strings), values=list of values with host population sizes (must be greater than original size set for each population, list of numbers) vector_population_size_sweep -- dictionary with keys=population IDs (Strings), values=list of values with vector population sizes (must be greater than original size set for each population, list of numbers) host_migration_sweep_dic -- dictionary with keys=population IDs of origin and destination, separated by a colon ';' (Strings), values=list of values (list of numbers) vector_migration_sweep_dic -- dictionary with keys=population IDs of origin and destination, separated by a colon ';' (Strings), values=list of values (list of numbers) host_host_population_contact_sweep_dic -- dictionary with keys=population IDs of origin and destination, separated by a colon ';' (Strings), values=list of values (list of numbers) host_vector_population_contact_sweep_dic -- dictionary with keys=population IDs of origin and destination, separated by a colon ';' (Strings), values=list of values (list of numbers) vector_host_population_contact_sweep_dic -- dictionary with keys=population IDs of origin and destination, separated by a colon ';' (Strings), values=list of values (list of numbers) replicates -- how many replicates to simulate (int >= 1) host_sampling -- how many hosts to skip before saving one in a snapshot of the system state (saves all by default) (int >= 0, default 0) vector_sampling -- how many vectors to skip before saving one in a snapshot of the system state (saves all by default) (int >= 0, default 0) n_cores -- number of cores to parallelize file export across, if 0, all cores available are used (default 0; int >= 0) **kwargs -- additional arguents for joblib multiprocessing Returns: DataFrame with parameter combinations, list of Model objects with the final snapshots. """ if not n_cores: n_cores = jl.cpu_count() for p in host_population_size_sweep: param_sweep_dic['pop_size_host:'+p] = host_population_size_sweep[p] for p in vector_population_size_sweep: param_sweep_dic['pop_size_vector:'+p] = vector_population_size_sweep[p] for p in host_migration_sweep_dic: param_sweep_dic['migrate_host:'+p] = host_migration_sweep_dic[p] for p in vector_migration_sweep_dic: param_sweep_dic['migrate_vector:'+p] = vector_migration_sweep_dic[p] for p in host_host_population_contact_sweep_dic: param_sweep_dic['population_contact_host_host:'+p] \ = host_host_population_contact_sweep_dic[p] for p in host_vector_population_contact_sweep_dic: param_sweep_dic['population_contact_host_vector:'+p] \ = host_vector_population_contact_sweep_dic[p] for p in vector_host_population_contact_sweep_dic: param_sweep_dic['population_contact_vector_host:'+p] \ = vector_host_population_contact_sweep_dic[p] if len(param_sweep_dic) == 0: raise ValueError( 'param_sweep_dic, host_migration_sweep_dic, vector_migration_sweep_dic, host_host_population_contact_sweep_dic, host_vector_population_contact_sweep_dic, and vector_host_population_contact_sweep_dic cannot all be empty in runParamSweep()' ) params = param_sweep_dic.keys() value_lists = [ param_sweep_dic[param] for param in params ] combinations = list( it.product( *value_lists ) ) * replicates param_df = pd.DataFrame(combinations) param_df.columns = params results = {} print('Starting parallel simulations...') def run(param_values): model = self.deepCopy() for i,param_name in enumerate(params): if ':' in param_name: pops = param_name.split(':')[1].split(';') if 'pop_size_host:' in param_name: pop = cp.deepcopy( model.populations[pops[0]] ) add_hosts = param_values[i] - len(pop.hosts) if add_hosts < 0: raise ValueError( 'Value ' + str(param_values[i]) + ' assigned to ' + pops[0] + ' in host_population_size_sweep must be greater or equal to the population\'s original number of hosts.' ) else: pop.addHosts(add_hosts) model.populations[pops[0]] = pop pop.model = model elif 'pop_size_vector:' in param_name: pop = cp.deepcopy( model.populations[pops[0]] ) add_vectors = param_values[i] - len(pop.vectors) if add_vectors < 0: raise ValueError( 'Values ' + str(param_values[i]) + ' assigned to ' + pops[0] + ' in vector_population_size_sweep must be greater or equal to the population\'s original number of vectors.' ) else: pop.addVectors(add_vectors) model.populations[pops[0]] = pop pop.model = model elif 'migrate_host:' in param_name: new_pops = [ cp.deepcopy( model.populations[pops[0]] ), cp.deepcopy( model.populations[pops[1]] ) ] new_pops[0].model = model new_pops[1].model = model model.populations[pops[0]] = new_pops[0] model.populations[pops[1]] = new_pops[1] model.linkPopulationsHostMigration( new_pops[0],new_pops[1],param_values[i] ) elif 'migrate_vector:' in param_name: new_pops = [ cp.deepcopy( model.populations[pops[0]] ), cp.deepcopy( model.populations[pops[1]] ) ] new_pops[0].model = model new_pops[1].model = model model.populations[pops[0]] = new_pops[0] model.populations[pops[1]] = new_pops[1] model.linkPopulationsVectorMigration( pops[0],pops[1],param_values[i] ) elif 'population_contact_host_host:' in param_name: new_pops = [ cp.deepcopy( model.populations[pops[0]] ), cp.deepcopy( model.populations[pops[1]] ) ] new_pops[0].model = model new_pops[1].model = model model.populations[pops[0]] = new_pops[0] model.populations[pops[1]] = new_pops[1] model.linkPopulationsHostHostContact( pops[0],pops[1],param_values[i] ) model.linkPopulationsHostHostContact( pops[1],pops[0],param_values[i] ) elif 'population_contact_host_vector:' in param_name: new_pops = [ cp.deepcopy( model.populations[pops[0]] ), cp.deepcopy( model.populations[pops[1]] ) ] new_pops[0].model = model new_pops[1].model = model model.populations[pops[0]] = new_pops[0] model.populations[pops[1]] = new_pops[1] model.linkPopulationsHostVectorContact( pops[0],pops[1],param_values[i] ) model.linkPopulationsHostVectorContact( pops[1],pops[0],param_values[i] ) elif 'population_contact_vector_host:' in param_name: new_pops = [ cp.deepcopy( model.populations[pops[0]] ), cp.deepcopy( model.populations[pops[1]] ) ] new_pops[0].model = model new_pops[1].model = model model.populations[pops[0]] = new_pops[0] model.populations[pops[1]] = new_pops[1] model.linkPopulationsVectorHostContact( pops[0],pops[1],param_values[i] ) model.linkPopulationsVectorHostContact( pops[1],pops[0],param_values[i] ) else: setattr(model.setups[setup_id],param_name,param_values[i]) for name,pop in model.populations.items(): pop.setSetup( model.setups[pop.setup.id] ) sim = Gillespie(model) mod = sim.run( t0,tf,time_sampling=-1, host_sampling=host_sampling,vector_sampling=vector_sampling )[tf] mod.history = { tf:mod } return mod return ( param_df, jl.Parallel(n_jobs=n_cores, verbose=10, **kwargs) ( jl.delayed( run ) (param_values) for param_values in combinations ) )
[docs] def copyState(self,host_sampling=0,vector_sampling=0): """Returns a slimmed-down representation of the current model state. Keyword arguments: host_sampling (int >= 0): how many hosts to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. vector_sampling (int >= 0): how many vectors to skip before saving one in a snapshot of the system state (saves all by default). Defaults to 0. Returns: Model object with current population host and vector lists. """ copy = Model() copy.populations = { id: p.copyState(host_sampling,vector_sampling) for id,p in self.populations.items() } return copy
[docs] def deepCopy(self): """Returns a full copy of the current model with inner references. Returns: Copied Model object. """ model = cp.deepcopy(self) for intervention in model.interventions: intervention.model = model for pop in model.populations: model.populations[pop].model = model for h in model.populations[pop].hosts: h.population = model.populations[pop] for v in model.populations[pop].vectors: v.population = model.populations[pop] return model
### Output and Plots: ###
[docs] def saveToDataFrame(self,save_to_file,n_cores=0,**kwargs): """Save status of model to dataframe, write to file location given. Creates a pandas Dataframe in long format with the given model history, with one host or vector per simulation time in each row, and columns: - Time - simulation time of entry - Population - ID of this host/vector's population - Organism - host/vector - ID - ID of host/vector - Pathogens - all genomes present in this host/vector separated by ';' - Protection - all genomes present in this host/vector separated by ';' - Alive - whether host/vector is alive at this time, True/False Arguments: save_to_file (String): file path and name to save model data under. Keyword arguments: n_cores (int >= 0): number of cores to parallelize file export across, if 0, all cores available are used. Defaults to 0. **kwargs: additional arguents for joblib multiprocessing. Returns: `pandas dataframe` with model history as described above. """ data = saveToDf( self.history,save_to_file,n_cores,**kwargs ) return data
[docs] def getPathogens(self, dat, save_to_file=""): """Create Dataframe with counts for all pathogen genomes in data. Returns sorted pandas Dataframe with counts for occurrences of all pathogen genomes in data passed. Arguments: data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: save_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". Returns: `pandas dataframe` with Series as described above. """ return getPathogens(dat, save_to_file=save_to_file)
[docs] def getProtections(self, dat, save_to_file=""): """Create Dataframe with counts for all protection sequences in data. Returns sorted `pandas Dataframe` with counts for occurrences of all protection sequences in data passed. Arguments: data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: save_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". Returns: pandas DataFrame with Series as described above. """ return getProtections(dat, save_to_file=save_to_file)
[docs] def populationsPlot( self, file_name, data, compartment='Infected', hosts=True, vectors=False, num_top_populations=7, track_specific_populations=[], save_data_to_file="", x_label='Time', y_label='Hosts', figsize=(8, 4), dpi=200, palette=CB_PALETTE, stacked=False): """Create plot with aggregated totals per population across time. Creates a line or stacked line plot with dynamics of a compartment across populations in the model, with one line for each population. A host or vector is considered part of the recovered compartment if it has protection sequences of any kind and is not infected. Arguments: file_name (String): file path, name, and extension to save plot under. data (pandas DataFrame): dataframe with model history as produced by saveToDf function. Keyword arguments: compartment (String): subset of hosts/vectors to count totals of, can be either 'Naive','Infected','Recovered', or 'Dead'. Defaults to 'Infected'. hosts (Boolean): whether to count hosts. Defaults to True. vectors (Boolean) whether to count vectors. Defaults to False. num_top_populations (int): how many populations to count separately and include as columns, remainder will be counted under column "Other"; if <0, includes all populations in model. Defaults to 7. track_specific_populations (list of Strings): contains IDs of specific populations to have as a separate column if not part of the top num_top_populations populations. Defaults to []. save_data_to_file (String): file path and name to save model plot data under, no saving occurs if empty string. Defaults to "". x_label (String): X axis title. Defaults to 'Time'. y_label (String): Y axis title. Defaults to 'Hosts'. legend_title (String): legend title. Defaults to 'Population'. legend_values (list of Strings): labels for each trace, if empty list, uses population IDs. Defaults to []. figsize (array-like of two ints): dimensions of figure. Defaults to (8,4). dpi (int): figure resolution. Defaults to 200. palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`. stacked (Boolean): whether to draw a regular line plot instead of a stacked one. Defaults to False. Returns: `axis object` for plot with model population dynamics as described above. """ return populationsPlot( file_name, data, compartment=compartment, hosts=hosts, vectors=vectors, num_top_populations=num_top_populations, track_specific_populations=track_specific_populations, save_data_to_file=save_data_to_file, x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi, palette=palette, stacked=stacked )
[docs] def compartmentPlot( self, file_name, data, populations=[], hosts=True, vectors=False, save_data_to_file="", x_label='Time', y_label='Hosts', figsize=(8, 4), dpi=200, palette=CB_PALETTE, stacked=False): """Create plot with number of naive,inf,rec,dead hosts/vectors vs. time. Creates a line or stacked line plot with dynamics of all compartments (naive, infected, recovered, dead) across selected populations in the model, with one line for each compartment. A host or vector is considered part of the recovered compartment if it has protection sequences of any kind and is not infected. Arguments: file_name (String): file path, name, and extension to save plot under. data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: populations (list of Strings): IDs of populations to include in analysis; if empty, uses all populations in model. Defaults to []. hosts (Boolean): whether to count hosts. Defaults to True. vectors (Boolean): whether to count vectors. Defaults to False. save_data_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". x_label (String): X axis title. Defaults to 'Time'. y_label (String): Y axis title. Defaults to 'Hosts'. legend_title (String): legend title. Defaults to 'Population'. legend_values (list of Strings): labels for each trace, if empty list, uses population IDs. Defaults to []. figsize (array-like of two ints): dimensions of figure. Defaults to (8,4). dpi (int)): figure resolution. Defaults to 200. palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`. stacked -- whether to draw a regular line plot instead of a stacked one (default False, Boolean) Returns: axis object for plot with model compartment dynamics as described above """ return compartmentPlot( file_name, data, populations=populations, hosts=hosts, vectors=vectors, save_data_to_file=save_data_to_file, x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi, palette=palette, stacked=stacked )
[docs] def compositionPlot( self, file_name, data, composition_dataframe=None, populations=[], type_of_composition='Pathogens', hosts=True, vectors=False, num_top_sequences=7, track_specific_sequences=[], save_data_to_file="", x_label='Time', y_label='Infections', figsize=(8, 4), dpi=200, palette=CB_PALETTE, stacked=True, remove_legend=False, genomic_positions=[],population_fraction=False, count_individuals_based_on_model=None, legend_title='Genotype', legend_values=[], **kwargs): """Create plot with counts for pathogen genomes or resistance vs. time. Creates a line or stacked line plot with dynamics of the pathogen strains or protection sequences across selected populations in the model, with one line for each pathogen genome or protection sequence being shown. Of note: sum of totals for all sequences in one time point does not necessarily equal the number of infected hosts and/or vectors, given multiple infections in the same host/vector are counted separately. Arguments: file_name (String): file path, name, and extension to save plot under. data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: composition_dataframe (pandas DataFrame): output of compositionDf() if already computed Defaults to None. populations (list of Strings): IDs of populations to include in analysis; if empty, uses all populations in model. Defaults to []. type_of_composition (String) field of data to count totals of, can be either 'Pathogens' or 'Protection'. Defaults to 'Pathogens'. hosts (Boolean) whether to count hosts. Defaults to True. vectors (Boolean): whether to count vectors. Defaults to False. num_top_sequences (int): how many sequences to count separately and include as columns, remainder will be counted under column "Other"; if <0, includes all genomes in model. Defaults to 7. track_specific_sequences (list of Strings): contains specific sequences to have as a separate column if not part of the top num_top_sequences sequences. Defaults to []. genomic_positions (list of lists of int): list in which each element is a list with loci positions to extract (e.g. genomic_positions=[ [0,3], [5,6] ] extracts positions 0, 1, 2, and 5 from each genome); if empty, takes full genomes. Defaults to []. count_individuals_based_on_model (None or Model): `Model` object with populations and fitness functions used to evaluate the most fit pathogen genome in each host/vector in order to count only a single pathogen per host/vector, as opposed to all pathogens within each host/vector; if None, counts all pathogens. Defaults to None. save_data_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". x_label (String): X axis title. Defaults to 'Time'. y_label (String): Y axis title. Defaults to 'Hosts'. legend_title (String): legend title. Defaults to 'Population'. legend_values (list of Strings): labels for each trace, if empty list, uses population IDs. Defaults to []. figsize (array-like of two ints): dimensions of figure. Defaults to (8,4). dpi (int): figure resolution. Defaults to 200. palette (list of color Strings): color palette to use for traces. Defaults to `CB_PALETTE`. stacked (Boolean): whether to draw a regular line plot instead of a stacked one. Defaults to False. remove_legend (Boolean): whether to print the sequences on the figure legend instead of printing them on a separate csv file. Defaults to True. **kwargs: additional arguents for joblib multiprocessing. Returns: axis object for plot with model sequence composition dynamics as described. """ return compositionPlot( file_name, data, composition_dataframe=composition_dataframe,populations=populations, type_of_composition=type_of_composition, hosts=hosts, vectors=vectors, num_top_sequences=num_top_sequences, track_specific_sequences=track_specific_sequences, save_data_to_file=save_data_to_file, x_label=x_label, y_label=y_label, figsize=figsize, dpi=dpi, palette=palette, stacked=stacked, remove_legend=remove_legend, genomic_positions=genomic_positions, count_individuals_based_on_model=count_individuals_based_on_model, population_fraction=population_fraction, legend_title=legend_title, legend_values=legend_values, **kwargs )
[docs] def clustermap( self, file_name, data, num_top_sequences=-1, track_specific_sequences=[], seq_names=[], n_cores=0, method='weighted', metric='euclidean', save_data_to_file="", legend_title='Distance', legend_values=[], figsize=(10,10), dpi=200, color_map=DEF_CMAP): """Create a heatmap and dendrogram for pathogen genomes in data passed. Arguments: file_name (String): file path, name, and extension to save plot under. data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: num_top_sequences (int): how many sequences to include in matrix; if <0, includes all genomes in data passed. Defaults to -1. track_specific_sequences (list of Strings): contains specific sequences to include in matrix if not part of the top num_top_sequences sequences. Defaults to []. seq_names (list ofStrings): list with names to be used for sequence labels in matrix must be of same length as number of sequences to be displayed; if empty, uses sequences themselves. Defaults to []. n_cores (int >= 0): number of cores to parallelize distance compute across, if 0, all cores available are used. Defaults to 0. method (String): clustering algorithm to use with seaborn clustermap. Defaults to 'weighted'. metric (String): distance metric to use with seaborn clustermap. Defaults to 'euclidean'. save_data_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". legend_title (String): legend title. Defaults to 'Distance'. figsize (array-like of two ints): dimensions of figure. Defaults to (8,4). dpi (int): figure resolution. Defaults to 200. color_map (matplotlib cmap object): color map to use for traces. Defaults to `DEF_CMAP`. Returns: figure object for plot with heatmap and dendrogram as described. """ return clustermap( file_name, data, num_top_sequences=num_top_sequences, track_specific_sequences=track_specific_sequences, seq_names=seq_names, n_cores=n_cores, method=method, metric=metric, save_data_to_file=save_data_to_file, legend_title=legend_title, legend_values=legend_values, figsize=figsize, dpi=dpi, color_map=color_map )
[docs] def pathogenDistanceHistory( self, data, samples=-1, num_top_sequences=-1, track_specific_sequences=[], seq_names=[], n_cores=0, save_to_file=''): """Create DataFrame with pairwise Hamming distances for pathogen sequences in data. DataFrame has indexes and columns named according to genomes or argument seq_names, if passed. Distance is measured as percent Hamming distance from an optimal genome sequence. Arguments: data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: samples (int): how many timepoints to uniformly sample from the total timecourse; if <0, takes all timepoints. Defaults to 1. num_top_sequences (int) how many sequences to include in matrix; if <0, includes all genomes in data passed. Defaults to -1. track_specific_sequences (list of Strings): contains specific sequences to include in matrix if not part of the top num_top_sequences sequences. Defaults to []. seq_names (list of Strings): list with names to be used for sequence labels in matrix must be of same length as number of sequences to be displayed; if empty, uses sequences themselves. Defaults to []. n_cores (int >= 0): number of cores to parallelize distance compute across, if 0, all cores available are used. Defaults to 0. save_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". Returns: pandas DataFrame with distance matrix as described above. """ return getPathogenDistanceHistoryDf(data, samples=samples, num_top_sequences=num_top_sequences, track_specific_sequences=track_specific_sequences, seq_names=seq_names, n_cores=n_cores, save_to_file=save_to_file)
[docs] def getGenomeTimes( self, data, samples=-1, num_top_sequences=-1, track_specific_sequences=[], seq_names=[], n_cores=0, save_to_file=''): """Create DataFrame with times genomes first appeared during simulation. Arguments: data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function. Keyword arguments: samples (int): how many timepoints to uniformly sample from the total timecourse; if <0, takes all timepoints. Defaults to 1. save_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". n_cores (int): number of cores to parallelize across, if 0, all cores available are used. Defaults to 0. Returns: pandas DataFrame with genomes and times as described above. """ return getGenomeTimesDf(data, samples=samples, num_top_sequences=num_top_sequences, track_specific_sequences=track_specific_sequences, seq_names=seq_names, n_cores=n_cores, save_to_file=save_to_file)
[docs] def getCompositionData( self, data=None, populations=[], type_of_composition='Pathogens', hosts=True, vectors=False, num_top_sequences=-1, track_specific_sequences=[], genomic_positions=[], count_individuals_based_on_model=None, save_data_to_file="", n_cores=0, **kwargs): """Create dataframe with counts for pathogen genomes or resistance. Creates a pandas Dataframe with dynamics of the pathogen strains or protection sequences across selected populations in the model, with one time point in each row and columns for pathogen genomes or protection sequences. Of note: sum of totals for all sequences in one time point does not necessarily equal the number of infected hosts and/or vectors, given multiple infections in the same host/vector are counted separately. Keyword arguments: data (pandas DataFrame): dataframe with model history as produced by `saveToDf` function; if None, computes this dataframe and saves it under 'raw_data_'+'save_data_to_file'. Defaults to None. populations (list of Strings): IDs of populations to include in analysis; if empty, uses all populations in model. Defaults to []. type_of_composition (String): field of data to count totals of, can be either 'Pathogens' or 'Protection'. Defaults to 'Pathogens'. hosts (Boolean): whether to count hosts. Defaults to True. vectors (Boolean): whether to count vectors. Defaults to False. num_top_sequences (int): how many sequences to count separately and include as columns, remainder will be counted under column "Other"; if <0, includes all genomes in model. Defaults to -1. track_specific_sequences (list of Strings): contains specific sequences to have as a separate column if not part of the top num_top_sequences sequences. Defaults to []. genomic_positions (list of lists of int): list in which each element is a list with loci positions to extract (e.g. genomic_positions=[ [0,3], [5,6] ] extracts positions 0, 1, 2, and 5 from each genome); if empty, takes full genomes. Defaults to []. count_individuals_based_on_model (None or Model object): Model object with populations and fitness functions used to evaluate the most fit pathogen genome in each host/vector in order to count only a single pathogen per host/vector, asopposed to all pathogens within each host/vector; if None, counts all pathogens. Defaults to None. save_data_to_file (String): file path and name to save model data under, no saving occurs if empty string. Defaults to "". n_cores (int): number of cores to parallelize processing across, if 0, all cores available are used. Defaults to 0. **kwargs: additional arguents for joblib multiprocessing. Returns: pandas DataFrame with model sequence composition dynamics as described above. """ if data is None: data = saveToDf( self.history,'raw_data_'+save_to_file,n_cores,verbose=verbose, **kwargs ) return compositionDf( data, populations=populations, type_of_composition=type_of_composition, hosts=hosts, vectors=vectors, num_top_sequences=num_top_sequences, track_specific_sequences=track_specific_sequences, genomic_positions=genomic_positions, count_individuals_based_on_model=count_individuals_based_on_model, save_to_file=save_data_to_file, n_cores=n_cores, **kwargs )
### Model interventions: ###
[docs] def newPopulation(self, id, setup_name, num_hosts=0, num_vectors=0): """Create a new Population object with setup parameters. If population ID is already in use, appends _2 to it Arguments: id (String): unique identifier for this population in the model. setup_name (Setup object): setup object with parameters for this population. Keyword arguments: num_hosts (int >= 0): number of hosts to initialize population with. Defaults to 100. num_vectors (int >= 0): number of vectors to initialize population with. Defaults to 100. """ if id in self.populations.keys(): id = id+'_2' self.populations[id] = Population( self, id, self.setups[setup_name], num_hosts, num_vectors ) for p in self.populations: self.populations[id].setHostMigrationNeighbor(self.populations[p],0) self.populations[id].setVectorMigrationNeighbor( self.populations[p],0 ) self.populations[id].setHostHostPopulationContactNeighbor( self.populations[p],0 ) self.populations[id].setVectorHostPopulationContactNeighbor( self.populations[p],0 ) self.populations[id].setHostVectorPopulationContactNeighbor( self.populations[p],0 ) self.populations[p].setHostMigrationNeighbor( self.populations[id],0 ) self.populations[p].setVectorMigrationNeighbor( self.populations[id],0 ) self.populations[p].setHostHostPopulationContactNeighbor( self.populations[id],0 ) self.populations[p].setVectorHostPopulationContactNeighbor( self.populations[id],0 ) self.populations[p].setHostVectorPopulationContactNeighbor( self.populations[id],0 )
[docs] def linkPopulationsHostMigration(self, pop1_id, pop2_id, rate): """Set host migration rate from one population towards another. Arguments: pop1_id (String): origin population for which migration rate will be specified. pop1_id (String): destination population for which migration rate will be specified. rate (number >= 0): migration rate from one population to the neighbor; evts/time. """ self.populations[pop1_id].setHostMigrationNeighbor( self.populations[pop2_id], rate )
[docs] def linkPopulationsVectorMigration(self, pop1_id, pop2_id, rate): """Set vector migration rate from one population towards another. Arguments: pop1_id (String): origin population for which migration rate will be specified. pop1_id (String): destination population for which migration rate will be specified. rate (number >= 0): migration rate from one population to the neighbor; evts/time. """ self.populations[pop1_id].setVectorMigrationNeighbor( self.populations[pop2_id], rate )
[docs] def linkPopulationsHostHostContact(self, pop1_id, pop2_id, rate): """Set host-host contact rate from one population towards another. Arguments: pop1_id (String): origin population for which inter-population contact rate will be specified. pop1_id (String): destination population for which inter-population contact rate will be specified. rate (number >= 0): inter-population contact rate from one population to the neighbor; evts/time. """ self.populations[pop1_id].setHostHostPopulationContactNeighbor( self.populations[pop2_id], rate )
[docs] def linkPopulationsHostVectorContact(self, pop1_id, pop2_id, rate): """Set host-vector contact rate from one population towards another. Arguments: pop1_id (String): origin population for which inter-population contact rate will be specified. pop1_id (String): destination population for which inter-population contact rate will be specified. rate (number >= 0): inter-population contact rate from one population to the neighbor; evts/time. """ self.populations[pop1_id].setHostVectorPopulationContactNeighbor( self.populations[pop2_id], rate )
[docs] def linkPopulationsVectorHostContact(self, pop1_id, pop2_id, rate): """Set vector-host contact rate from one population towards another. Arguments: pop1_id (String): origin population for which inter-population contact rate will be specified. pop1_id (String): destination population for which inter-population contact rate will be specified. rate (number >= 0): inter-population contact rate from one population to the neighbor; evts/time. """ self.populations[pop1_id].setVectorHostPopulationContactNeighbor( self.populations[pop2_id], rate )
[docs] def createInterconnectedPopulations( self, num_populations, id_prefix, setup_name, host_migration_rate=0, vector_migration_rate=0, host_host_contact_rate=0, host_vector_contact_rate=0, vector_host_contact_rate=0, num_hosts=100, num_vectors=100): """Create new populations, link all of them to each other. All populations in this cluster are linked with the same migration rate, starting number of hosts and vectors, and setup parameters. Their IDs are numbered onto prefix given as 'id_prefix_0', 'id_prefix_1', 'id_prefix_2', etc. Arguments: num_populations (int): number of populations to be created. id_prefix (String): prefix for IDs to be used for this population in the model. setup_name (Setup object): setup object with parameters for all populations. Keyword arguments: host_migration_rate (number >= 0): host migration rate between populations; evts/time. Defaults to 0. vector_migration_rate (number >= 0): vector migration rate between populations; evts/time. Defaults to 0. host_host_contact_rate (number >= 0): host-host inter-population contact rate between populations; evts/time. Defaults to 0. host_vector_contact_rate (number >= 0): host-vector inter-population contact rate between populations; evts/time. Defaults to 0. vector_host_contact_rate (number >= 0): vector-host inter-population contact rate between populations; evts/time. Defaults to 0. num_hosts (int): number of hosts to initialize population with. Defaults to 100. num_vectors (int): number of hosts to initialize population with. Defaults to 100. """ new_pops = [ Population( self, str(id_prefix) + str(i), self.setups[setup_name], num_hosts, num_vectors ) for i in range(num_populations) ] new_pop_ids = [] for pop in new_pops: if pop.id in self.populations.keys(): pop.id = pop.id+'_2' self.populations[pop.id] = pop new_pop_ids.append(pop.id) for p in self.populations: pop.setHostMigrationNeighbor(self.populations[p],0) pop.setVectorMigrationNeighbor(self.populations[p],0) pop.setHostHostPopulationContactNeighbor(self.populations[p],0) pop.setHostVectorPopulationContactNeighbor(self.populations[p],0) pop.setVectorHostPopulationContactNeighbor(self.populations[p],0) self.populations[p].setHostMigrationNeighbor(pop,0) self.populations[p].setVectorMigrationNeighbor(pop,0) self.populations[p].setHostHostPopulationContactNeighbor(pop,0) self.populations[p].setHostVectorPopulationContactNeighbor(pop,0) self.populations[p].setVectorHostPopulationContactNeighbor(pop,0) for p1_id in new_pop_ids: for p2_id in new_pop_ids: self.linkPopulationsHostMigration( p1_id,p2_id,host_migration_rate ) self.linkPopulationsVectorMigration( p1_id,p2_id,vector_migration_rate ) self.linkPopulationsHostHostContact( p1_id,p2_id,host_host_contact_rate ) self.linkPopulationsHostVectorContact( p1_id,p2_id,host_vector_contact_rate ) self.linkPopulationsVectorHostContact( p1_id,p2_id,vector_host_contact_rate )
[docs] def newHostGroup(self, pop_id, group_id, hosts=-1, type='any'): """Return a list of random hosts in population. Arguments: pop_id (String): ID of population to be sampled from. group_id (String): ID to name group with. Keyword arguments: hosts (number): number of hosts to be sampled randomly: if <0, samples from whole population; if <1, takes that fraction of population; if >=1, samples that integer number of hosts. Defaults to -1. type (String = {'healthy', 'infected', 'any'}): whether to sample healthy hosts only, infected hosts only, or any hosts. Defaults to 'any'. Returns: list containing sampled hosts. """ self.groups[group_id] = self.populations[pop_id].newHostGroup( hosts, type )
[docs] def newVectorGroup(self, pop_id, group_id, vectors=-1, type='any'): """Return a list of random vectors in population. Arguments: pop_id (String): ID of population to be sampled from. group_id (String): ID to name group with. Keyword arguments: vectors (number): number of vectors to be sampled randomly: if <0, samples from whole population; if <1, takes that fraction of population; if >=1, samples that integer number of vectors. Defaults to -1. type (String = {'healthy', 'infected', 'any'}): whether to sample healthy vectors only, infected vectors only, or any vectors. Defaults to 'any'. Returns: list containing sampled vectors. """ self.groups[group_id] = self.populations[pop_id].newVectorGroup( vectors, type )
[docs] def addHosts(self, pop_id, num_hosts): """Add a number of healthy hosts to population, return list with them. Arguments: pop_id (String): ID of population to be modified. num_hosts (int): number of hosts to be added. Returns: list containing new hosts. """ self.populations[pop_id].addHosts(num_hosts)
[docs] def addVectors(self, pop_id, num_vectors): """Add a number of healthy vectors to population, return list with them. Arguments: pop_id (String): ID of population to be modified. num_vectors (int): number of vectors to be added. Returns: list containing new vectors. """ self.populations[pop_id].addVectors(num_vectors)
[docs] def removeHosts(self, pop_id, num_hosts_or_list): """Remove a number of specified or random hosts from population. Arguments: pop_id (String): ID of population to be modified. num_hosts_or_list (int or list of Hosts): number of hosts to be sampled randomly for removal or list of hosts to be removed, must be hosts in this population. """ self.populations[pop_id].removeHosts(num_hosts_or_list)
[docs] def removeVectors(self, pop_id, num_vectors_or_list): """Remove a number of specified or random vectors from population. Arguments: pop_id (String): ID of population to be modified. num_vectors_or_list (int or list of Vectors): number of vectors to be sampled randomly for removal or list of vectors to be removed, must be vectors in this population. """ self.populations[pop_id].removeVectors(num_vectors_or_list)
[docs] def addPathogensToHosts(self, pop_id, genomes_numbers, group_id=""): """Add specified pathogens to random hosts, optionally from a list. Arguments: pop_id (String): ID of population to be modified. genomes_numbers (dict with keys=Strings, values=int) dictionary containing pathogen genomes to add as keys and number of hosts each one will be added to as values. Keyword arguments: group_id (String): ID of specific hosts to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": hosts = self.populations[pop_id].hosts else: hosts = self.groups[group_id] self.populations[pop_id].addPathogensToHosts(genomes_numbers,hosts)
[docs] def addPathogensToVectors(self, pop_id, genomes_numbers, group_id=""): """Add specified pathogens to random vectors, optionally from a list. Arguments: pop_id (String): ID of population to be modified. genomes_numbers (dict with keys=Strings, values=int): dictionary containing pathogen genomes to add as keys and number of vectors each one will be added to as values. Keyword arguments: group_id (String): ID of specific vectors to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": vectors = self.populations[pop_id].vectors else: vectors = self.groups[group_id] self.populations[pop_id].addPathogensToVectors(genomes_numbers,vectors)
[docs] def treatHosts(self, pop_id, frac_hosts, resistance_seqs, group_id=""): """Treat random fraction of infected hosts against some infection. Removes all infections with genotypes susceptible to given treatment. Pathogens are removed if they are missing at least one of the sequences in resistance_seqs from their genome. Removes this organism from population infected list and adds to healthy list if appropriate. Arguments: pop_id (String): ID of population to be modified. frac_hosts (number between 0 and 1): fraction of hosts considered to be randomly selected. resistance_seqs (list of Strings): contains sequences required for treatment resistance. Keyword arguments: group_id (String): ID of specific hosts to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": hosts = self.populations[pop_id].hosts else: hosts = self.groups[group_id] self.populations[pop_id].treatHosts(frac_hosts,resistance_seqs,hosts)
[docs] def treatVectors(self, pop_id, frac_vectors, resistance_seqs, group_id=""): """Treat random fraction of infected vectors agains some infection. Removes all infections with genotypes susceptible to given treatment. Pathogens are removed if they are missing at least one of the sequences in resistance_seqs from their genome. Removes this organism from population infected list and adds to healthy list if appropriate. Arguments: pop_id (String): ID of population to be modified. frac_vectors (number between 0 and 1): fraction of vectors considered to be randomly selected. resistance_seqs (list of Strings): contains sequences required for treatment resistance. Keyword arguments: group_id (String): ID of specific vectors to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": vectors = self.populations[pop_id].vectors else: vectors = self.groups[group_id] self.populations[pop_id].treatVectors( frac_vectors,resistance_seqs,vectors )
[docs] def protectHosts( self, pop_id, frac_hosts, protection_sequence, group_id=""): """Protect a random fraction of infected hosts against some infection. Adds protection sequence specified to a random fraction of the hosts specified. Does not cure them if they are already infected. Arguments: pop_id (String): ID of population to be modified. frac_hosts (number between 0 and 1): fraction of hosts considered to be randomly selected. protection_sequence (String): sequence against which to protect. Keyword arguments: group_id (String): ID of specific hosts to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": hosts = self.populations[pop_id].hosts else: hosts = self.groups[group_id] self.populations[pop_id].protectHosts( frac_hosts,protection_sequence,hosts )
[docs] def protectVectors( self, pop_id, frac_vectors, protection_sequence, group_id=""): """Protect a random fraction of infected vectors against some infection. Adds protection sequence specified to a random fraction of the vectors specified. Does not cure them if they are already infected. Arguments: pop_id (String): ID of population to be modified. frac_vectors (number between 0 and 1): fraction of vectors considered to be randomly selected. protection_sequence (String): sequence against which to protect. Keyword arguments: group_id (String): ID of specific vectors to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": vectors = self.populations[pop_id].vectors else: vectors = self.groups[group_id] self.populations[pop_id].protectVectors( frac_vectors,protection_sequence,vectors )
[docs] def wipeProtectionHosts(self, pop_id, group_id=""): """Removes all protection sequences from hosts. Arguments: pop_id (String): ID of population to be modified. Keyword arguments: group_id (String): ID of specific hosts to sample from, if empty, samples from whole from whole population. Defaults to "". """ if group_id == "": hosts = self.populations[pop_id].hosts else: hosts = self.groups[group_id] self.populations[pop_id].wipeProtectionHosts(hosts)
[docs] def wipeProtectionVectors(self, pop_id, group_id=""): """Removes all protection sequences from vectors. Arguments: pop_id (String): ID of population to be modified. Keyword arguments: group_id (String): ID of specific vectors to sample from, if empty, samples from whole population. Defaults to "". """ if group_id == "": vectors = self.populations[pop_id].vectors else: vectors = self.groups[group_id] self.populations[pop_id].wipeProtectionVectors(vectors)
### Modify population parameters: ###
[docs] def setSetup(self, pop_id, setup_id): """Assign parameters stored in Setup object to this population. Arguments: pop_id (String): ID of population to be modified. setup_id (String): ID of setup to be assigned. """ self.populations[pop_id].setSetup( self.setups[setup_id] )
### Utility: ###
[docs] def customModelFunction(self, function): """Returns output of given function, passing this model as a parameter. Arguments: function (callable): function to be evaluated; must take a Model object as the only parameter. Returns: output of function passed as parameter. """ return function(self)
### Preset fitness functions: ###
[docs] @staticmethod def peakLandscape(genome, peak_genome, min_value): """Return genome phenotype by decreasing with distance from optimal seq. A purifying selection fitness function based on exponential decay of fitness as genomes move away from the optimal sequence. Distance is measured as percent Hamming distance from an optimal genome sequence. Arguments: genome (String): the genome to be evaluated. peak_genome (String): the genome sequence to measure distance against, has value of 1. min_value (number 0-1): minimum value at maximum distance from optimal genome. Return: value of genome (number). """ distance = td.hamming(genome, peak_genome) / len(genome) value = np.exp( np.log( min_value ) * distance ) return value
[docs] @staticmethod def valleyLandscape(genome, valley_genome, min_value): """Return genome phenotype by increasing with distance from worst seq. A disruptive selection fitness function based on exponential decay of fitness as genomes move closer to the worst possible sequence. Distance is measured as percent Hamming distance from the worst possible genome sequence. Arguments: genome (String): the genome to be evaluated. valley_genome (String): the genome sequence to measure distance against, has value of min_value. min_value (number 0-1): fitness value of worst possible genome. Return: value of genome (number). """ distance = td.hamming(genome, valley_genome) / len(genome) value = np.exp( np.log( min_value ) * ( 1 - distance ) ) return value