Joeri Jongbloets bio photo

Joeri Jongbloets

Almost MSc // Modeller in the lab // Fluent in Java, Python, and R

Email Twitter

Simulation of microbial populations in a turbidostat

Overview

Simulate a stable culture

Code to sample from a distribution defined by percentiles

## Based on:
## http://stackoverflow.com/questions/14547364/generate-distribution-given-percentile-ranks?lq=1
sample.prob <- function(n, values, cum.probs, pair.len = 100) {
  prob <- c( first(cum.probs), diff(cum.probs), 1-last(cum.probs))
  # Extreme values beyond x (to sample)
  init <- min(values) - diff(values)[[1]]
  fin  <- max(values) + diff(values)[[2]]
  
  ival <- c(init, values, fin) # generate the sequence to take pairs from
  
  s <- sapply(2:length(ival), function(i) {
    seq(ival[i-1], ival[i], length.out=pair.len)
  })
  # sample from s, total of 10000 values with probabilities calculated above
  return( sample(s, n, prob=rep(prob, each=pair.len), replace = T) )
}

sample.prob_c <- compiler::cmpfun(sample.prob)

Code to bundle values into bins

# create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
#   if (is.null(lwr)) lwr <- min(values)
#   if (is.null(upr)) upr <- max(values)
#   if ( lwr == upr ) { 
#     lwr <- lwr * (1 - 1/ (n.bins / 2))
#     upr <- upr * (1 + 1/ (n.bins / 2))
#   }
#   apply( values, 1, cut, c(-Inf, seq(lwr, upr, length.out=n.bins-1), Inf), labels=FALSE)
# }

create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
  if (is.null(lwr)) lwr <- min(values)
  if (is.null(upr)) upr <- max(values)
  if ( lwr == upr ) { 
    lwr <- lwr * (1 - 1/ (n.bins / 2))
    upr <- upr * (1 + 1/ (n.bins / 2))
  }
  findInterval(values, c(seq(lwr, upr, length.out=n.bins)))
}

## create byte code
create.bins_c <- compiler::cmpfun(create.bins)

create.bins.df <- function( df, bin.var, n.bins = 10, dots = list(), dots.name = c(), size.name="size", 
                            add.group=FALSE, ... ) {
  ## generate bins: group growth rates into growth.bins number of bins.
  df$bin <- create.bins( df[[bin.var]], n.bins, ... )
  ## prepare summarise
  dots <- c(dots, list(~n()))
  dots.name <- c(dots.name, size.name)
  ## return binned population as the seed population
  result <- df %>%
    group_by_("bin", add=add.group) %>%
    summarise_(
      .dots = setNames(dots, dots.name)
    ) %>%
    select_("-bin")
  return(result)
}

# create byte code
create.bins.df_c <- compiler::cmpfun(create.bins.df)

Code to create a population by sampling from a percentile distribution

## creates a population of cells in n bins of growth rate
## return: data.frame with cells : growth.rate , n.cells
seed.population <- function( 
  n.cells = 10000, mean.growth = 0.08, extreme.growth = 0.09, extreme.prob = 0.001, growth.bins = 100,
  sample.fun = sample.prob
) {
  ## generate sample population
  extreme.left = mean.growth - (extreme.growth - mean.growth)
  population <- data.frame( 
  growth.rate = sample.fun( 
    n = n.cells, ## generate 1000 cells
    values = c(extreme.left, mean.growth, extreme.growth), ## distribution values
    c( extreme.prob , 0.5, 1-extreme.prob)) # cumulative probabilities
  ) 
  ## generate bins
  return(
    create.bins.df(
      population, "growth.rate", growth.bins, list(~mean(growth.rate)), c("growth.rate"), "n.cells"
    )
  )
}
seed.population_c <- compiler::cmpfun(seed.population)

Code to simulate the turbidostat

The global simulation function

## Simulation function, will run n.rounds of simulation using a selection function and mutation function
# cells: data.frame, with growth.rate and n.cells // cells to evolve
# n.rounds: int // number of rounds
# t.step: int // length of each time step in hours
run.simulation <- function( cells, n.rounds=100, t.step=5/60, dead.threshold = 1, grow.fun=simulate.growth, dilute.fun=simulate.turbidostat, verbose=FALSE, ... ) 
{
  t.begin <- Sys.time()
  ## result logs the counts of all cells at each time point
  ## preallocate a row for each round in result
  result <- vector(mode = "list", length = n.rounds)
  result[[1]] <- as.data.table(cells %>% mutate( t = 0, decision = FALSE))
  i <- 1
  while ( i <= n.rounds & nrow(cells) > 0 )
  {
    t.cycle <- Sys.time()
    ## grow cells
    if (verbose) print("## Grow cells")
    cells <- grow.fun(cells, t.step, verbose=verbose...)
    ## run turbidostat
    if (verbose) print("## Dilute cells")
    cells <- dilute.fun(cells, verbose=verbose, ...)
    # set dead strains to zero to indicate their death
    if (verbose) print("## Tag extinct strains")
    cells <- cells %>% mutate( n.cells = ifelse(n.cells < dead.threshold, 0, n.cells))
    ## log state
    if (verbose) print("## Add to results")
    result[[i+1]] <- as.data.table(cells %>% mutate(t=i))
    ## remove dead strains
    if (verbose) print("## Remove extinct strains")
    cells <- cells %>% filter( n.cells >= dead.threshold)
    if ( i %% (n.rounds / 100) == 0) {
      t.now <- Sys.time()
      print(
        sprintf("%i%% completed // t: %.0f hour // C/T: %.3f/%.3f seconds // Cells: %i // Strains: %i", 
                i / (n.rounds / 100), 
                i * t.step,
                as.numeric(t.now - t.cycle, units="secs"), 
                as.numeric(t.now - t.begin, units="secs"),
                as.integer(sum(cells$n.cells)), as.integer(count(cells))
              ))
    }
    i <- i + 1
  }
  if (verbose & nrow(cells) == 0) print("Ran out of cells!")
  ## leave user with ability to rbindlist the result
  return(result)
}

run.simulation_c <- compiler::cmpfun(run.simulation)

The turbidostat decision function

## Turbidostat simulation procedure
simulate.turbidostat <- function ( cells, threshold = 100, dilution.fraction = 0.08, verbose=FALSE, ... ) {
  ## calculate total 
  cell.count <- sum(cells$n.cells)
  if (verbose) print(sprintf( "TBS: %s > %s", cell.count, threshold ))
  ## adjust cell count accordingly
  result <- cells %>% 
      mutate( 
        decision = cell.count > threshold,
        n.cells = ifelse( decision, floor(n.cells * (1-dilution.fraction)), n.cells )
      )
  if (verbose) print(sprintf( "TBS: Removed %.2f cells", cell.count - sum(cells$n.cells) ))
  return (result)
}

simulate.turbidostat_c <- compiler::cmpfun(simulate.turbidostat)

## Growth simulation
## just grows the cells
simulate.growth <- function( cells, t.step, partial.division = TRUE, ...) {
  result <- cells %>%
    mutate(
      n.cells = n.cells * exp( t.step * growth.rate )
    )
  if (!partial.division) {
    results <- cells %>%
      mutate(
        n.cells = floor(n.cells)
      )
  }
  return(
    result
  )
}

simulate.growth_c <- compiler::cmpfun(simulate.growth)

Configuration of the simulation

Parameters of inoculation population

  • Number of cells inoculated: 4000000
  • Number of strains (bins of growth rate): 100
  • Average growth in culture: 0.08
  • Extreme growth in culture: 0.0808
  • Chance on extreme growth: 0.00001

Parameters of simulation

  • Total number of simulated rounds: 12000
  • Time length of each round: 0.0833333 hour
  • Turbidostat threshold: 28000000

Result statistics

  • Total time: 1000 (hours)
  • Mean number of cells in culture: 26502994.2094651
  • Mean number of cells per strain: 500690.9651014

Development of cell count of the 5 largest strains (of each time point), without the total cell count.

Number of strains at a given time point

Range of growth rates at a given time point. Area shows max and min growth rates, mean is indicated by black line.

Number of strains being diluted out at each time point.

Calculating culture growth rate

Code to calculate growth rates

I will use the dxdt method to calculate growth rates, as it is faster than fitting linear models to each dilution cycle and the data should not contain any noise. which uses the following formula:

growth.rate = dlog(x)/dt = (log(xmax) - log(xmin)) / (tmax - tmin)

dxdt_od_min <- function(od, time, mv.sides = 3){
  od <- log(od)
  # set to NA
  od[which(is.nan(od))] = NA
  od[which(is.infinite(od))] = NA
  # remove NA values
  time <- time[!is.na(od)]
  od <- od[!is.na(od)]
  # get minimum and max: start from minimum instead of first
  min.idx <- ifelse(length(od) > 0, which.min(od), 0)
  max.idx <- length(od)
  # prepare output
  output <- data.frame( 
    "time_h" = time[max.idx],
    "fit_size" = max.idx - min.idx, 
    "pump_size" = max.idx,
    "r_sq" = NA, 
    "slope" = NA, 
    "slope_se" = NA,
    "slope_p_value" = NA,
    "od_sd" = sd(od, na.rm=TRUE)
  )
  slopes <- c()
  min.size <- 3
  left.step <- 0
  while( (max.idx - min.idx) > min.size + left.step & left.step < mv.sides) {
    right.step <- 0
    while ((max.idx - min.idx) > min.size + left.step + right.step & right.step < mv.sides ) {
      ## add slope to slopes collection
      slopes <- c( slopes, c(
        (od[max.idx - right.step] - od[min.idx+left.step]) / (time[max.idx-right.step] - time[min.idx+left.step]) # first to last
      )) 
      right.step <- right.step + 1
    }
    left.step <- left.step + 1
  }
  if (length(slopes) > 1)
  {
    output$slope <- median(slopes, na.rm = TRUE)
    output$slope_se <- sd(slopes, na.rm = TRUE)
  }
  return(output)
}

Observed culture growth (calculating growth rate using culture (total) cell count)

Simulate mutations

To simulate mutations I implemented a new growth function. Every time a strain grows, new offspring is created. I will assign growth rates to the offspring, by sampling from a percentile distribution.

The distribution is defined by mutation.rate and mutation.benefit. The distribution is created from the idea that there is chance (mutation.rate) on having a percent increase or decrease in growth rate, called mutation.benefit. The average (50 percent) of the new cells keeps the growth rate of it's parent.

To reduce the complexity of the simulation I will reduce the amount of strains in the culture to max. 100 strains, by creating bins of growth rates.

Configuration of the simulation

Parameters of inoculation population

  • Number of cells inoculated: 4000000
  • Number of strains (bins of growth rate): 100
  • Average growth in culture: 0.08
  • Extreme growth in culture: 0.0808
  • Chance on extreme growth: 0.00001

Parameters of simulation

  • Total number of simulated rounds: 3000
  • Time length of each round: 0.1666667 hour
  • Turbidostat threshold: 28000000
  • Mutation rate: 0.00001 (chance of getting the mutation benefit)
  • Mutation beneft: 0.01 (percentage increase in growth rate)

Values for mutation rate and mutation benefit were taken from http://doi.org/10.1126/science.1142284

Result statistics

  • Total time: 500 (hours)
  • Mean number of cells in culture: 26153301.5611463
  • Mean number of cells per strain: 269088.2903001

Development of cell count of the 5 largest strains (of each time point), without the total cell count.

Number of strains at a given time point

Range of growth rates at a given time point

Number of strains that were diluted out at a each time point.

Calculating culture growth rate

Observed culture growth (calculated growth rate of culture using (total) cell count). Red is start growth rate.

Growth rate of largest population (absolute size) over time. Red is start growth rate.