Joeri Jongbloets bio photo

Joeri Jongbloets

Almost MSc // Modeller in the lab // Fluent in Java, Python, and R

Email Twitter

Ensemble Simulation of microbial populations in a turbidostat

Overview

Code to sample from a distribution defined by percentiles

## Based on:
## http://stackoverflow.com/questions/14547364/generate-distribution-given-percentile-ranks?lq=1
sample.prob <- function(n, values, cum.probs, pair.len = 100) {
  prob <- c( first(cum.probs), diff(cum.probs), 1-last(cum.probs))
  # Extreme values beyond x (to sample)
  init <- min(values) - diff(values)[[1]]
  fin  <- max(values) + diff(values)[[2]]
  
  ival <- c(init, values, fin) # generate the sequence to take pairs from
  
  s <- sapply(2:length(ival), function(i) {
    seq(ival[i-1], ival[i], length.out=pair.len)
  })
  # sample from s, total of 10000 values with probabilities calculated above
  return( sample(s, n, prob=rep(prob, each=pair.len), replace = T) )
}

sample.prob_c <- compiler::cmpfun(sample.prob)

Code to bundle values into bins

# create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
#   if (is.null(lwr)) lwr <- min(values)
#   if (is.null(upr)) upr <- max(values)
#   if ( lwr == upr ) { 
#     lwr <- lwr * (1 - 1/ (n.bins / 2))
#     upr <- upr * (1 + 1/ (n.bins / 2))
#   }
#   apply( values, 1, cut, c(-Inf, seq(lwr, upr, length.out=n.bins-1), Inf), labels=FALSE)
# }

create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
  if (is.null(lwr)) lwr <- min(values)
  if (is.null(upr)) upr <- max(values)
  if ( lwr == upr ) { 
    lwr <- lwr * (1 - 1/ (n.bins / 2))
    upr <- upr * (1 + 1/ (n.bins / 2))
  }
  findInterval(values, c(seq(lwr, upr, length.out=n.bins)))
}

## create byte code
create.bins_c <- compiler::cmpfun(create.bins)

create.bins.df <- function( df, bin.var, n.bins = 10, dots = list(), dots.name = c(), size.name="size", 
                            add.group=FALSE, ... ) {
  ## generate bins: group growth rates into growth.bins number of bins.
  df$bin <- create.bins( df[[bin.var]], n.bins, ... )
  ## prepare summarise
  dots <- c(dots, list(~n()))
  dots.name <- c(dots.name, size.name)
  ## return binned population as the seed population
  result <- df %>%
    group_by_("bin", add=add.group) %>%
    summarise_(
      .dots = setNames(dots, dots.name)
    ) %>%
    select_("-bin")
  return(result)
}

# create byte code
create.bins.df_c <- compiler::cmpfun(create.bins.df)

Code to create a population by sampling from a percentile distribution

## creates a population of cells in n bins of growth rate
## return: data.frame with cells : growth.rate , n.cells
seed.population <- function( 
  n.cells = 10000, mean.growth = 0.08, extreme.growth = 0.09, extreme.prob = 0.001, growth.bins = 100,
  sample.fun = sample.prob
) {
  ## generate sample population
  extreme.left = mean.growth - (extreme.growth - mean.growth)
  population <- data.frame( 
  growth.rate = sample.fun( 
    n = n.cells, ## generate 1000 cells
    values = c(extreme.left, mean.growth, extreme.growth), ## distribution values
    c( extreme.prob , 0.5, 1-extreme.prob)) # cumulative probabilities
  ) 
  ## generate bins
  return(
    create.bins.df(
      population, "growth.rate", growth.bins, list(~mean(growth.rate)), c("growth.rate"), "n.cells"
    )
  )
}
seed.population_c <- compiler::cmpfun(seed.population)

Code to simulate the turbidostat

The global simulation function

## Simulation function, will run n.rounds of simulation using a selection function and mutation function
# cells: data.frame, with growth.rate and n.cells // cells to evolve
# n.rounds: int // number of rounds
# t.step: int // length of each time step in hours
run.simulation <- function( cells, n.rounds=100, t.step=5/60, dead.threshold = 1, grow.fun=simulate.growth, dilute.fun=simulate.turbidostat, verbose=FALSE, ... ) 
{
  t.begin <- Sys.time()
  ## result logs the counts of all cells at each time point
  ## preallocate a row for each round in result
  result <- vector(mode = "list", length = n.rounds)
  result[[1]] <- as.data.table(cells %>% mutate( t = 0, decision = FALSE))
  i <- 1
  while ( i <= n.rounds & nrow(cells) > 0 )
  {
    t.cycle <- Sys.time()
    ## grow cells
    if (verbose) print("## Grow cells")
    cells <- grow.fun(cells, t.step, verbose=verbose...)
    ## run turbidostat
    if (verbose) print("## Dilute cells")
    cells <- dilute.fun(cells, verbose=verbose, ...)
    # set dead strains to zero to indicate their death
    if (verbose) print("## Tag extinct strains")
    cells <- cells %>% mutate( n.cells = ifelse(n.cells < dead.threshold, 0, n.cells))
    ## log state
    if (verbose) print("## Add to results")
    result[[i+1]] <- as.data.table(cells %>% mutate(t=i))
    ## remove dead strains
    if (verbose) print("## Remove extinct strains")
    cells <- cells %>% filter( n.cells >= dead.threshold)
    if ( i %% (n.rounds / 100) == 0) {
      t.now <- Sys.time()
      print(
        sprintf("%i%% completed // t: %.0f hour // C/T: %.3f/%.3f seconds // Cells: %i // Strains: %i", 
                i / (n.rounds / 100), 
                i * t.step,
                as.numeric(t.now - t.cycle, units="secs"), 
                as.numeric(t.now - t.begin, units="secs"),
                as.integer(sum(cells$n.cells)), as.integer(count(cells))
              ))
    }
    i <- i + 1
  }
  if (verbose & nrow(cells) == 0) print("Ran out of cells!")
  ## leave user with ability to rbindlist the result
  return(result)
}

run.simulation_c <- compiler::cmpfun(run.simulation)

The turbidostat decision function

## Turbidostat simulation procedure
simulate.turbidostat <- function ( cells, threshold = 100, dilution.fraction = 0.08, verbose=FALSE, ... ) {
  ## calculate total 
  cell.count <- sum(cells$n.cells)
  if (verbose) print(sprintf( "TBS: %s > %s", cell.count, threshold ))
  ## adjust cell count accordingly
  result <- cells %>% 
      mutate( 
        decision = cell.count > threshold,
        n.cells = ifelse( decision, floor(n.cells * (1-dilution.fraction)), n.cells )
      )
  if (verbose) print(sprintf( "TBS: Removed %.2f cells", cell.count - sum(cells$n.cells) ))
  return (result)
}

simulate.turbidostat_c <- compiler::cmpfun(simulate.turbidostat)

## Growth simulation
## just grows the cells
simulate.growth <- function( cells, t.step, partial.division = TRUE, ...) {
  result <- cells %>%
    mutate(
      n.cells = n.cells * exp( t.step * growth.rate )
    )
  if (!partial.division) {
    results <- cells %>%
      mutate(
        n.cells = floor(n.cells)
      )
  }
  return(
    result
  )
}

simulate.growth_c <- compiler::cmpfun(simulate.growth)

Code to do batch simulations

Configuration of the ensemble simulation

Parameters of inoculation populations

  • Total Number of cells inoculated: 4000000
  • Growth rates of simulated majority strains: 0.08
  • Number of cells in simulated majority strains: 4000000
  • Growth rates of simulated minority strains: 0.08008, 0.0804, 0.0808, 0.082, 0.084, 0.088, 0.1
  • Number of cells in simulated minority strains: 40

Parameters of simulation

  • Total number of simulated rounds: 12000
  • Time length of each round: 0.0833333 hour
  • Turbidostat threshold: 28000000

Result statistics

  • Total time: 1000 (hours)
  • Mean number of cells in culture: 185576007.901202
  • Mean number of cells per strain: 14117356.365668

Development of each strain.

Calculating culture growth rate

Code to calculate growth rates

I will use the dxdt method to calculate growth rates, as it is faster than fitting linear models to each dilution cycle and the data should not contain any noise. which uses the following formula:

growth.rate = dlog(x)/dt = (log(xmax) - log(xmin)) / (tmax - tmin)

dxdt_od_min <- function(od, time, mv.sides = 3){
  od <- log(od)
  # set to NA
  od[which(is.nan(od))] = NA
  od[which(is.infinite(od))] = NA
  # remove NA values
  time <- time[!is.na(od)]
  od <- od[!is.na(od)]
  # get minimum and max: start from minimum instead of first
  min.idx <- ifelse(length(od) > 0, which.min(od), 0)
  max.idx <- length(od)
  # prepare output
  output <- data.frame( 
    "time_h" = time[max.idx],
    "fit_size" = max.idx - min.idx, 
    "pump_size" = max.idx,
    "r_sq" = NA, 
    "slope" = NA, 
    "slope_se" = NA,
    "slope_p_value" = NA,
    "od_sd" = sd(od, na.rm=TRUE)
  )
  slopes <- c()
  min.size <- 3
  left.step <- 0
  while( (max.idx - min.idx) > min.size + left.step & left.step < mv.sides) {
    right.step <- 0
    while ((max.idx - min.idx) > min.size + left.step + right.step & right.step < mv.sides ) {
      ## add slope to slopes collection
      slopes <- c( slopes, c(
        (od[max.idx - right.step] - od[min.idx+left.step]) / (time[max.idx-right.step] - time[min.idx+left.step]) # first to last
      )) 
      right.step <- right.step + 1
    }
    left.step <- left.step + 1
  }
  if (length(slopes) > 1)
  {
    output$slope <- median(slopes, na.rm = TRUE)
    output$slope_se <- sd(slopes, na.rm = TRUE)
  }
  return(output)
}

Observed culture growth (calculating growth rate using culture (total) cell count)