 ### Joeri Jongbloets

Almost MSc // Modeller in the lab // Fluent in Java, Python, and R

# Simulate a stable culture

### Code to sample from a distribution defined by percentiles

``````## Based on:
## http://stackoverflow.com/questions/14547364/generate-distribution-given-percentile-ranks?lq=1
sample.prob <- function(n, values, cum.probs, pair.len = 100) {
prob <- c( first(cum.probs), diff(cum.probs), 1-last(cum.probs))
# Extreme values beyond x (to sample)
init <- min(values) - diff(values)[]
fin  <- max(values) + diff(values)[]

ival <- c(init, values, fin) # generate the sequence to take pairs from

s <- sapply(2:length(ival), function(i) {
seq(ival[i-1], ival[i], length.out=pair.len)
})
# sample from s, total of 10000 values with probabilities calculated above
return( sample(s, n, prob=rep(prob, each=pair.len), replace = T) )
}

sample.prob_c <- compiler::cmpfun(sample.prob)
``````

### Code to bundle values into bins

``````# create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
#   if (is.null(lwr)) lwr <- min(values)
#   if (is.null(upr)) upr <- max(values)
#   if ( lwr == upr ) {
#     lwr <- lwr * (1 - 1/ (n.bins / 2))
#     upr <- upr * (1 + 1/ (n.bins / 2))
#   }
#   apply( values, 1, cut, c(-Inf, seq(lwr, upr, length.out=n.bins-1), Inf), labels=FALSE)
# }

create.bins <- function( values, n.bins, lwr=NULL, upr=NULL) {
if (is.null(lwr)) lwr <- min(values)
if (is.null(upr)) upr <- max(values)
if ( lwr == upr ) {
lwr <- lwr * (1 - 1/ (n.bins / 2))
upr <- upr * (1 + 1/ (n.bins / 2))
}
findInterval(values, c(seq(lwr, upr, length.out=n.bins)))
}

## create byte code
create.bins_c <- compiler::cmpfun(create.bins)

create.bins.df <- function( df, bin.var, n.bins = 10, dots = list(), dots.name = c(), size.name="size",
## generate bins: group growth rates into growth.bins number of bins.
df\$bin <- create.bins( df[[bin.var]], n.bins, ... )
## prepare summarise
dots <- c(dots, list(~n()))
dots.name <- c(dots.name, size.name)
## return binned population as the seed population
result <- df %>%
summarise_(
.dots = setNames(dots, dots.name)
) %>%
select_("-bin")
return(result)
}

# create byte code
create.bins.df_c <- compiler::cmpfun(create.bins.df)
``````

### Code to create a population by sampling from a percentile distribution

``````## creates a population of cells in n bins of growth rate
## return: data.frame with cells : growth.rate , n.cells
seed.population <- function(
n.cells = 10000, mean.growth = 0.08, extreme.growth = 0.09, extreme.prob = 0.001, growth.bins = 100,
sample.fun = sample.prob
) {
## generate sample population
extreme.left = mean.growth - (extreme.growth - mean.growth)
population <- data.frame(
growth.rate = sample.fun(
n = n.cells, ## generate 1000 cells
values = c(extreme.left, mean.growth, extreme.growth), ## distribution values
c( extreme.prob , 0.5, 1-extreme.prob)) # cumulative probabilities
)
## generate bins
return(
create.bins.df(
population, "growth.rate", growth.bins, list(~mean(growth.rate)), c("growth.rate"), "n.cells"
)
)
}
seed.population_c <- compiler::cmpfun(seed.population)
``````

### Code to simulate the turbidostat

The global simulation function

``````## Simulation function, will run n.rounds of simulation using a selection function and mutation function
# cells: data.frame, with growth.rate and n.cells // cells to evolve
# n.rounds: int // number of rounds
# t.step: int // length of each time step in hours
run.simulation <- function( cells, n.rounds=100, t.step=5/60, dead.threshold = 1, grow.fun=simulate.growth, dilute.fun=simulate.turbidostat, verbose=FALSE, ... )
{
t.begin <- Sys.time()
## result logs the counts of all cells at each time point
## preallocate a row for each round in result
result <- vector(mode = "list", length = n.rounds)
result[] <- as.data.table(cells %>% mutate( t = 0, decision = FALSE))
i <- 1
while ( i <= n.rounds & nrow(cells) > 0 )
{
t.cycle <- Sys.time()
## grow cells
if (verbose) print("## Grow cells")
cells <- grow.fun(cells, t.step, verbose=verbose...)
## run turbidostat
if (verbose) print("## Dilute cells")
cells <- dilute.fun(cells, verbose=verbose, ...)
# set dead strains to zero to indicate their death
if (verbose) print("## Tag extinct strains")
cells <- cells %>% mutate( n.cells = ifelse(n.cells < dead.threshold, 0, n.cells))
## log state
if (verbose) print("## Add to results")
result[[i+1]] <- as.data.table(cells %>% mutate(t=i))
if (verbose) print("## Remove extinct strains")
cells <- cells %>% filter( n.cells >= dead.threshold)
if ( i %% (n.rounds / 100) == 0) {
t.now <- Sys.time()
print(
sprintf("%i%% completed // t: %.0f hour // C/T: %.3f/%.3f seconds // Cells: %i // Strains: %i",
i / (n.rounds / 100),
i * t.step,
as.numeric(t.now - t.cycle, units="secs"),
as.numeric(t.now - t.begin, units="secs"),
as.integer(sum(cells\$n.cells)), as.integer(count(cells))
))
}
i <- i + 1
}
if (verbose & nrow(cells) == 0) print("Ran out of cells!")
## leave user with ability to rbindlist the result
return(result)
}

run.simulation_c <- compiler::cmpfun(run.simulation)
``````

The turbidostat decision function

``````## Turbidostat simulation procedure
simulate.turbidostat <- function ( cells, threshold = 100, dilution.fraction = 0.08, verbose=FALSE, ... ) {
## calculate total
cell.count <- sum(cells\$n.cells)
if (verbose) print(sprintf( "TBS: %s > %s", cell.count, threshold ))
result <- cells %>%
mutate(
decision = cell.count > threshold,
n.cells = ifelse( decision, floor(n.cells * (1-dilution.fraction)), n.cells )
)
if (verbose) print(sprintf( "TBS: Removed %.2f cells", cell.count - sum(cells\$n.cells) ))
return (result)
}

simulate.turbidostat_c <- compiler::cmpfun(simulate.turbidostat)

## Growth simulation
## just grows the cells
simulate.growth <- function( cells, t.step, partial.division = TRUE, ...) {
result <- cells %>%
mutate(
n.cells = n.cells * exp( t.step * growth.rate )
)
if (!partial.division) {
results <- cells %>%
mutate(
n.cells = floor(n.cells)
)
}
return(
result
)
}

simulate.growth_c <- compiler::cmpfun(simulate.growth)
``````

### Configuration of the simulation

Parameters of inoculation population

• Number of cells inoculated: 4000000
• Number of strains (bins of growth rate): 100
• Average growth in culture: 0.08
• Extreme growth in culture: 0.0808
• Chance on extreme growth: 0.00001

Parameters of simulation

• Total number of simulated rounds: 12000
• Time length of each round: 0.0833333 hour
• Turbidostat threshold: 28000000

Result statistics

• Total time: 1000 (hours)
• Mean number of cells in culture: 26502994.2094651
• Mean number of cells per strain: 500690.9651014

Development of cell count of the 5 largest strains (of each time point), without the total cell count. Number of strains at a given time point Range of growth rates at a given time point. Area shows max and min growth rates, mean is indicated by black line. Number of strains being diluted out at each time point. ## Calculating culture growth rate

### Code to calculate growth rates

I will use the dxdt method to calculate growth rates, as it is faster than fitting linear models to each dilution cycle and the data should not contain any noise. which uses the following formula:

`growth.rate = dlog(x)/dt = (log(xmax) - log(xmin)) / (tmax - tmin)`

``````dxdt_od_min <- function(od, time, mv.sides = 3){
od <- log(od)
# set to NA
od[which(is.nan(od))] = NA
od[which(is.infinite(od))] = NA
# remove NA values
time <- time[!is.na(od)]
od <- od[!is.na(od)]
# get minimum and max: start from minimum instead of first
min.idx <- ifelse(length(od) > 0, which.min(od), 0)
max.idx <- length(od)
# prepare output
output <- data.frame(
"time_h" = time[max.idx],
"fit_size" = max.idx - min.idx,
"pump_size" = max.idx,
"r_sq" = NA,
"slope" = NA,
"slope_se" = NA,
"slope_p_value" = NA,
"od_sd" = sd(od, na.rm=TRUE)
)
slopes <- c()
min.size <- 3
left.step <- 0
while( (max.idx - min.idx) > min.size + left.step & left.step < mv.sides) {
right.step <- 0
while ((max.idx - min.idx) > min.size + left.step + right.step & right.step < mv.sides ) {
## add slope to slopes collection
slopes <- c( slopes, c(
(od[max.idx - right.step] - od[min.idx+left.step]) / (time[max.idx-right.step] - time[min.idx+left.step]) # first to last
))
right.step <- right.step + 1
}
left.step <- left.step + 1
}
if (length(slopes) > 1)
{
output\$slope <- median(slopes, na.rm = TRUE)
output\$slope_se <- sd(slopes, na.rm = TRUE)
}
return(output)
}
``````

Observed culture growth (calculating growth rate using culture (total) cell count)  # Simulate mutations

To simulate mutations I implemented a new growth function. Every time a strain grows, new offspring is created. I will assign growth rates to the offspring, by sampling from a percentile distribution.

The distribution is defined by `mutation.rate` and `mutation.benefit`. The distribution is created from the idea that there is chance (`mutation.rate`) on having a percent increase or decrease in growth rate, called `mutation.benefit`. The average (50 percent) of the new cells keeps the growth rate of it's parent.

To reduce the complexity of the simulation I will reduce the amount of strains in the culture to max. 100 strains, by creating bins of growth rates.

### Configuration of the simulation

Parameters of inoculation population

• Number of cells inoculated: 4000000
• Number of strains (bins of growth rate): 100
• Average growth in culture: 0.08
• Extreme growth in culture: 0.0808
• Chance on extreme growth: 0.00001

Parameters of simulation

• Total number of simulated rounds: 3000
• Time length of each round: 0.1666667 hour
• Turbidostat threshold: 28000000
• Mutation rate: 0.00001 (chance of getting the mutation benefit)
• Mutation beneft: 0.01 (percentage increase in growth rate)

Values for mutation rate and mutation benefit were taken from http://doi.org/10.1126/science.1142284

Result statistics

• Total time: 500 (hours)
• Mean number of cells in culture: 26153301.5611463
• Mean number of cells per strain: 269088.2903001

Development of cell count of the 5 largest strains (of each time point), without the total cell count. Number of strains at a given time point Range of growth rates at a given time point Number of strains that were diluted out at a each time point. ## Calculating culture growth rate

Observed culture growth (calculated growth rate of culture using (total) cell count). Red is start growth rate.  Growth rate of largest population (absolute size) over time. Red is start growth rate. 