commit 191fb44e1be803e1161752d75660aefc57a5d124
parent 34d85e3ad6733caed3dcd63931281d254d82354c
Author: Walther Chen <walther.chen@gmail.com>
Date:   Fri,  1 Nov 2024 10:43:16 -0400

Improve memory usage in neighbors

Funny, I thought I lowered the memory pressure by swapping arenas and
clearing, which should have removed old suffix strings. But memory usage
actually increased a bit while instructions went down quite a bit, and
runtime got much faster.

bioinformatics on  main > poop "./ba1j input.txt" "./ba1j-mem input.txt"
Benchmark 1 (100 runs): ./ba1j input.txt
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          49.9ms ± 3.15ms    45.4ms … 66.5ms          1 ( 1%)        0%
  peak_rss           22.4MB ± 48.7KB    22.3MB … 22.5MB          0 ( 0%)        0%
  cpu_cycles          109M  ± 4.65M      105M  …  148M           4 ( 4%)        0%
  instructions        223M  ± 4.65       223M  …  223M           1 ( 1%)        0%
  cache_references   1.52M  ± 29.0K     1.40M  … 1.65M          11 (11%)        0%
  cache_misses        638K  ± 49.4K      562K  …  783K           3 ( 3%)        0%
  branch_misses       138K  ± 2.72K      134K  …  155K           3 ( 3%)        0%
Benchmark 2 (146 runs): ./ba1j-mem input.txt
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          34.3ms ± 2.43ms    30.7ms … 43.1ms          1 ( 1%)        ⚡- 31.3% ±  1.4%
  peak_rss           22.9MB ± 47.8KB    22.8MB … 23.0MB          0 ( 0%)        💩+  2.3% ±  0.1%
  cpu_cycles         67.1M  ± 3.37M     62.9M  … 85.2M           4 ( 3%)        ⚡- 38.7% ±  0.9%
  instructions        124M  ±  124       124M  …  124M           8 ( 5%)        ⚡- 44.5% ±  0.0%
  cache_references   1.01M  ± 18.1K      895K  … 1.08M           9 ( 6%)        ⚡- 33.3% ±  0.4%
  cache_misses        325K  ± 70.0K      237K  …  595K          10 ( 7%)        ⚡- 49.0% ±  2.5%
  branch_misses       130K  ± 1.84K      127K  …  139K           1 ( 1%)        ⚡-  6.0% ±  0.4%

Diffstat:
Mutil.c3 | 38++++++++++++++++++++++++++++----------
1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/util.c3 b/util.c3 @@ -1,5 +1,6 @@ module util; import std::collections; +import std::core::mem; import std::core::string; import std::io; import std::sort; @@ -206,10 +207,14 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() { if (d == 0) return {pattern}; if (pattern.len == 0) return {}; + DynamicArenaAllocator neighborhood_arena; + DynamicArenaAllocator suffix_neighborhood_arena; + neighborhood_arena.init(4096, allocator::temp()); + suffix_neighborhood_arena.init(4096, allocator::temp()); List(<String>) neighborhood; - neighborhood.temp_init_with_array({"A", "C", "G", "T"}); List(<String>) suffix_neighborhood; - suffix_neighborhood.temp_init(); + neighborhood.new_init_with_array({"A", "C", "G", "T"}, &neighborhood_arena); + suffix_neighborhood.new_init(16, &suffix_neighborhood_arena); for (int i = 1; i < pattern.len; i += 1) { // swap suffix_neighborhood and neighborhood. The previous // neighborhood becomes the new suffix_neighborhood to iterate @@ -219,11 +224,24 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() // been moved into the old neighborhood/new suffix_neighborhood as // calculated during the last iteration. (otherwise we have to keep // allocating new memory to collect the next neighborhood into). + // + // The memory itself is cleared for reuse by the arena holding the + // List (see next swap) List(<String>) swap_var; swap_var = suffix_neighborhood; suffix_neighborhood = neighborhood; neighborhood = swap_var; - neighborhood.clear(); + + // Same swap with the arenas + // Old strings and the List holding them all get cleared + DynamicArenaAllocator swap_arena; + swap_arena = suffix_neighborhood_arena; + suffix_neighborhood_arena = neighborhood_arena; + neighborhood_arena = swap_arena; + + // clear out the old neighborhood for use in next iteration + neighborhood_arena.free(); + neighborhood.new_init(suffix_neighborhood.len(), &neighborhood_arena); int suffix_idx = pattern.len - i; String suffix = pattern[suffix_idx..]; @@ -232,20 +250,20 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() //io::printfn("%s, %s, %s, %s", suffix, suffix_neighbor, suffix_neighborhood, neighborhood); if (hamming_distance(suffix, suffix_neighbor) < d) { // adding the additional base will be at most hamming dist == d, - // TODO we can reuse the memory of neighborhoods by swapping, can we - // do something about all the new strings? - neighborhood.push("A".tconcat(suffix_neighbor)); - neighborhood.push("C".tconcat(suffix_neighbor)); - neighborhood.push("G".tconcat(suffix_neighbor)); - neighborhood.push("T".tconcat(suffix_neighbor)); + neighborhood.push("A".concat(suffix_neighbor, &suffix_neighborhood_arena)); + neighborhood.push("C".concat(suffix_neighbor, &suffix_neighborhood_arena)); + neighborhood.push("G".concat(suffix_neighbor, &suffix_neighborhood_arena)); + neighborhood.push("T".concat(suffix_neighbor, &suffix_neighborhood_arena)); } else { // hamming distance == d. It can't be more, because the neighbors are // generated up to hamming distance d. Adding the first symbol from the // original pattern cannot increase the hamming distance. - neighborhood.push(pattern[suffix_idx - 1:1].tconcat(suffix_neighbor)); + neighborhood.push(pattern[suffix_idx - 1:1].concat(suffix_neighbor, &suffix_neighborhood_arena)); } } } + + // Copy strings out to final result's allocator String[] res = allocator::new_array(alloc, String, neighborhood.len()); foreach (i, neighbor : neighborhood) { res[i] = neighbor.copy(alloc);