commit 191fb44e1be803e1161752d75660aefc57a5d124
parent 34d85e3ad6733caed3dcd63931281d254d82354c
Author: Walther Chen <walther.chen@gmail.com>
Date: Fri, 1 Nov 2024 10:43:16 -0400
Improve memory usage in neighbors
Funny, I thought I lowered the memory pressure by swapping arenas and
clearing, which should have removed old suffix strings. But memory usage
actually increased a bit while instructions went down quite a bit, and
runtime got much faster.
bioinformatics on main > poop "./ba1j input.txt" "./ba1j-mem input.txt"
Benchmark 1 (100 runs): ./ba1j input.txt
measurement mean ± σ min … max outliers delta
wall_time 49.9ms ± 3.15ms 45.4ms … 66.5ms 1 ( 1%) 0%
peak_rss 22.4MB ± 48.7KB 22.3MB … 22.5MB 0 ( 0%) 0%
cpu_cycles 109M ± 4.65M 105M … 148M 4 ( 4%) 0%
instructions 223M ± 4.65 223M … 223M 1 ( 1%) 0%
cache_references 1.52M ± 29.0K 1.40M … 1.65M 11 (11%) 0%
cache_misses 638K ± 49.4K 562K … 783K 3 ( 3%) 0%
branch_misses 138K ± 2.72K 134K … 155K 3 ( 3%) 0%
Benchmark 2 (146 runs): ./ba1j-mem input.txt
measurement mean ± σ min … max outliers delta
wall_time 34.3ms ± 2.43ms 30.7ms … 43.1ms 1 ( 1%) ⚡- 31.3% ± 1.4%
peak_rss 22.9MB ± 47.8KB 22.8MB … 23.0MB 0 ( 0%) 💩+ 2.3% ± 0.1%
cpu_cycles 67.1M ± 3.37M 62.9M … 85.2M 4 ( 3%) ⚡- 38.7% ± 0.9%
instructions 124M ± 124 124M … 124M 8 ( 5%) ⚡- 44.5% ± 0.0%
cache_references 1.01M ± 18.1K 895K … 1.08M 9 ( 6%) ⚡- 33.3% ± 0.4%
cache_misses 325K ± 70.0K 237K … 595K 10 ( 7%) ⚡- 49.0% ± 2.5%
branch_misses 130K ± 1.84K 127K … 139K 1 ( 1%) ⚡- 6.0% ± 0.4%
Diffstat:
M | util.c3 | | | 38 | ++++++++++++++++++++++++++++---------- |
1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/util.c3 b/util.c3
@@ -1,5 +1,6 @@
module util;
import std::collections;
+import std::core::mem;
import std::core::string;
import std::io;
import std::sort;
@@ -206,10 +207,14 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
{
if (d == 0) return {pattern};
if (pattern.len == 0) return {};
+ DynamicArenaAllocator neighborhood_arena;
+ DynamicArenaAllocator suffix_neighborhood_arena;
+ neighborhood_arena.init(4096, allocator::temp());
+ suffix_neighborhood_arena.init(4096, allocator::temp());
List(<String>) neighborhood;
- neighborhood.temp_init_with_array({"A", "C", "G", "T"});
List(<String>) suffix_neighborhood;
- suffix_neighborhood.temp_init();
+ neighborhood.new_init_with_array({"A", "C", "G", "T"}, &neighborhood_arena);
+ suffix_neighborhood.new_init(16, &suffix_neighborhood_arena);
for (int i = 1; i < pattern.len; i += 1) {
// swap suffix_neighborhood and neighborhood. The previous
// neighborhood becomes the new suffix_neighborhood to iterate
@@ -219,11 +224,24 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
// been moved into the old neighborhood/new suffix_neighborhood as
// calculated during the last iteration. (otherwise we have to keep
// allocating new memory to collect the next neighborhood into).
+ //
+ // The memory itself is cleared for reuse by the arena holding the
+ // List (see next swap)
List(<String>) swap_var;
swap_var = suffix_neighborhood;
suffix_neighborhood = neighborhood;
neighborhood = swap_var;
- neighborhood.clear();
+
+ // Same swap with the arenas
+ // Old strings and the List holding them all get cleared
+ DynamicArenaAllocator swap_arena;
+ swap_arena = suffix_neighborhood_arena;
+ suffix_neighborhood_arena = neighborhood_arena;
+ neighborhood_arena = swap_arena;
+
+ // clear out the old neighborhood for use in next iteration
+ neighborhood_arena.free();
+ neighborhood.new_init(suffix_neighborhood.len(), &neighborhood_arena);
int suffix_idx = pattern.len - i;
String suffix = pattern[suffix_idx..];
@@ -232,20 +250,20 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
//io::printfn("%s, %s, %s, %s", suffix, suffix_neighbor, suffix_neighborhood, neighborhood);
if (hamming_distance(suffix, suffix_neighbor) < d) {
// adding the additional base will be at most hamming dist == d,
- // TODO we can reuse the memory of neighborhoods by swapping, can we
- // do something about all the new strings?
- neighborhood.push("A".tconcat(suffix_neighbor));
- neighborhood.push("C".tconcat(suffix_neighbor));
- neighborhood.push("G".tconcat(suffix_neighbor));
- neighborhood.push("T".tconcat(suffix_neighbor));
+ neighborhood.push("A".concat(suffix_neighbor, &suffix_neighborhood_arena));
+ neighborhood.push("C".concat(suffix_neighbor, &suffix_neighborhood_arena));
+ neighborhood.push("G".concat(suffix_neighbor, &suffix_neighborhood_arena));
+ neighborhood.push("T".concat(suffix_neighbor, &suffix_neighborhood_arena));
} else {
// hamming distance == d. It can't be more, because the neighbors are
// generated up to hamming distance d. Adding the first symbol from the
// original pattern cannot increase the hamming distance.
- neighborhood.push(pattern[suffix_idx - 1:1].tconcat(suffix_neighbor));
+ neighborhood.push(pattern[suffix_idx - 1:1].concat(suffix_neighbor, &suffix_neighborhood_arena));
}
}
}
+
+ // Copy strings out to final result's allocator
String[] res = allocator::new_array(alloc, String, neighborhood.len());
foreach (i, neighbor : neighborhood) {
res[i] = neighbor.copy(alloc);