commit 8e8772fb11b89e0cda3c553e053eeb3c8a41a5b9
parent dbb2bb78577af39dd3df7f903f1b3dbf7154bfa2
Author: walther chen <walther.chen@gmail.com>
Date:   Wed, 19 Mar 2025 16:41:40 +0700

update for new pool in 0.7.0, remove some unneeded copies, fix some test mem leaks

Diffstat:
M ba1b.c3 | 2 +-
M ba1d.c3 | 2 +-
M util-frequency.c3 | 53 +++++++++++++++++++++++++++++------------------------
3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/ba1b.c3 b/ba1b.c3 @@ -23,7 +23,7 @@ fn int main(String[] args) { } fn String[] frequent_words(String text, int k, Allocator alloc= allocator::heap()) { - @pool(alloc) { + @pool() { FrequencyTable freq_map = util::frequency_table(text, k, allocator::temp()); int[] counts = freq_map.tvalues(); diff --git a/ba1d.c3 b/ba1d.c3 @@ -23,7 +23,7 @@ fn int main(String[] args) { fn int[] pattern_matching(String pattern, String genome, Allocator alloc = allocator::heap()) { if (pattern.len == 0 || genome.len == 0) return {}; List{int} res; - @pool(alloc) { + @pool() { res.tinit(); for (int i = 0; i <= genome.len - pattern.len; i += 1) { if (genome[i:pattern.len] == pattern) { diff --git a/util-frequency.c3 b/util-frequency.c3 @@ -23,19 +23,21 @@ fn String reverse_complement(String pattern, Allocator alloc = allocator::heap() } fn void test_reverse_complement() @test { - assert(reverse_complement("AAAACCCGGT") == "ACCGGGTTTT"); + assert(reverse_complement("AAAACCCGGT", tmem()) == "ACCGGGTTTT"); } alias FrequencyTable = HashMap{String, int}; // counts of kmers in a text. +// Lifetime of result is same as that of `text` param. fn FrequencyTable frequency_table(String text, int k, Allocator alloc = allocator::heap()) { FrequencyTable kmer_counts; kmer_counts.init(allocator: alloc); for (int i = 0; i <= text.len - k; i += 1) { // TODO get_or_update? Annoying to get twice - // Modified std so that hashmap doesn't copy keys - String key = text[i:k].copy(alloc); + // Modified std so that hashmap doesn't copy keys, but these + // keys lifetime is the same as `text`. + String key = text[i:k]; kmer_counts.@get_or_set(key, 0); if (try count = kmer_counts.get_ref(key)) { *count += 1; @@ -44,6 +46,7 @@ fn FrequencyTable frequency_table(String text, int k, Allocator alloc = allocato return kmer_counts; } +// Lifetime of result is that of the `genome` param. 
fn String[] clump_finding( String genome, int k, @@ -53,19 +56,19 @@ fn String[] clump_finding( { if (k == 0 || genome.len == 0) return {}; HashMap{String, char} clumps; // a set - @pool(alloc) { + @pool() { clumps.tinit(); for (int i = 0; i <= genome.len - region_len; i += 1) { - // TODO Could use another pool here, but there appears to be a bug with nested pools - FrequencyTable freq_map = frequency_table(genome[i:region_len], k, tmem()); - freq_map.@each(; String kmer, int count) { - if (count >= clump_threshold) { - // TODO can probably just use a growable array on small inputs - // std hashmap modified to not copy keys, so have to copy out kmer from - // frequency table - String key = kmer.copy(alloc); - clumps.set(key, 0); - } + @pool() { + FrequencyTable freq_map = frequency_table(genome[i:region_len], k, tmem()); + freq_map.@each(; String kmer, int count) { + if (count >= clump_threshold) { + // TODO can probably just use a growable array on small inputs + // std hashmap modified to not copy keys, lifetime of kmer is that of + // the `genome` param + clumps.set(kmer, 0); + } + }; }; } return clumps.keys(alloc); @@ -101,7 +104,7 @@ fn int[] approximate_pattern_matching( { if (pattern.len == 0 || genome.len == 0) return {}; List{int} res; - @pool(alloc) { + @pool() { res.tinit(); for (int i = 0; i <= genome.len - pattern.len; i += 1) { // TODO more efficient to break early when calculating hamming distance @@ -155,7 +158,7 @@ fn String[] _frequent_words_with_mismatches( bool with_rc, Allocator alloc= allocator::heap()) { - @pool(alloc) { + @pool() { if (with_rc) { text = text.tconcat(reverse_complement(text, allocator::temp())); } @@ -165,7 +168,7 @@ fn String[] _frequent_words_with_mismatches( String pattern = text[i:k]; String[] neighborhood = neighbors(pattern, d, allocator::temp()); foreach (neighbor : neighborhood) { - freq_map.@get_or_set(neighbor, 0); // don't need to clone, keys copied into table + freq_map.@get_or_set(neighbor, 0); if (try int* count 
= freq_map.get_ref(neighbor)) { *count += 1; } @@ -230,8 +233,10 @@ fn void test_frequent_words_with_mismatches() @test { }, }; foreach (t : tests) { - String[] matches = frequent_words_with_mismatches(t.text, t.k, t.d); - test::expect_equal_slices_sorted(t.expected, matches); + @pool() { + String[] matches = frequent_words_with_mismatches(t.text, t.k, t.d, tmem()); + test::expect_equal_slices_sorted(t.expected, matches); + }; } } fn void test_frequent_words_with_mismatches_and_rc() @test { @@ -244,7 +249,7 @@ fn void test_frequent_words_with_mismatches_and_rc() @test { }, }; foreach (t : tests) { - String[] matches = frequent_words_with_mismatches_and_rc(t.text, t.k, t.d); + String[] matches = frequent_words_with_mismatches_and_rc(t.text, t.k, t.d, tmem()); test::expect_equal_slices_sorted(t.expected, matches); } } @@ -257,11 +262,11 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() { if (d == 0) return {pattern}; if (pattern.len == 0) return {}; - @pool(alloc) { + @pool() { DynamicArenaAllocator neighborhood_arena; DynamicArenaAllocator suffix_neighborhood_arena; - neighborhood_arena.init(1024, allocator::temp()); - suffix_neighborhood_arena.init(1024, allocator::temp()); + neighborhood_arena.init(tmem(), 1024); + suffix_neighborhood_arena.init(tmem(), 1024); List{String} neighborhood; List{String} suffix_neighborhood; neighborhood.init_with_array(&neighborhood_arena, {"A", "C", "G", "T"}); @@ -352,7 +357,7 @@ fn void test_neighbors() @test { }, }; foreach (t : tests) { - String[] matches = neighbors(t.text, t.d); + String[] matches = neighbors(t.text, t.d, tmem()); test::expect_equal_slices_sorted(t.expected, matches); } }