commit d42924645af9c35465d4309a264504ad6a17d161
parent b90acb87add41527b7d76849f4f0f293b5e64bbb
Author: walther chen <walther.chen@gmail.com>
Date:   Sun, 16 Mar 2025 23:11:33 +0700

update c3 for 0.7.0 dev

Diffstat:
Mapproximate_pattern_count.c3 | 15++++++++-------
Mba1a.c3 | 11++++++-----
Mba1b.c3 | 36++++++++++++++++++++++++------------
Mba1c.c3 | 10+++++-----
Mba1d.c3 | 21+++++++++++----------
Mba1e.c3 | 17+++++++++--------
Mba1f.c3 | 17+++++++++--------
Mba1g.c3 | 11++++++-----
Mba1h.c3 | 15++++++++-------
Mba1i.c3 | 16++++++++--------
Mba1j.c3 | 17++++++++---------
Mba1n.c3 | 13++++++-------
Mclump_finding.c3 | 18+++++++++---------
Mutil-frequency.c3 | 60++++++++++++++++++++++++++++++------------------------------
14 files changed, 147 insertions(+), 130 deletions(-)

diff --git a/approximate_pattern_count.c3 b/approximate_pattern_count.c3 @@ -7,16 +7,17 @@ import std::collections; import util; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String pattern = io::treadline(&f)!; - String genome = io::treadline(&f)!; - String d_str = io::treadline(&f)!; - int d = d_str.to_integer(int)!; + File f = file::open(args[1], "rb")!!; + String pattern = io::treadline(&f)!!; + String genome = io::treadline(&f)!!; + String d_str = io::treadline(&f)!!; + int d = d_str.to_integer(int)!!; int[] matches = util::approximate_pattern_matching(pattern, genome, d); io::printfn("%d", matches.len); + return 0; } diff --git a/ba1a.c3 b/ba1a.c3 @@ -4,15 +4,16 @@ import std::io; import std::io::file; import std::collections; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String text = io::treadline(&f)!; - String pattern = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String text = io::treadline(&f)!!; + String pattern = io::treadline(&f)!!; io::printn(pattern_count(text, pattern)); + return 0; } fn int pattern_count(String text, String pattern) { diff --git a/ba1b.c3 b/ba1b.c3 @@ -6,42 +6,54 @@ import std::collections; import util; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String text = io::treadline(&f)!; - String k_str = io::treadline(&f)!; - int k = k_str.to_integer(int)!; + File f = file::open(args[1], "rb")!!; + String text = io::treadline(&f)!!; + String k_str = io::treadline(&f)!!; + int k = k_str.to_integer(int)!!; foreach(word: frequent_words(text, k)) { io::printf("%s ", word); } io::printn(); + return 0; } fn String[] frequent_words(String text, int k, Allocator alloc= allocator::heap()) { - @pool() { + @pool(alloc) { FrequencyTable freq_map = util::frequency_table(text, k, allocator::temp()); - int[] counts = freq_map.value_tlist(); + int[] counts = freq_map.tvalues(); int max = 0; freq_map.@each(; String _k, int count) { if (count > max) max = count; }; - List(<String>) res; - res.temp_init(); + List{String} res; + res.tinit(); freq_map.@each(; String key, int v) { if (v == max) { res.push(key.copy(alloc)); } }; - return res.to_new_array(alloc); + return res.to_array(alloc); }; } fn void test_frequent_words() @test { - assert(frequent_words("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4) == {"CATG", "GCAT"}); + String[] words = frequent_words("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4, tmem()); + assert(words.len == 2); + assert(words.contains("CATG")); + assert(words.contains("GCAT")); +} +fn bool String[].contains(ss, String target) { + foreach (s : ss) { + if (s == target) { + return true; + } + } + return false; } diff --git a/ba1c.c3 b/ba1c.c3 @@ -8,13 +8,13 @@ import std::collections; import util; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String pattern = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String pattern = io::treadline(&f)!!; io::printfn(util::reverse_complement(pattern)); + return 0; } - diff --git a/ba1d.c3 b/ba1d.c3 @@ -5,36 +5,37 @@ import std::io; import std::io::file; import std::collections; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String pattern = io::treadline(&f)!; - String genome = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String pattern = io::treadline(&f)!!; + String genome = io::treadline(&f)!!; int[] idxs = pattern_matching(pattern, genome); foreach (idx : idxs) { io::printf("%d ", idx); } + return 0; } fn int[] pattern_matching(String pattern, String genome, Allocator alloc = allocator::heap()) { if (pattern.len == 0 || genome.len == 0) return {}; - List(<int>) res; - @pool() { - res.temp_init(); + List{int} res; + @pool(alloc) { + res.tinit(); for (int i = 0; i <= genome.len - pattern.len; i += 1) { if (genome[i:pattern.len] == pattern) { res.push(i); } } - return res.to_new_array(alloc); + return res.to_array(alloc); }; } fn void test_pattern_matching() @test { - assert(pattern_matching("ATAT", "GATATATGCATATACTT") == {1, 3, 9}); + assert(pattern_matching("ATAT", "GATATATGCATATACTT", tmem()) == {1, 3, 9}); } diff --git a/ba1e.c3 b/ba1e.c3 @@ -7,20 +7,21 @@ import std::collections; import util; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String genome = io::treadline(&f)!; - String ints = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String genome = io::treadline(&f)!!; + String ints = io::treadline(&f)!!; String[] ints_split = ints.tsplit(" "); - int k = ints_split[0].to_integer(int)!; - int region_len = ints_split[1].to_integer(int)!; // L - int clump_threshold = ints_split[2].to_integer(int)!; // t + int k = ints_split[0].to_integer(int)!!; + int region_len = ints_split[1].to_integer(int)!!; // L + int clump_threshold = ints_split[2].to_integer(int)!!; // t String[] clumps = util::clump_finding(genome, k, region_len, clump_threshold); foreach (clump : clumps) { io::printf("%s ", clump); } + return 0; } diff --git a/ba1f.c3 b/ba1f.c3 @@ -7,25 +7,26 @@ import std::collections; import util; -def IntList = List(<ulong>); +alias IntList = List{ulong}; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String genome = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String genome = io::treadline(&f)!!; ulong[] min_skew_idxs = minimum_skew_idxs(genome); foreach (i : min_skew_idxs) { io::printf("%d ", i); } + return 0; } // TODO use min heap fn ulong[] minimum_skew_idxs(String genome, Allocator alloc = allocator::heap()) { IntList idxs; - idxs.temp_init(); + idxs.tinit(); int min_skew = 0; int skew = 0; foreach (i, base : genome) { @@ -45,9 +46,9 @@ fn ulong[] minimum_skew_idxs(String genome, Allocator alloc = allocator::heap()) idxs.push(skew_idx); } } - return idxs.to_new_array(alloc); + return idxs.to_array(alloc); } fn void test_minimum_skew_idxs() @test { - assert(minimum_skew_idxs("TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT") == {11, 24}); + assert(minimum_skew_idxs("TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT", tmem()) == {11, 24}); } diff --git a/ba1g.c3 b/ba1g.c3 @@ -7,13 +7,14 @@ import std::collections; import util; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String s1 = io::treadline(&f)!; - String s2 = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String s1 = io::treadline(&f)!!; + String s2 = io::treadline(&f)!!; io::printn(util::hamming_distance(s1, s2)); + return 0; } diff --git a/ba1h.c3 b/ba1h.c3 @@ -7,18 +7,19 @@ import std::collections; import util; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String pattern = io::treadline(&f)!; - String genome = io::treadline(&f)!; - String d_str = io::treadline(&f)!; - int d = d_str.to_integer(int)!; + File f = file::open(args[1], "rb")!!; + String pattern = io::treadline(&f)!!; + String genome = io::treadline(&f)!!; + String d_str = io::treadline(&f)!!; + int d = d_str.to_integer(int)!!; int[] matches = util::approximate_pattern_matching(pattern, genome, d); foreach (match : matches) { io::printf("%d ", match); } + return 0; } diff --git a/ba1i.c3 b/ba1i.c3 @@ -7,20 +7,20 @@ import std::collections; import util; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String genome = io::treadline(&f)!; - String ints = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String genome = io::treadline(&f)!!; + String ints = io::treadline(&f)!!; String[] ints_split = ints.tsplit(" "); - int k = ints_split[0].to_integer(int)!; - int d = ints_split[1].to_integer(int)!; + int k = ints_split[0].to_integer(int)!!; + int d = ints_split[1].to_integer(int)!!; String[] matches = util::frequent_words_with_mismatches(genome, k, d); foreach (match : matches) { io::printf("%s ", match); } + return 0; } - diff --git a/ba1j.c3 b/ba1j.c3 @@ -7,21 +7,20 @@ import std::collections; import util; -fn void! 
main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String genome = io::treadline(&f)!; - String ints = io::treadline(&f)!; + File f = file::open(args[1], "rb")!!; + String genome = io::treadline(&f)!!; + String ints = io::treadline(&f)!!; String[] ints_split = ints.tsplit(" "); - int k = ints_split[0].to_integer(int)!; - int d = ints_split[1].to_integer(int)!; + int k = ints_split[0].to_integer(int)!!; + int d = ints_split[1].to_integer(int)!!; String[] matches = util::frequent_words_with_mismatches_and_rc(genome, k, d); foreach (match : matches) { io::printf("%s ", match); } + return 0; } - - diff --git a/ba1n.c3 b/ba1n.c3 @@ -7,18 +7,17 @@ import std::collections; import util; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 2) { io::eprintn("Please supply path to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File f = file::open(args[1], "rb")!; - String pattern = io::treadline(&f)!; - int d = io::treadline(&f)!.to_integer(int)!; + File f = file::open(args[1], "rb")!!; + String pattern = io::treadline(&f)!!; + int d = io::treadline(&f)!!.to_integer(int)!!; String[] matches = util::neighbors(pattern, d); foreach (match : matches) { io::printf("%s ", match); } + return 0; } - - diff --git a/clump_finding.c3 b/clump_finding.c3 @@ -12,22 +12,22 @@ import std::collections; import util; -fn void! main(String[] args) { +fn int main(String[] args) { if (args.len != 5) { io::eprintn("Please supply path, k, L, t to data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } - File! f = file::open(args[1], "rb"); + File? 
f = file::open(args[1], "rb"); if (catch err = f) { io::eprintn("Missing data file"); - return IoError.FILE_NOT_FOUND?; + return 1; } defer (void)f.close(); - String genome = (String)io::read_new_fully(&f)!; - int k = args[2].to_integer(int)!; - int region_len = args[3].to_integer(int)!; // L - int clump_threshold = args[4].to_integer(int)!; // t + String genome = (String)io::read_fully(mem, &f)!!; + int k = args[2].to_integer(int)!!; + int region_len = args[3].to_integer(int)!!; // L + int clump_threshold = args[4].to_integer(int)!!; // t String[] clumps = util::clump_finding(genome, k, region_len, clump_threshold); io::printfn("%s ", clumps.len); + return 0; } - diff --git a/util-frequency.c3 b/util-frequency.c3 @@ -26,12 +26,12 @@ fn void test_reverse_complement() @test { assert(reverse_complement("AAAACCCGGT") == "ACCGGGTTTT"); } -def FrequencyTable = HashMap(<String, int>); +alias FrequencyTable = HashMap{String, int}; // counts of kmers in a text. fn FrequencyTable frequency_table(String text, int k, Allocator alloc = allocator::heap()) { FrequencyTable kmer_counts; - kmer_counts.new_init(allocator: alloc); + kmer_counts.init(allocator: alloc); for (int i = 0; i <= text.len - k; i += 1) { // TODO get_or_update? 
Annoying to get twice kmer_counts.@get_or_set(text[i:k], 0); // don't need to clone, keys copied into table @@ -51,11 +51,11 @@ fn String[] clump_finding( { if (k == 0 || genome.len == 0) return {}; String[] res; - @pool() { - HashMap(<String, char>) clumps; // a set - clumps.temp_init(); + @pool(alloc) { + HashMap{String, char} clumps; // a set + clumps.tinit(); for (int i = 0; i <= genome.len - region_len; i += 1) { - @pool(allocator::temp()) { + @pool(alloc) { FrequencyTable freq_map = frequency_table(genome[i:region_len], k, allocator::temp()); freq_map.@each(; String kmer, int count) { if (count >= clump_threshold) { @@ -67,7 +67,7 @@ fn String[] clump_finding( }; }; } - return clumps.copy_keys(alloc); + return clumps.keys(alloc); }; } @@ -99,16 +99,16 @@ fn int[] approximate_pattern_matching( Allocator alloc = allocator::heap()) { if (pattern.len == 0 || genome.len == 0) return {}; - List(<int>) res; - @pool() { - res.temp_init(); + List{int} res; + @pool(alloc) { + res.tinit(); for (int i = 0; i <= genome.len - pattern.len; i += 1) { // TODO more efficient to break early when calculating hamming distance if (util::hamming_distance(genome[i:pattern.len], pattern) <= d) { res.push(i); } } - return res.to_new_array(alloc); + return res.to_array(alloc); }; } @@ -140,7 +140,7 @@ fn void test_approximate_pattern_matching() @test { }, }; foreach (t : tests) { - int[] matches = approximate_pattern_matching(t.pattern, t.genome, t.d); + int[] matches = approximate_pattern_matching(t.pattern, t.genome, t.d, tmem()); assert(matches == t.expected, "Expected %s, found %s", t.expected, matches); } } @@ -154,12 +154,12 @@ fn String[] _frequent_words_with_mismatches( bool with_rc, Allocator alloc= allocator::heap()) { - @pool() { + @pool(alloc) { if (with_rc) { - text = text.concat(reverse_complement(text, allocator::temp()), allocator::temp()); + text = text.tconcat(reverse_complement(text, allocator::temp())); } FrequencyTable freq_map; - freq_map.new_init(allocator: 
alloc); + freq_map.init(allocator: alloc); for (int i = 0; i <= text.len - k; i += 1) { String pattern = text[i:k]; String[] neighborhood = neighbors(pattern, d, allocator::temp()); @@ -171,20 +171,20 @@ fn String[] _frequent_words_with_mismatches( } } - int[] counts = freq_map.value_tlist(); + int[] counts = freq_map.tvalues(); int max = 0; freq_map.@each(; String _k, int count) { if (count > max) max = count; }; - List(<String>) res; - res.temp_init(); + List{String} res; + res.tinit(); freq_map.@each(; String key, int v) { if (v == max) { res.push(key.copy(alloc)); } }; - return res.to_new_array(alloc); + return res.to_array(alloc); }; } @@ -261,10 +261,10 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() DynamicArenaAllocator suffix_neighborhood_arena; neighborhood_arena.init(1024, allocator::temp()); suffix_neighborhood_arena.init(1024, allocator::temp()); - List(<String>) neighborhood; - List(<String>) suffix_neighborhood; - neighborhood.new_init_with_array({"A", "C", "G", "T"}, &neighborhood_arena); - suffix_neighborhood.new_init(16, &suffix_neighborhood_arena); + List{String} neighborhood; + List{String} suffix_neighborhood; + neighborhood.init_with_array(&neighborhood_arena, {"A", "C", "G", "T"}); + suffix_neighborhood.init(&suffix_neighborhood_arena, 16); for (int i = 1; i < pattern.len; i += 1) { // swap suffix_neighborhood and neighborhood. 
The previous // neighborhood becomes the new suffix_neighborhood to iterate @@ -277,7 +277,7 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() // // The memory itself is cleared for reuse by the arena holding the // List (see next swap) - List(<String>) swap_var; + List{String} swap_var; swap_var = suffix_neighborhood; suffix_neighborhood = neighborhood; neighborhood = swap_var; @@ -291,7 +291,7 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() // clear out the old neighborhood for use in next iteration neighborhood_arena.free(); - neighborhood.new_init(suffix_neighborhood.len(), &neighborhood_arena); + neighborhood.init(&neighborhood_arena, suffix_neighborhood.len(),); int suffix_idx = pattern.len - i; String suffix = pattern[suffix_idx..]; @@ -300,15 +300,15 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap() //io::printfn("%s, %s, %s, %s", suffix, suffix_neighbor, suffix_neighborhood, neighborhood); if (hamming_distance(suffix, suffix_neighbor) < d) { // adding the additional base will be at most hamming dist == d, - neighborhood.push("A".concat(suffix_neighbor, &suffix_neighborhood_arena)); - neighborhood.push("C".concat(suffix_neighbor, &suffix_neighborhood_arena)); - neighborhood.push("G".concat(suffix_neighbor, &suffix_neighborhood_arena)); - neighborhood.push("T".concat(suffix_neighbor, &suffix_neighborhood_arena)); + neighborhood.push("A".concat(&suffix_neighborhood_arena, suffix_neighbor)); + neighborhood.push("C".concat(&suffix_neighborhood_arena, suffix_neighbor)); + neighborhood.push("G".concat(&suffix_neighborhood_arena, suffix_neighbor)); + neighborhood.push("T".concat(&suffix_neighborhood_arena, suffix_neighbor)); } else { // hamming distance == d. It can't be more, because the neighbors are // generated up to hamming distance d. Adding the first symbol from the // original pattern cannot increase the hamming distance. 
- neighborhood.push(pattern[suffix_idx - 1:1].concat(suffix_neighbor, &suffix_neighborhood_arena)); + neighborhood.push(pattern[suffix_idx - 1:1].concat(&suffix_neighborhood_arena, suffix_neighbor,)); } } }