commit d42924645af9c35465d4309a264504ad6a17d161
parent b90acb87add41527b7d76849f4f0f293b5e64bbb
Author: walther chen <walther.chen@gmail.com>
Date: Sun, 16 Mar 2025 23:11:33 +0700
update c3 for 0.7.0 dev
Diffstat:
14 files changed, 147 insertions(+), 130 deletions(-)
diff --git a/approximate_pattern_count.c3 b/approximate_pattern_count.c3
@@ -7,16 +7,17 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String pattern = io::treadline(&f)!;
- String genome = io::treadline(&f)!;
- String d_str = io::treadline(&f)!;
- int d = d_str.to_integer(int)!;
+ File f = file::open(args[1], "rb")!!;
+ String pattern = io::treadline(&f)!!;
+ String genome = io::treadline(&f)!!;
+ String d_str = io::treadline(&f)!!;
+ int d = d_str.to_integer(int)!!;
int[] matches = util::approximate_pattern_matching(pattern, genome, d);
io::printfn("%d", matches.len);
+ return 0;
}
diff --git a/ba1a.c3 b/ba1a.c3
@@ -4,15 +4,16 @@ import std::io;
import std::io::file;
import std::collections;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String text = io::treadline(&f)!;
- String pattern = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String text = io::treadline(&f)!!;
+ String pattern = io::treadline(&f)!!;
io::printn(pattern_count(text, pattern));
+ return 0;
}
fn int pattern_count(String text, String pattern) {
diff --git a/ba1b.c3 b/ba1b.c3
@@ -6,42 +6,54 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String text = io::treadline(&f)!;
- String k_str = io::treadline(&f)!;
- int k = k_str.to_integer(int)!;
+ File f = file::open(args[1], "rb")!!;
+ String text = io::treadline(&f)!!;
+ String k_str = io::treadline(&f)!!;
+ int k = k_str.to_integer(int)!!;
foreach(word: frequent_words(text, k)) {
io::printf("%s ", word);
}
io::printn();
+ return 0;
}
fn String[] frequent_words(String text, int k, Allocator alloc= allocator::heap()) {
- @pool() {
+ @pool(alloc) {
FrequencyTable freq_map = util::frequency_table(text, k, allocator::temp());
- int[] counts = freq_map.value_tlist();
+ int[] counts = freq_map.tvalues();
int max = 0;
freq_map.@each(; String _k, int count) {
if (count > max) max = count;
};
- List(<String>) res;
- res.temp_init();
+ List{String} res;
+ res.tinit();
freq_map.@each(; String key, int v) {
if (v == max) {
res.push(key.copy(alloc));
}
};
- return res.to_new_array(alloc);
+ return res.to_array(alloc);
};
}
fn void test_frequent_words() @test {
- assert(frequent_words("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4) == {"CATG", "GCAT"});
+ String[] words = frequent_words("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4, tmem());
+ assert(words.len == 2);
+ assert(words.contains("CATG"));
+ assert(words.contains("GCAT"));
+}
+fn bool String[].contains(ss, String target) {
+ foreach (s : ss) {
+ if (s == target) {
+ return true;
+ }
+ }
+ return false;
}
diff --git a/ba1c.c3 b/ba1c.c3
@@ -8,13 +8,13 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String pattern = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String pattern = io::treadline(&f)!!;
io::printfn(util::reverse_complement(pattern));
+ return 0;
}
-
diff --git a/ba1d.c3 b/ba1d.c3
@@ -5,36 +5,37 @@ import std::io;
import std::io::file;
import std::collections;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String pattern = io::treadline(&f)!;
- String genome = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String pattern = io::treadline(&f)!!;
+ String genome = io::treadline(&f)!!;
int[] idxs = pattern_matching(pattern, genome);
foreach (idx : idxs) {
io::printf("%d ", idx);
}
+ return 0;
}
fn int[] pattern_matching(String pattern, String genome, Allocator alloc = allocator::heap()) {
if (pattern.len == 0 || genome.len == 0) return {};
- List(<int>) res;
- @pool() {
- res.temp_init();
+ List{int} res;
+ @pool(alloc) {
+ res.tinit();
for (int i = 0; i <= genome.len - pattern.len; i += 1) {
if (genome[i:pattern.len] == pattern) {
res.push(i);
}
}
- return res.to_new_array(alloc);
+ return res.to_array(alloc);
};
}
fn void test_pattern_matching() @test {
- assert(pattern_matching("ATAT", "GATATATGCATATACTT") == {1, 3, 9});
+ assert(pattern_matching("ATAT", "GATATATGCATATACTT", tmem()) == {1, 3, 9});
}
diff --git a/ba1e.c3 b/ba1e.c3
@@ -7,20 +7,21 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String genome = io::treadline(&f)!;
- String ints = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String genome = io::treadline(&f)!!;
+ String ints = io::treadline(&f)!!;
String[] ints_split = ints.tsplit(" ");
- int k = ints_split[0].to_integer(int)!;
- int region_len = ints_split[1].to_integer(int)!; // L
- int clump_threshold = ints_split[2].to_integer(int)!; // t
+ int k = ints_split[0].to_integer(int)!!;
+ int region_len = ints_split[1].to_integer(int)!!; // L
+ int clump_threshold = ints_split[2].to_integer(int)!!; // t
String[] clumps = util::clump_finding(genome, k, region_len, clump_threshold);
foreach (clump : clumps) {
io::printf("%s ", clump);
}
+ return 0;
}
diff --git a/ba1f.c3 b/ba1f.c3
@@ -7,25 +7,26 @@ import std::collections;
import util;
-def IntList = List(<ulong>);
+alias IntList = List{ulong};
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String genome = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String genome = io::treadline(&f)!!;
ulong[] min_skew_idxs = minimum_skew_idxs(genome);
foreach (i : min_skew_idxs) {
io::printf("%d ", i);
}
+ return 0;
}
// TODO use min heap
fn ulong[] minimum_skew_idxs(String genome, Allocator alloc = allocator::heap()) {
IntList idxs;
- idxs.temp_init();
+ idxs.tinit();
int min_skew = 0;
int skew = 0;
foreach (i, base : genome) {
@@ -45,9 +46,9 @@ fn ulong[] minimum_skew_idxs(String genome, Allocator alloc = allocator::heap())
idxs.push(skew_idx);
}
}
- return idxs.to_new_array(alloc);
+ return idxs.to_array(alloc);
}
fn void test_minimum_skew_idxs() @test {
- assert(minimum_skew_idxs("TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT") == {11, 24});
+ assert(minimum_skew_idxs("TAAAGACTGCCGAGAGGCCAACACGAGTGCTAGAACGAGGGGCGTAAACGCGGGTCCGAT", tmem()) == {11, 24});
}
diff --git a/ba1g.c3 b/ba1g.c3
@@ -7,13 +7,14 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String s1 = io::treadline(&f)!;
- String s2 = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String s1 = io::treadline(&f)!!;
+ String s2 = io::treadline(&f)!!;
io::printn(util::hamming_distance(s1, s2));
+ return 0;
}
diff --git a/ba1h.c3 b/ba1h.c3
@@ -7,18 +7,19 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String pattern = io::treadline(&f)!;
- String genome = io::treadline(&f)!;
- String d_str = io::treadline(&f)!;
- int d = d_str.to_integer(int)!;
+ File f = file::open(args[1], "rb")!!;
+ String pattern = io::treadline(&f)!!;
+ String genome = io::treadline(&f)!!;
+ String d_str = io::treadline(&f)!!;
+ int d = d_str.to_integer(int)!!;
int[] matches = util::approximate_pattern_matching(pattern, genome, d);
foreach (match : matches) {
io::printf("%d ", match);
}
+ return 0;
}
diff --git a/ba1i.c3 b/ba1i.c3
@@ -7,20 +7,20 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String genome = io::treadline(&f)!;
- String ints = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String genome = io::treadline(&f)!!;
+ String ints = io::treadline(&f)!!;
String[] ints_split = ints.tsplit(" ");
- int k = ints_split[0].to_integer(int)!;
- int d = ints_split[1].to_integer(int)!;
+ int k = ints_split[0].to_integer(int)!!;
+ int d = ints_split[1].to_integer(int)!!;
String[] matches = util::frequent_words_with_mismatches(genome, k, d);
foreach (match : matches) {
io::printf("%s ", match);
}
+ return 0;
}
-
diff --git a/ba1j.c3 b/ba1j.c3
@@ -7,21 +7,20 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String genome = io::treadline(&f)!;
- String ints = io::treadline(&f)!;
+ File f = file::open(args[1], "rb")!!;
+ String genome = io::treadline(&f)!!;
+ String ints = io::treadline(&f)!!;
String[] ints_split = ints.tsplit(" ");
- int k = ints_split[0].to_integer(int)!;
- int d = ints_split[1].to_integer(int)!;
+ int k = ints_split[0].to_integer(int)!!;
+ int d = ints_split[1].to_integer(int)!!;
String[] matches = util::frequent_words_with_mismatches_and_rc(genome, k, d);
foreach (match : matches) {
io::printf("%s ", match);
}
+ return 0;
}
-
-
diff --git a/ba1n.c3 b/ba1n.c3
@@ -7,18 +7,17 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 2) {
io::eprintn("Please supply path to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File f = file::open(args[1], "rb")!;
- String pattern = io::treadline(&f)!;
- int d = io::treadline(&f)!.to_integer(int)!;
+ File f = file::open(args[1], "rb")!!;
+ String pattern = io::treadline(&f)!!;
+ int d = io::treadline(&f)!!.to_integer(int)!!;
String[] matches = util::neighbors(pattern, d);
foreach (match : matches) {
io::printf("%s ", match);
}
+ return 0;
}
-
-
diff --git a/clump_finding.c3 b/clump_finding.c3
@@ -12,22 +12,22 @@ import std::collections;
import util;
-fn void! main(String[] args) {
+fn int main(String[] args) {
if (args.len != 5) {
io::eprintn("Please supply path, k, L, t to data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
- File! f = file::open(args[1], "rb");
+ File? f = file::open(args[1], "rb");
if (catch err = f) {
io::eprintn("Missing data file");
- return IoError.FILE_NOT_FOUND?;
+ return 1;
}
defer (void)f.close();
- String genome = (String)io::read_new_fully(&f)!;
- int k = args[2].to_integer(int)!;
- int region_len = args[3].to_integer(int)!; // L
- int clump_threshold = args[4].to_integer(int)!; // t
+ String genome = (String)io::read_fully(mem, &f)!!;
+ int k = args[2].to_integer(int)!!;
+ int region_len = args[3].to_integer(int)!!; // L
+ int clump_threshold = args[4].to_integer(int)!!; // t
String[] clumps = util::clump_finding(genome, k, region_len, clump_threshold);
io::printfn("%s ", clumps.len);
+ return 0;
}
-
diff --git a/util-frequency.c3 b/util-frequency.c3
@@ -26,12 +26,12 @@ fn void test_reverse_complement() @test {
assert(reverse_complement("AAAACCCGGT") == "ACCGGGTTTT");
}
-def FrequencyTable = HashMap(<String, int>);
+alias FrequencyTable = HashMap{String, int};
// counts of kmers in a text.
fn FrequencyTable frequency_table(String text, int k, Allocator alloc = allocator::heap()) {
FrequencyTable kmer_counts;
- kmer_counts.new_init(allocator: alloc);
+ kmer_counts.init(allocator: alloc);
for (int i = 0; i <= text.len - k; i += 1) {
// TODO get_or_update? Annoying to get twice
kmer_counts.@get_or_set(text[i:k], 0); // don't need to clone, keys copied into table
@@ -51,11 +51,11 @@ fn String[] clump_finding(
{
if (k == 0 || genome.len == 0) return {};
String[] res;
- @pool() {
- HashMap(<String, char>) clumps; // a set
- clumps.temp_init();
+ @pool(alloc) {
+ HashMap{String, char} clumps; // a set
+ clumps.tinit();
for (int i = 0; i <= genome.len - region_len; i += 1) {
- @pool(allocator::temp()) {
+ @pool(alloc) {
FrequencyTable freq_map = frequency_table(genome[i:region_len], k, allocator::temp());
freq_map.@each(; String kmer, int count) {
if (count >= clump_threshold) {
@@ -67,7 +67,7 @@ fn String[] clump_finding(
};
};
}
- return clumps.copy_keys(alloc);
+ return clumps.keys(alloc);
};
}
@@ -99,16 +99,16 @@ fn int[] approximate_pattern_matching(
Allocator alloc = allocator::heap())
{
if (pattern.len == 0 || genome.len == 0) return {};
- List(<int>) res;
- @pool() {
- res.temp_init();
+ List{int} res;
+ @pool(alloc) {
+ res.tinit();
for (int i = 0; i <= genome.len - pattern.len; i += 1) {
// TODO more efficient to break early when calculating hamming distance
if (util::hamming_distance(genome[i:pattern.len], pattern) <= d) {
res.push(i);
}
}
- return res.to_new_array(alloc);
+ return res.to_array(alloc);
};
}
@@ -140,7 +140,7 @@ fn void test_approximate_pattern_matching() @test {
},
};
foreach (t : tests) {
- int[] matches = approximate_pattern_matching(t.pattern, t.genome, t.d);
+ int[] matches = approximate_pattern_matching(t.pattern, t.genome, t.d, tmem());
assert(matches == t.expected, "Expected %s, found %s", t.expected, matches);
}
}
@@ -154,12 +154,12 @@ fn String[] _frequent_words_with_mismatches(
bool with_rc,
Allocator alloc= allocator::heap())
{
- @pool() {
+ @pool(alloc) {
if (with_rc) {
- text = text.concat(reverse_complement(text, allocator::temp()), allocator::temp());
+ text = text.tconcat(reverse_complement(text, allocator::temp()));
}
FrequencyTable freq_map;
- freq_map.new_init(allocator: alloc);
+ freq_map.init(allocator: alloc);
for (int i = 0; i <= text.len - k; i += 1) {
String pattern = text[i:k];
String[] neighborhood = neighbors(pattern, d, allocator::temp());
@@ -171,20 +171,20 @@ fn String[] _frequent_words_with_mismatches(
}
}
- int[] counts = freq_map.value_tlist();
+ int[] counts = freq_map.tvalues();
int max = 0;
freq_map.@each(; String _k, int count) {
if (count > max) max = count;
};
- List(<String>) res;
- res.temp_init();
+ List{String} res;
+ res.tinit();
freq_map.@each(; String key, int v) {
if (v == max) {
res.push(key.copy(alloc));
}
};
- return res.to_new_array(alloc);
+ return res.to_array(alloc);
};
}
@@ -261,10 +261,10 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
DynamicArenaAllocator suffix_neighborhood_arena;
neighborhood_arena.init(1024, allocator::temp());
suffix_neighborhood_arena.init(1024, allocator::temp());
- List(<String>) neighborhood;
- List(<String>) suffix_neighborhood;
- neighborhood.new_init_with_array({"A", "C", "G", "T"}, &neighborhood_arena);
- suffix_neighborhood.new_init(16, &suffix_neighborhood_arena);
+ List{String} neighborhood;
+ List{String} suffix_neighborhood;
+ neighborhood.init_with_array(&neighborhood_arena, {"A", "C", "G", "T"});
+ suffix_neighborhood.init(&suffix_neighborhood_arena, 16);
for (int i = 1; i < pattern.len; i += 1) {
// swap suffix_neighborhood and neighborhood. The previous
// neighborhood becomes the new suffix_neighborhood to iterate
@@ -277,7 +277,7 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
//
// The memory itself is cleared for reuse by the arena holding the
// List (see next swap)
- List(<String>) swap_var;
+ List{String} swap_var;
swap_var = suffix_neighborhood;
suffix_neighborhood = neighborhood;
neighborhood = swap_var;
@@ -291,7 +291,7 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
// clear out the old neighborhood for use in next iteration
neighborhood_arena.free();
- neighborhood.new_init(suffix_neighborhood.len(), &neighborhood_arena);
+ neighborhood.init(&neighborhood_arena, suffix_neighborhood.len(),);
int suffix_idx = pattern.len - i;
String suffix = pattern[suffix_idx..];
@@ -300,15 +300,15 @@ fn String[] neighbors(String pattern, int d, Allocator alloc = allocator::heap()
//io::printfn("%s, %s, %s, %s", suffix, suffix_neighbor, suffix_neighborhood, neighborhood);
if (hamming_distance(suffix, suffix_neighbor) < d) {
// adding the additional base will be at most hamming dist == d,
- neighborhood.push("A".concat(suffix_neighbor, &suffix_neighborhood_arena));
- neighborhood.push("C".concat(suffix_neighbor, &suffix_neighborhood_arena));
- neighborhood.push("G".concat(suffix_neighbor, &suffix_neighborhood_arena));
- neighborhood.push("T".concat(suffix_neighbor, &suffix_neighborhood_arena));
+ neighborhood.push("A".concat(&suffix_neighborhood_arena, suffix_neighbor));
+ neighborhood.push("C".concat(&suffix_neighborhood_arena, suffix_neighbor));
+ neighborhood.push("G".concat(&suffix_neighborhood_arena, suffix_neighbor));
+ neighborhood.push("T".concat(&suffix_neighborhood_arena, suffix_neighbor));
} else {
// hamming distance == d. It can't be more, because the neighbors are
// generated up to hamming distance d. Adding the first symbol from the
// original pattern cannot increase the hamming distance.
- neighborhood.push(pattern[suffix_idx - 1:1].concat(suffix_neighbor, &suffix_neighborhood_arena));
+ neighborhood.push(pattern[suffix_idx - 1:1].concat(&suffix_neighborhood_arena, suffix_neighbor,));
}
}
}