commit 1bacb219fa0c7c302aeb98c6a7a33512225e9c56
parent bea243e9c50e6b09e11ba8b1591aa52e250f8e52
Author: Walther Chen <walther.chen@gmail.com>
Date: Thu, 24 Oct 2024 09:57:05 -0400
ba1e clump finding
Diffstat:
A | ba1e.c3 | | | 61 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 61 insertions(+), 0 deletions(-)
diff --git a/ba1e.c3 b/ba1e.c3
@@ -0,0 +1,61 @@
+// clump finding
+
+module ba1e;
+import std::io;
+import std::io::file;
+import std::collections;
+
+import util;
+
+fn void! main(String[] args) {
+ if (args.len != 2) {
+ io::eprintn("Please supply path to data file");
+ return IoError.FILE_NOT_FOUND?;
+ }
+ File f = file::open(args[1], "rb")!;
+ String genome = io::treadline(&f)!;
+ String ints = io::treadline(&f)!;
+ String[] ints_split = ints.tsplit(" ");
+ int k = ints_split[0].to_integer(int)!;
+ int region_len = ints_split[1].to_integer(int)!; // K
+ int clump_threshold = ints_split[2].to_integer(int)!; // t
+ String[] clumps = clump_finding(genome, k, region_len, clump_threshold);
+ foreach (clump : clumps) {
+ io::printf("%s ", clump);
+ }
+}
+
+fn String[] clump_finding(
+ String genome,
+ int k,
+ int region_len, // K
+ int clump_threshold, // t
+ Allocator alloc = allocator::heap())
+{
+ if (k == 0 || genome.len == 0) return {};
+ String[] res;
+ @pool() {
+ HashMap(<String, char>) clumps; // a set
+ clumps.temp_init();
+ for (int i = 0; i <= genome.len - region_len; i += 1) {
+ FrequencyTable freq_map = util::frequency_table(genome[i:region_len], k);
+ freq_map.@each(; String kmer, int count) {
+ if (count >= clump_threshold) {
+ // TODO can probably just use a growable array on small inputs
+ // Also, careful: hashmap copies keys! Here that means keys copied
+ // to temp allocator.
+ clumps.set(kmer, 0);
+ }
+ };
+ }
+ return clumps.copy_keys(alloc);
+ };
+}
+
+fn void test_clump_finding() @test {
+ String input = "CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA";
+ assert(clump_finding(input, 5, 50, 4) == {"CGACA", "GAAGA"});
+}
+
+
+