/* Rosalind problem: Overlap Graphs in Picat. https://rosalind.info/problems/grph/ """ Problem A graph whose nodes have all been labeled can be represented by an adjacency list, in which each row of the list contains the two node labels corresponding to a unique edge. A directed graph (or digraph) is a graph containing directed edges, each of which has an orientation. That is, a directed edge is represented by an arrow instead of a line segment; the starting and ending nodes of an edge form its tail and head, respectively. The directed edge with tail v and head w is represented by (v,w) (but not by (w,v)). A directed loop is a directed edge of the form (v,v). For a collection of strings and a positive integer k, the overlap graph for the strings is a directed graph Ok in which each string is represented by a node, and string s is connected to string t with a directed edge when there is a length k suffix of s that matches a length k prefix of t, as long as s≠t; we demand s≠t to prevent directed loops in the overlap graph (although directed cycles may be present). Given: A collection of DNA strings in FASTA format having total length at most 10 kbp. Return: The adjacency list corresponding to O3. You may return edges in any order. Sample Dataset >Rosalind_0498 AAATAAA >Rosalind_2391 AAATTTT >Rosalind_2323 TTTTCCC >Rosalind_0442 AAATCCC >Rosalind_5013 GGGTGGG Sample Output Rosalind_0498 Rosalind_2391 Rosalind_0498 Rosalind_0442 Rosalind_2391 Rosalind_2323 """ This model was created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import rosalind_utils. main => go. go ?=> File = "rosalind_grph_fasta1.txt", % File = "rosalind_grph_fasta_challenge.txt", Fs = parse_fasta(read_file_chars(File)), K = 3, % overlap size foreach(O in overlapping(Fs,K).sort) printf("%w %w\n",O[1],O[2]) end, nl. go => true. % Prolog style go2 ?=> File = "rosalind_grph_fasta1.txt", % File = "rosalind_grph_fasta_challenge.txt", Fs = parse_fasta(read_file_chars(File)), K = 3, % overlap size Overlapping = overlapping2(Fs,K), foreach(F1=F2 in Overlapping.sort) printf("%w %w\n", F1,F2) end, nl. go2 => true. overlapping(Fs,K) = Overlapping => Len = Fs.len, Overlapping1 = [], foreach(I in 1..Len, J in 1..Len, I != J) Last = new_list(K), % Suffix of Fs[I] is the prefix of Fs[J] if append(_,Last,Fs[I,2]), append(Last,_,Fs[J,2]) then Overlapping1 := Overlapping1 ++ [[Fs[I,1],Fs[J,1]]] end end, Overlapping = Overlapping1. % Note the flatten. overlapping2(Fs,K) = Res.flatten => overlapping2(Fs,Fs,K,[],Res). overlapping2([],_All,_K,Res,Res). overlapping2([F1|Fs],All,K,Res0,[Tmp|Res]) :- overlapping2_(F1,All,K,[],Tmp), Tmp != [], !, overlapping2(Fs,All,K,Res0,Res). overlapping2([_F1|Fs],All,K,Res0,Res) :- overlapping2(Fs,All,K,Res0,Res). overlapping2_(_,[],_K,Res,Res). overlapping2_(F1,[F2|Fs],K,Res0,Res1) :- F1 != F2, !, L = new_list(K), ( ( append(_,L,F1[2]), append(L,_,F2[2])) -> Res1 = [ F1[1]=F2[1] |Res] ; Res1 = Res ), overlapping2_(F1,Fs,K,Res0,Res). overlapping2_(F1,[_F2|Fs],K,Res0,Res1) :- overlapping2_(F1,Fs,K,Res0,Res1).