/* 

  Rosalind problem: Overlap Graphs in Picat.

  https://rosalind.info/problems/grph/
  """
  Problem

  A graph whose nodes have all been labeled can be represented by an adjacency list, in which each 
  row of the list contains the two node labels corresponding to a unique edge.

  A directed graph (or digraph) is a graph containing directed edges, each of which has an orientation. 
  That is, a directed edge is represented by an arrow instead of a line segment; the starting and ending 
  nodes of an edge form its tail and head, respectively. The directed edge with tail v and head w is 
  represented by (v,w) (but not by (w,v)). A directed loop is a directed edge of the form (v,v).

  For a collection of strings and a positive integer k, the overlap graph for the strings is a directed 
  graph Ok in which each string is represented by a node, and string s is connected to string t with a 
  directed edge when there is a length k suffix of s that matches a length k prefix of t, as long 
  as s≠t; we demand s≠t to prevent directed loops in the overlap graph (although directed cycles 
  may be present).

  Given: A collection of DNA strings in FASTA format having total length at most 10 kbp.

  Return: The adjacency list corresponding to O3. You may return edges in any order.

  Sample Dataset
    >Rosalind_0498
    AAATAAA
    >Rosalind_2391
    AAATTTT
    >Rosalind_2323
    TTTTCCC
    >Rosalind_0442
    AAATCCC
    >Rosalind_5013
    GGGTGGG
  Sample Output
    Rosalind_0498 Rosalind_2391
    Rosalind_0498 Rosalind_0442
    Rosalind_2391 Rosalind_2323
  """

  This model was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import rosalind_utils.

main => go.

go ?=>
  File = "rosalind_grph_fasta1.txt",
  % File = "rosalind_grph_fasta_challenge.txt",  
  Fs = parse_fasta(read_file_chars(File)),
  
  K = 3, % overlap size
  foreach(O in overlapping(Fs,K).sort)
    printf("%w %w\n",O[1],O[2])
  end,
  nl.
go => true.


% Prolog style
go2 ?=>
  File = "rosalind_grph_fasta1.txt",
  % File = "rosalind_grph_fasta_challenge.txt",  
  Fs = parse_fasta(read_file_chars(File)),

  K = 3, % overlap size
  Overlapping = overlapping2(Fs,K),
  foreach(F1=F2 in Overlapping.sort)
    printf("%w %w\n", F1,F2)
  end,
  nl.
go2 => true.


overlapping(Fs,K) = Overlapping =>
  Len = Fs.len,
  Overlapping1 = [],
  foreach(I in 1..Len, J in 1..Len, I != J)
    Last = new_list(K),
    % Suffix of Fs[I] is the prefix of Fs[J]
    if append(_,Last,Fs[I,2]), append(Last,_,Fs[J,2])  then
      Overlapping1 := Overlapping1 ++ [[Fs[I,1],Fs[J,1]]]
    end
  end,
  Overlapping = Overlapping1.


% Note the flatten.
overlapping2(Fs,K) = Res.flatten =>
   overlapping2(Fs,Fs,K,[],Res).
overlapping2([],_All,_K,Res,Res).
overlapping2([F1|Fs],All,K,Res0,[Tmp|Res]) :-
  overlapping2_(F1,All,K,[],Tmp),
  Tmp != [], !,
  overlapping2(Fs,All,K,Res0,Res).
overlapping2([_F1|Fs],All,K,Res0,Res) :-
  overlapping2(Fs,All,K,Res0,Res).
  
overlapping2_(_,[],_K,Res,Res).
overlapping2_(F1,[F2|Fs],K,Res0,Res1) :-
  F1 != F2, !, 
  L = new_list(K),
  ( ( append(_,L,F1[2]), append(L,_,F2[2])) ->
      Res1 = [ F1[1]=F2[1] |Res]
     ; 
      Res1 = Res
  ),
  overlapping2_(F1,Fs,K,Res0,Res).
overlapping2_(F1,[_F2|Fs],K,Res0,Res1) :-
  overlapping2_(F1,Fs,K,Res0,Res1).