/* 

  Rosalind problem: Finding a Motif in DNA in Picat.

  https://rosalind.info/problems/subs/
  """
  Problem
  Given two strings s and t, t is a substring of s if t is contained as a contiguous collection of 
  symbols in s (as a result, t must be no longer than s).

  The position of a symbol in a string is the total number of symbols found to its left, including 
  itself (e.g., the positions of all occurrences of 'U' in "AUGCUUCAGAAAGGUCUUACG" are 
  2, 5, 6, 15, 17, and 18). The symbol at position i of s is denoted by s[i].

  A substring of s can be represented as s[j:k], where j and k represent the starting and 
  ending positions of the substring in s; for example, if s = "AUGCUUCAGAAAGGUCUUACG", 
  then s[2:5] = "UGCU".

  The location of a substring s[j:k] is its beginning position j; note that t will have multiple 
  locations in s if it occurs more than once as a substring of s (see the Sample below).

  Given: Two DNA strings s and t (each of length at most 1 kbp).

  Return: All locations of t as a substring of s.

  Sample Dataset
     GATATATGCATATACTT
     ATAT
  Sample Output
  2 4 10
  """

  This model was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import util.
import dcg_utils.


% Program entry point: runs the sample-dataset demo (go/0).
main =>
   go.

% Sample dataset from the problem statement.
% Collects every start position at which S occurs in DNA (find/4 yields the
% next occurrence on backtracking, findall gathers them all) and prints them
% space-separated on one line, i.e. "2 4 10" as in the Rosalind sample output
% and in the same format go2 uses, followed by one empty line.
go ?=>
   DNA = "GATATATGCATATACTT",
   S = "ATAT",
   Positions = findall(From,find(DNA,S,From,_To)),
   % Convert each position to a string so that they can be joined with spaces.
   printf("%s\n", [P.to_string : P in Positions].join(" ")),
   nl.
% Fallback so that go/0 still succeeds if the rule above fails.
go => true.

% The Challenge
% The Challenge: the same search as go/0, on a full-size dataset.
% Prints the start positions of S in DNA, space-separated, on a single line,
% followed by one empty line.
go2 ?=>
   S = "GGGTCCCGG",
   DNA = "CAAGGAGGGTCCCGTCGGGTCCCGGGTCCCATCTAGCACTTACAGAAGGGTCCCGTGTTGTAAGGGGTCCCCGGGTCCCCGGGTCCCGGGTCCCGGGTCCCCGGGTCCCGGGGTCCCGGGTCCCCTTCGGGTCCCTACAGGGTCCCCGACGTATCGGGTCCCCCGCTCGGGTCCCACGGGTCCCTGTAAGGGTCCCGGGTCCCTGGGTCCCCGGGTCCCCGCGGGTCCCGGGGGTCCCGTGAAAAGACGGGTCCCGGCGGGTCCCGGGTCCCGTTTCGATATTGGCACTGGGTCCCGCTATTGGGTCCCCCGTTAGGGGTCCCGGGTCCCTGGAGGGTCCCTAGGGGTCCCAACGGGTCCCATGGTGGGTCCCTTGAGGGTCCCGCGGGTCCCGGGTCCCTTCTTAGTACGGGTCCCTCTCTGTGGGTCCCACGGGTCCCTCACTGGGTCCCGGGTCCCAGGGTCCCCCGGGTCCCTGGGGTCCCGGGTCCCATATAACAGGGTCCCCGGGTCCCACAAGGGGTCCCTAGGGTCCCGGGTCCCTGAAGCTCGGGTCCCCGGGGTCCCAGGGGTCCCGGGTCCCTAGGGTCCCAGTGGGTCCCGCTGGGTCCCGCGGGGGTCCCACGGGTCCCTGGGTCCCGGGTCCCTGGGTCCCGGGTCCCTCGAACAGGGTCCCGGGTCCCGGGTGGGTCCCTGGTTCTCTAGGGTCCCCGGCGCCTCGGGTCCCGGGTCCCGGGGGGTCGGGGTCCCACGGGTCCCGGGTCCCTGGGTCCCTGTCGTATGGGTCCCGGGTCCCTAGGGTCCCGGGGTCCCGAAAACAGGGTCCCAGATGGGTCCCAGGGTCCCTCGGGTCCCTGGGTCCCATACGGGTCCCCGGGTCCCGGGTCCCGGGTCCC",
   % Gather every start position; find/4 is re-entered on backtracking.
   Starts = findall(From,find(DNA,S,From,_To)),
   % Render the positions as strings and glue them together with single spaces.
   Line = join([to_string(P) : P in Starts], " "),
   printf("%s\n", Line),
   nl.
% Fallback so that go2/0 still succeeds if the rule above fails.
go2 => true.


%
% Testing DCG and backtracking:
% ->
%   x = [GAT,GC,ACTT]
%   x = [G,ATGC,ACTT]
% 
% Prints every way split_string2//2 can split DNA around the separator S,
% one "x = ..." line per solution, each followed by an empty line.
go3 ?=>
  DNA = "GATATATGCATATACTT",
  S = "ATAT",
  % The final [] requires the split to consume the whole of DNA.
  split_string2(X,S,DNA,[]),
  % start_pos(1, P, X, S, DNA, Rest),
  println(x=X),
  % println(p=P),
  % println(rest=Rest),
  nl,
  % Failure-driven loop: failing here backtracks into split_string2 for the
  % next solution. Nothing may follow fail, since it would never be reached.
  fail.
% Reached once all solutions have been printed, so that go3/0 succeeds.
go3 => true.


% any_except_string(Str,Target): Str is a sequence produced by any//1 in which
% Target does not occur as a contiguous sublist, i.e. Str cannot be written as
% Prefix ++ Target ++ Suffix.
% NOTE(review): any//1 comes from dcg_utils; presumably it takes an arbitrary
% prefix of the remaining input - confirm against that module.
any_except_string(Str,Target) -->
   any(Str),
   {not append(_Prefix,Target,_Suffix,Str)}.

% This is splitting using a string.
% On failure it might find another way of splitting the string.
% split_string1(Tmp) --> any_except_string(Tmp,"ATAT"),{println(tmp=Tmp)}, "ATAT".
% split_string([Tmp|Tmps]) --> split_string1(Tmp), (split_string(Tmps) ; []).
% split_string([Tmp]) --> any_except_string(Tmp,"ATAT").
% split_string([]) --> [].

%
% I don't like that one has to use some/1 here. It would be nicer to write [Split] directly.
% I settled for: any(Tmp2) followed by the check Tmp2 == Split.
%
% split_string2_1(Tmp,Split): Tmp is a piece that does not contain Split
% (see any_except_string//2) and is immediately followed by the separator.
% The separator is taken with any//1 and then compared against Split,
% instead of being matched directly as a terminal list.
split_string2_1(Tmp,Split) -->
   any_except_string(Tmp,Split),
   any(Sep),
   {Sep == Split}.
% split_string2(Pieces,Split): Pieces is the list of pieces obtained by
% splitting the input around occurrences of the separator Split. No piece
% contains Split. On backtracking, other ways of splitting may be found
% (e.g. when occurrences of Split overlap).
%
% Clause 1: a piece followed by a separator, then the remaining pieces.
%   There is deliberately no "; []" alternative after the recursive call:
%   it would succeed without binding Tmps and return a list with an unbound
%   tail. Stopping right after a separator is already covered by the
%   recursive call matching the last clause (Tmps = []).
% Clause 2: a single final piece that is not followed by a separator.
% Clause 3: no pieces, consuming nothing.
split_string2([Tmp|Tmps],Split) --> split_string2_1(Tmp,Split), split_string2(Tmps,Split).
split_string2([Tmp],Split) --> any_except_string(Tmp,Split).
split_string2([],_) --> [].