/* 

  Regex crossword in Picat.

  https://regexcrossword.com
 
  The object is to solve a crossword puzzle were the hints are regexes.

  Here is the first puzzle: https://regexcrossword.com/challenges/beginner/puzzles/1            
                 
                 [SPEAK]+  (EP|IP|EF)
                 --------------------
      HE|LL|O+   |        |          |
      [PLEASE]+  |        |          |
                 ---------------------

  The problem is represented as 
      Rows = [ "HE|LL|O+", "[PLEASE]+"],
      Cols = [ "[^SPEAK]+", "EP|IP|EF"].

  The solution: 
        H  E
        L  P

  The generation of the regexes is done via generate_string/3 using DCGs 
  (Definite Glause Grammar), Picat's/Prolog's variant to generate 
  and validate (string) patterns. See the description below, and 
  the program  http://hakank.org/picat/regex_generating_strings_v3.pi 
  for more information and tests.

  generate_string/3 generates all reasonable ASCII characters for the regex ".",
  and I have sometimes simplified this by replacing it with "[A-Z]".

  One issue with this DCG generator is that it does not support regex 
  backrefs (\1, \2) so I had to rely on either DCGs or generating alternative 
  via list comprehension.

  The 11 problems are solved in 0.745s. Most problems are solved in < 0.01s.
  Problem 8 is the slowest (0.686s).


  This Picat model was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/


import util.
% import regex_utils.

main => go.


go =>
  garbage_collect(200_000_000),
  NumProblems = 11,
  member(Problem,1..NumProblems),
  once println(problem=Problem),    
  problem(Problem,Rows,Cols),
  once println(rows=Rows),
  once println(cols=Cols),
  time(regex_crossword(Rows,Cols, Mat)),
  nl,
  foreach(Row in Mat)
    println(Row)
  end,
  nl,
  fail,
  nl.

% Test a DCG
go2 =>
  S = new_list(5),
  prob11_row1(S,[]),
  println(s=S),
  fail,
  nl.

% Test a regex
go3 =>
  Re = "[ARK]*O", % "[UGLER]*",
  S = matches(Re,5),
  println(S),
  println(len=S.len),
  nl.

/*
   Mat is the solution of the regexes in Rows and Cols
*/
regex_crossword(Rows,Cols, Mat) =>
  N = Rows.len,
  M = Cols.len,
  Mat = new_array(N,M).array_matrix_to_list_matrix,
  MatT = Mat.transpose,
  foreach(I in 1..N)
    % println(I=Rows[I]),
    if string(Rows[I]) then
      member(Mat[I],matches(Rows[I],M))
    else
      % Get all matching strings from the DCG
      Ps = findall(P, call(Rows[I],P,[])),
      % println(dcg=Rows[I]=Ps.len),      
      member(Mat[I],Ps)
    end,
    foreach(J in 1..M)
      % println(J=Cols[J]),
      if string(Cols[J]) then
        member(MatT[J],matches(Cols[J],N))
      else
        % DCG
        Ps = findall(P, call(Cols[J],P,[])),
        % println(dcg=Cols[J]=Ps.len),
        member(MatT[J],Ps)
      end
    end
  end.

/*

  Get all strings generated by regex Regex of length Len.
  Note the table for this which helps since we then don't
  have to recalculate the matches during backtracking.

  Using the print debugging we can see the number of generated variants
  for a regex. Here's the output for example 11:

    rows = [prob11_row1,(U|O|I)*T[FRO]+,[KANE]*[GIN]*]
    cols = [(FI|A)+,(YE|OT)K,.[IF]+,[NODE]+,(FY|F|RG)*]
    matches((FI|A)+,3)
    len = 3
    matches((YE|OT)K,3)
    len = 2
    matches(.[IF]+,3)
    len = 380
    matches([NODE]+,3)
    len = 64
    matches((FY|F|RG)*,3)
    len = 5
    matches((U|O|I)*T[FRO]+,5)
    len = 324
    matches([KANE]*[GIN]*,5)
    len = 3367

    CPU time 0.007 seconds.

    FOODF
    ITFOR
    AKING

*/
table
matches(Regex,Len) = Matches =>
  % println($matches(Regex,Len)),
  Matches = [P : P in find_all(C,generate_string(Regex,Len,C)), P.len == Len].
  % , println(len=Matches.len).

%
% We mostly use just [A-Z] in the DCGs/list comprehensions.
%
upper() = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".


/*
  generate_string(Regex,List,Length)
  Generate a list of strings matching the regex Regex of length Length.

  Prolog solution by ankh-morpork (adjusted to Picat by hakank)
  From
  https://codegolf.stackexchange.com/questions/25378/regex-in-reverse-decompose-regular-expressions

  For more details and examples, see
  http://hakank.org/picat/regex_generating_strings_v3.pi 

*/
generate_string(R, L, S) :-
    % parse regex
    % string_codes(R, RC),
    RC = to_codes(R), % hakank
    regex_union(RE, RC, []),

    % bound string length
    between(0, L, M),
    bp.length(SC, M), % hakank

    % find string
    match(SC, RE, []),

    % string_codes(S, SC).
    S = [chr(C) : C in SC]. % hakank

% Parsers
%%%%%%%%%  

regex_union(R) -->regex_concat(S), regex_union1(S, R).

regex_union1(R,T) --> [124], regex_concat(S), regex_union1($regex_union(R,S), T).
regex_union1(R, R) --> [].

regex_concat(R) --> regex_op(S), regex_concat1(S, R).

regex_concat1(R, T) --> regex_op(S), regex_concat1($regex_concat(R,S), T).
regex_concat1(R, R) --> [].

regex_op(regex_kleene(R)) --> regex_lit(R), [42].
regex_op(regex_concat(R,regex_kleene(R))) --> regex_lit(R), [43].
regex_op(regex_union(regex_empty,R)) --> regex_lit(R), [63].
regex_op(R) --> regex_lit(R).

regex_lit(regex_char([C])) --> [C], {\+ regex_ctrl(C)}.
regex_lit(regex_char([C])) --> [92], [C].

regex_lit(regex_char(CS)) --> [46],
          % {findall(C, between(32,126, C), CS)}.
          {CS = findall(C, between(32,126, C))}. % hakank        

regex_lit(regex_char(DS)) --> 
    [91], [94], !, class_body(CS), [93],
    % {findall(C, (between(32, 126, C), \+ member(C, CS)), DS)}.
    {DS = findall(C, (between(32, 126, C), \+ member(C, CS)))}. % hakank
regex_lit(regex_char(CS)) --> [91], class_body(CS), [93].

regex_lit(R) --> [40], regex_union(R), [41].

class_body([C|T]) --> class_lit(C),class_body(T).
class_body(CS) -->
    class_lit(C0), [45], class_lit(C1), class_body(T),
    % {findall(C, between(C0, C1, C), H), append(H,T,CS)}.
    {H = findall(C, between(C0, C1, C)), append(H,T,CS)}. % hakank
class_body([]) --> [].

class_lit(C) --> [C], {\+ class_ctrl(C)}.
class_lit(C) --> [92], [C].

class_ctrl(C) :- CS=to_codes("\\[]-"), member(C, CS). % hakank
regex_ctrl(C) :- CS=to_codes("\\.[]|+?*()"), member(C, CS). % hakank

% Regex Engine
%%%%%%%%%%%%%% 

% The regex empty matches any string without consuming any characters.
match(S, regex_empty, S).

% A regex consisting of a single character matches any string starting with
% that character. The chanter is consumed.
match([C|S], regex_char(CS), S) :- member(C, CS).

% A union of two regex only needs to satisify one of the branches.
match(S, regex_union(L,R), T) :- match(S, L, T); match(S, R, T).     

% A concat of two regex must satisfy the left and then the right.
match(S, regex_concat(L, R), U) :- match(S, L, T), match(T, R, U).

% The kleene closure of a regex can match the regex 0 times or it can the regex
% once before matching the kleene closure again.
match(S, regex_kleene(_), S).
match(S, regex_kleene(K), U) :- match(S, K, T), S != T, match(T, $regex_kleene(K), U).


% Experimenting
problem(0,Rows,Cols) =>
   Rows = [ "[A-Z]+", "[A-Z]+"],
   Cols = [ "[A-Z]+", "[A-Z]+"].


%
%
% Newbie: 1-5
% 
% https://regexcrossword.com/challenges/beginner/puzzles/1
%
% Answer:
%   H E
%   L P
%
problem(1,Rows,Cols) =>
   Rows = [ "HE|LL|O+", "[PLEASE]+"],
   Cols = [ "[^SPEAK]+", "EP|IP|EF"].

%
% https://regexcrossword.com/challenges/beginner/puzzles/2
% B O
% B E
% 
problem(2,Rows,Cols) =>
   Rows = [ ".*M?O.*", "AN|FE|BE"],
   % Cols = [ "(A|B|C)\\1", "(AB|OE|SK)"].  % \1 is not supported   
   Cols = [ "(AA|BB|CC)", "(AB|OE|SK)"].
   

%
% https://regexcrossword.com/challenges/beginner/puzzles/3
% O O 
% O O 
% 
problem(3,Rows,Cols) =>
   % Rows = [ "(.)+\\1", "[^ABRC)+"], % \1 is not supported
   % Row1 = [ [C,C] : C in upper()].join("|"),
   % Rows = [ Row1, "[^ABRC]+"],
   Rows = [prob3_row1, "[^ABRC]+"],
   Cols = [ "[COBRA]+", "(AB|O|OR)+"].
   

%
% https://regexcrossword.com/challenges/beginner/puzzles/4
%
% 4 solutions with the same matrix
%
% **
% //
%
% **
% //
%
% **
% //
%
% **
% //
%
% 
problem(4,Rows,Cols) =>
   Rows = [ "[*]+", "/+"],
   Cols = [ ".?.+", ".+"].
   

%
% https://regexcrossword.com/challenges/beginner/puzzles/5
%
% 19
% 84
% 
problem(5,Rows,Cols) =>
   Rows = [ "18|19|20",    "[6789][0-9]"],
   Cols = [ "[0-9][2480]", "56|94|73"].
   

%
% Intermediate 6-10
%

%
% https://regexcrossword.com/challenges/intermediate/puzzles/1
%
% ATO
% WEL
% 
problem(6,Rows,Cols) =>
   Rows = [ "[NOTAD]*",    "WEL|BAL|EAR"],
   Cols = [ "UB|IE|AW", "[TUBE]*", "[BORF]."].
   
% 
% https://regexcrossword.com/challenges/intermediate/puzzles/2
% WA
% LK
% ER
%
problem(7,Rows,Cols) =>
   Rows = [ "[AWE]+", "[ALP]+K", "PR|ER|EP"],
   Cols = [ "[BQW](PR|LE)", "[RANK]+"].


% 
% https://regexcrossword.com/challenges/intermediate/puzzles/3
%
% FOR
% TY-
% TWO
%
problem(8,Rows,Cols) =>
   Rows = [ "CAT|FOR|FAT", "RY|TY\\-", "[TOWEL]*"],
   % Cols = [ ".(.)\\1", ".*[WAY]+", "[RAM].[OH]"].  % no \1
   Cols = [prob8_col1 , ".*[WAY]+", "[RAM].[OH]"].


% 
% https://regexcrossword.com/challenges/intermediate/puzzles/4
%
% DON
% TPA
% NIC
%
problem(9,Rows,Cols) =>
   Rows = [ "[DEF][MNO]+", "[^DJNU]P[ABC]", "[ICAN]+"],
   Cols = [ "[JUNDT]*", "APA|OPI|OLK", "(NA|FE|HE)[CV]"].   


% 
% https://regexcrossword.com/challenges/intermediate/puzzles/5
%
% TURN
% OFFA
% NDON
%
problem(10,Rows,Cols) =>
   % Rows = [ "[RUNT]*", "O.*[HAT]", "(.)*DO\\1"], % no \1
   Rows = [ "[RUNT]*", "O[A-Z]*[HAT]", prob10_row3 ],
   Cols = [ "[^NRU](NO|ON)", "(D|FU|UF)+", "(FO|A|R)*","(N|A)*"].   

  
%
% Experienced 11-
%

% 
% https://regexcrossword.com/challenges/experienced/puzzles/1
%
% FOODF
% ITFOR
% AKING
%
problem(11,Rows,Cols) =>
   % Rows = [ "(Y|F)(.)\\2[DAF]\\1", "(U|O|I)*T[FRO]+", "[KANE]*[GIN]*" ], % no \1 \2
   Rows = [ prob11_row1, "(U|O|I)*T[FRO]+", "[KANE]*[GIN]*" ],      
   Cols = [ "(FI|A)+", "(YE|OT)K", ".[IF]+", "[NODE]+", "(FY|F|RG)*"].   


%
% DCGs for the backref examples
%

% alpha(C): generate the character C) 
alpha(C) --> [C], {member(C,upper())}.


% Problem 3 row 1
% "(.)+\\1"
prob3_row1 --> alpha(First), [First].

% For problem 8, col 1
% ".(.)\\1"
prob8_col1 --> alpha(_C), alpha(First), [First].

% For problem 10, row 3
% "(.)*DO\\1"
prob10_row3 -->  alpha(First), "DO", [First].


% For problem 11, row 1
% (Y|F)(.)\\2[DAF]\\1"
prob11_row1 --> [First], {member(First,"YF")}, alpha(Second), [Second], ("D" ; "A" ; "F"), [First].