/* Regex crossword in Picat. https://regexcrossword.com The object is to solve a crossword puzzle were the hints are regexes. Here is the first puzzle: https://regexcrossword.com/challenges/beginner/puzzles/1 [SPEAK]+ (EP|IP|EF) -------------------- HE|LL|O+ | | | [PLEASE]+ | | | --------------------- The problem is represented as Rows = [ "HE|LL|O+", "[PLEASE]+"], Cols = [ "[^SPEAK]+", "EP|IP|EF"]. The solution: H E L P The generation of the regexes is done via generate_string/3 using DCGs (Definite Glause Grammar), Picat's/Prolog's variant to generate and validate (string) patterns. See the description below, and the program http://hakank.org/picat/regex_generating_strings_v3.pi for more information and tests. generate_string/3 generates all reasonable ASCII characters for the regex ".", and I have sometimes simplified this by replacing it with "[A-Z]". One issue with this DCG generator is that it does not support regex backrefs (\1, \2) so I had to rely on either DCGs or generating alternative via list comprehension. The 11 problems are solved in 0.745s. Most problems are solved in < 0.01s. Problem 8 is the slowest (0.686s). This Picat model was created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import util. % import regex_utils. main => go. go => garbage_collect(200_000_000), NumProblems = 11, member(Problem,1..NumProblems), once println(problem=Problem), problem(Problem,Rows,Cols), once println(rows=Rows), once println(cols=Cols), time(regex_crossword(Rows,Cols, Mat)), nl, foreach(Row in Mat) println(Row) end, nl, fail, nl. % Test a DCG go2 => S = new_list(5), prob11_row1(S,[]), println(s=S), fail, nl. % Test a regex go3 => Re = "[ARK]*O", % "[UGLER]*", S = matches(Re,5), println(S), println(len=S.len), nl. /* Mat is the solution of the regexes in Rows and Cols */ regex_crossword(Rows,Cols, Mat) => N = Rows.len, M = Cols.len, Mat = new_array(N,M).array_matrix_to_list_matrix, MatT = Mat.transpose, foreach(I in 1..N) % println(I=Rows[I]), if string(Rows[I]) then member(Mat[I],matches(Rows[I],M)) else % Get all matching strings from the DCG Ps = findall(P, call(Rows[I],P,[])), % println(dcg=Rows[I]=Ps.len), member(Mat[I],Ps) end, foreach(J in 1..M) % println(J=Cols[J]), if string(Cols[J]) then member(MatT[J],matches(Cols[J],N)) else % DCG Ps = findall(P, call(Cols[J],P,[])), % println(dcg=Cols[J]=Ps.len), member(MatT[J],Ps) end end end. /* Get all strings generated by regex Regex of length Len. Note the table for this which helps since we then don't have to recalculate the matches during backtracking. Using the print debugging we can see the number of generated variants for a regex. Here's the output for example 11: rows = [prob11_row1,(U|O|I)*T[FRO]+,[KANE]*[GIN]*] cols = [(FI|A)+,(YE|OT)K,.[IF]+,[NODE]+,(FY|F|RG)*] matches((FI|A)+,3) len = 3 matches((YE|OT)K,3) len = 2 matches(.[IF]+,3) len = 380 matches([NODE]+,3) len = 64 matches((FY|F|RG)*,3) len = 5 matches((U|O|I)*T[FRO]+,5) len = 324 matches([KANE]*[GIN]*,5) len = 3367 CPU time 0.007 seconds. FOODF ITFOR AKING */ table matches(Regex,Len) = Matches => % println($matches(Regex,Len)), Matches = [P : P in find_all(C,generate_string(Regex,Len,C)), P.len == Len]. % , println(len=Matches.len). % % We mostly use just [A-Z] in the DCGs/list comprehensions. % upper() = "ABCDEFGHIJKLMNOPQRSTUVWXYZ". /* generate_string(Regex,List,Length) Generate a list of strings matching the regex Regex of length Length. Prolog solution by ankh-morpork (adjusted to Picat by hakank) From https://codegolf.stackexchange.com/questions/25378/regex-in-reverse-decompose-regular-expressions For more details and examples, see http://hakank.org/picat/regex_generating_strings_v3.pi */ generate_string(R, L, S) :- % parse regex % string_codes(R, RC), RC = to_codes(R), % hakank regex_union(RE, RC, []), % bound string length between(0, L, M), bp.length(SC, M), % hakank % find string match(SC, RE, []), % string_codes(S, SC). S = [chr(C) : C in SC]. % hakank % Parsers %%%%%%%%% regex_union(R) -->regex_concat(S), regex_union1(S, R). regex_union1(R,T) --> [124], regex_concat(S), regex_union1($regex_union(R,S), T). regex_union1(R, R) --> []. regex_concat(R) --> regex_op(S), regex_concat1(S, R). regex_concat1(R, T) --> regex_op(S), regex_concat1($regex_concat(R,S), T). regex_concat1(R, R) --> []. regex_op(regex_kleene(R)) --> regex_lit(R), [42]. regex_op(regex_concat(R,regex_kleene(R))) --> regex_lit(R), [43]. regex_op(regex_union(regex_empty,R)) --> regex_lit(R), [63]. regex_op(R) --> regex_lit(R). regex_lit(regex_char([C])) --> [C], {\+ regex_ctrl(C)}. regex_lit(regex_char([C])) --> [92], [C]. regex_lit(regex_char(CS)) --> [46], % {findall(C, between(32,126, C), CS)}. {CS = findall(C, between(32,126, C))}. % hakank regex_lit(regex_char(DS)) --> [91], [94], !, class_body(CS), [93], % {findall(C, (between(32, 126, C), \+ member(C, CS)), DS)}. {DS = findall(C, (between(32, 126, C), \+ member(C, CS)))}. % hakank regex_lit(regex_char(CS)) --> [91], class_body(CS), [93]. regex_lit(R) --> [40], regex_union(R), [41]. class_body([C|T]) --> class_lit(C),class_body(T). class_body(CS) --> class_lit(C0), [45], class_lit(C1), class_body(T), % {findall(C, between(C0, C1, C), H), append(H,T,CS)}. {H = findall(C, between(C0, C1, C)), append(H,T,CS)}. % hakank class_body([]) --> []. class_lit(C) --> [C], {\+ class_ctrl(C)}. class_lit(C) --> [92], [C]. class_ctrl(C) :- CS=to_codes("\\[]-"), member(C, CS). % hakank regex_ctrl(C) :- CS=to_codes("\\.[]|+?*()"), member(C, CS). % hakank % Regex Engine %%%%%%%%%%%%%% % The regex empty matches any string without consuming any characters. match(S, regex_empty, S). % A regex consisting of a single character matches any string starting with % that character. The chanter is consumed. match([C|S], regex_char(CS), S) :- member(C, CS). % A union of two regex only needs to satisify one of the branches. match(S, regex_union(L,R), T) :- match(S, L, T); match(S, R, T). % A concat of two regex must satisfy the left and then the right. match(S, regex_concat(L, R), U) :- match(S, L, T), match(T, R, U). % The kleene closure of a regex can match the regex 0 times or it can the regex % once before matching the kleene closure again. match(S, regex_kleene(_), S). match(S, regex_kleene(K), U) :- match(S, K, T), S != T, match(T, $regex_kleene(K), U). % Experimenting problem(0,Rows,Cols) => Rows = [ "[A-Z]+", "[A-Z]+"], Cols = [ "[A-Z]+", "[A-Z]+"]. % % % Newbie: 1-5 % % https://regexcrossword.com/challenges/beginner/puzzles/1 % % Answer: % H E % L P % problem(1,Rows,Cols) => Rows = [ "HE|LL|O+", "[PLEASE]+"], Cols = [ "[^SPEAK]+", "EP|IP|EF"]. % % https://regexcrossword.com/challenges/beginner/puzzles/2 % B O % B E % problem(2,Rows,Cols) => Rows = [ ".*M?O.*", "AN|FE|BE"], % Cols = [ "(A|B|C)\\1", "(AB|OE|SK)"]. % \1 is not supported Cols = [ "(AA|BB|CC)", "(AB|OE|SK)"]. % % https://regexcrossword.com/challenges/beginner/puzzles/3 % O O % O O % problem(3,Rows,Cols) => % Rows = [ "(.)+\\1", "[^ABRC)+"], % \1 is not supported % Row1 = [ [C,C] : C in upper()].join("|"), % Rows = [ Row1, "[^ABRC]+"], Rows = [prob3_row1, "[^ABRC]+"], Cols = [ "[COBRA]+", "(AB|O|OR)+"]. % % https://regexcrossword.com/challenges/beginner/puzzles/4 % % 4 solutions with the same matrix % % ** % // % % ** % // % % ** % // % % ** % // % % problem(4,Rows,Cols) => Rows = [ "[*]+", "/+"], Cols = [ ".?.+", ".+"]. % % https://regexcrossword.com/challenges/beginner/puzzles/5 % % 19 % 84 % problem(5,Rows,Cols) => Rows = [ "18|19|20", "[6789][0-9]"], Cols = [ "[0-9][2480]", "56|94|73"]. % % Intermediate 6-10 % % % https://regexcrossword.com/challenges/intermediate/puzzles/1 % % ATO % WEL % problem(6,Rows,Cols) => Rows = [ "[NOTAD]*", "WEL|BAL|EAR"], Cols = [ "UB|IE|AW", "[TUBE]*", "[BORF]."]. % % https://regexcrossword.com/challenges/intermediate/puzzles/2 % WA % LK % ER % problem(7,Rows,Cols) => Rows = [ "[AWE]+", "[ALP]+K", "PR|ER|EP"], Cols = [ "[BQW](PR|LE)", "[RANK]+"]. % % https://regexcrossword.com/challenges/intermediate/puzzles/3 % % FOR % TY- % TWO % problem(8,Rows,Cols) => Rows = [ "CAT|FOR|FAT", "RY|TY\\-", "[TOWEL]*"], % Cols = [ ".(.)\\1", ".*[WAY]+", "[RAM].[OH]"]. % no \1 Cols = [prob8_col1 , ".*[WAY]+", "[RAM].[OH]"]. % % https://regexcrossword.com/challenges/intermediate/puzzles/4 % % DON % TPA % NIC % problem(9,Rows,Cols) => Rows = [ "[DEF][MNO]+", "[^DJNU]P[ABC]", "[ICAN]+"], Cols = [ "[JUNDT]*", "APA|OPI|OLK", "(NA|FE|HE)[CV]"]. % % https://regexcrossword.com/challenges/intermediate/puzzles/5 % % TURN % OFFA % NDON % problem(10,Rows,Cols) => % Rows = [ "[RUNT]*", "O.*[HAT]", "(.)*DO\\1"], % no \1 Rows = [ "[RUNT]*", "O[A-Z]*[HAT]", prob10_row3 ], Cols = [ "[^NRU](NO|ON)", "(D|FU|UF)+", "(FO|A|R)*","(N|A)*"]. % % Experienced 11- % % % https://regexcrossword.com/challenges/experienced/puzzles/1 % % FOODF % ITFOR % AKING % problem(11,Rows,Cols) => % Rows = [ "(Y|F)(.)\\2[DAF]\\1", "(U|O|I)*T[FRO]+", "[KANE]*[GIN]*" ], % no \1 \2 Rows = [ prob11_row1, "(U|O|I)*T[FRO]+", "[KANE]*[GIN]*" ], Cols = [ "(FI|A)+", "(YE|OT)K", ".[IF]+", "[NODE]+", "(FY|F|RG)*"]. % % DCGs for the backref examples % % alpha(C): generate the character C) alpha(C) --> [C], {member(C,upper())}. % Problem 3 row 1 % "(.)+\\1" prob3_row1 --> alpha(First), [First]. % For problem 8, col 1 % ".(.)\\1" prob8_col1 --> alpha(_C), alpha(First), [First]. % For problem 10, row 3 % "(.)*DO\\1" prob10_row3 --> alpha(First), "DO", [First]. % For problem 11, row 1 % (Y|F)(.)\\2[DAF]\\1" prob11_row1 --> [First], {member(First,"YF")}, alpha(Second), [Second], ("D" ; "A" ; "F"), [First].