/* Test of dcg_utils in Picat. This model was created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import util. import dcg_utils. main => go. /* "abracadabra" =~ m!([ab])(.*)cad(.*)! m = [ab,ra,abra] m = [a,bra,abra] m = [[],abra,abra] */ go ?=> S = "abracadabra", abracadabra_test1(M,S,[]) , println(m=M), fail, nl. go => true. /* "abracadabra" =~ m!(.*)(.*)\1! m = [abra,cad] m = [a,bracadabr] m = [[],abracadabra] */ go2 ?=> S = "abracadabra", abracadabra_test2(M,S,[]) , println(m=M), fail, nl. go2 => true. /* Testing some with length constraints m = [abrac,adabr,a] m = [abrac,adab,ra] m = [abrac,ada,bra] m = [abrac,ad,abra] m = [abrac,a,dabra] m = [abra,cadab,ra] m = [abra,cada,bra] m = [abra,cad,abra] m = [abra,ca,dabra] m = [abr,acada,bra] m = [abr,acad,abra] m = [abr,aca,dabra] m = [ab,racad,abra] m = [ab,raca,dabra] m = [a,braca,dabra] */ go3 ?=> S = "abracadabra", abracadabra_test3(M,S,[]) , println(m=M), fail, nl. go3 => true. /* m = [abrac,adabr,a] m = [abrac,adab,ra] m = [abrac,adab,r] m = [abrac,ada,bra] m = [abrac,ada,br] m = [abrac,ada,b] m = [abrac,ad,abra] m = [abrac,ad,abr] m = [abrac,ad,ab] m = [abrac,ad,a] m = [abrac,a,dabra] ... m = [d,a,b] m = [ab,r,a] m = [a,br,a] m = [a,b,ra] m = [a,b,r] m = [b,r,a] */ go4 ?=> S = "abracadabra", abracadabra_test4(M,S,[]) , println(m=M), fail, nl. go4 => true. go5 ?=> S = "abracadabra", abracadabra_test5(M,S,[]) , println(m=M), fail, nl. go5 => true. /* Get subwords of a word. Two subwords: abalone = [aba,lone] abelian = [abel,ian] abelson = [abel,son] abetted = [abet,ted] aboveboard = [above,board] aboveground = [above,ground] abramson = [abram,son] abutted = [abut,ted] academician = [academic,ian] acceptant = [accept,ant] accession = [access,ion] accordant = [accord,ant] accordion = [accord,ion] accountant = [account,ant] accreditate = [accredit,ate] ... Three subwords abernathy = [abe,rna,thy] abolition = [abo,lit,ion] abominable = [abo,min,able] abominate = [abo,min,ate] affectionate = [affect,ion,ate] afterglow = [aft,erg,low] alabamian = [ala,bam,ian] alphameric = [alp,ham,eric] altogether = [alto,get,her] anastigmatic = [ana,stigma,tic] animadversion = [ani,mad,version] antarctic = [ant,arc,tic] antiquated = [anti,qua,ted] antisemitic = [anti,semi,tic] antithetic = [anti,the,tic] archangel = [arc,han,gel] arctangent = [arc,tan,gent] ... This is not efficient... */ go6 ?=> File = "unixdict.txt", % File = "sv_spelling_org_utf8.txt", Words = [W : W in read_file_lines(File), W.len > 2], foreach(Word in Words) % println(word=Word), if word_test2([Word1,Word2],Words,Word,[]) then println(Word=[Word1,Word2]) end % if word_test3([Word1,Word2,Word3],Words,Word,[]) then % println(Word=[Word1,Word2,Word3]) % end end, nl. go6 => true. go7 ?=> IP = "127.0.0.1", ip_test(Match,IP,[]), println(Match), nl. go7 =>true. go8 ?=> Email = "hakan.kjellerstrand@gmail.com", email_test(Match,Email,[]), println(Match), fail, nl. go8 =>true. /* Parsing Apache accesslog lines. See dcg_parse_accesslog.pi for a complete program using this. */ go9 ?=> Lines = [ "17-121-115-130.applebot.apple.com - - [01/Mar/2022:01:48:50 +0100] \"GET /picat/flabbergasted.pi HTTP/1.1\" 200 3621 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)\"", % Fake host.. "127.0.0.1 - - [19/Mar/2022:18:28:59 +0100] \"GET /jgap/pickover_puzzle1.conf HTTP/1.1\" 200 3569 \"https://t.co/1FwX1rGVTq\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Mobile/15E148 Safari/604.1\"", % No proper Method/URL/HTTP "127.0.0.1 - - [22/Mar/2022:18:03:02 +0100] \"-\" 408 0 \"-\" \"-\"" ], member(Line,Lines), println(line=Line), parse_apachelog_list(Entries,Line,[]), println("Entried:"), foreach(Entry in Entries) println(Entry) end, nl, println("\nAs a map:"), parse_apachelog_map(Map,Line,[]), foreach(Key=Value in Map) println(Key=Value) end, nl, fail, nl. go9 => true. /* Testing any_except_string on a text that contains records, here separated by BEGIN and END. The issue with any_except(C,"...") is that it can only check for one character, not the string "END" that is needed here. The fix (hack) is to preparse the string and replace the END marker with some unique character + END, e.g. |END and then we can use any_except(C,"|") The drawback is that it must a character that's not in the string. Also, this replacement can be slow for very large strings. */ go10 ?=> % The E in sEcond makes it not possible to use % any_except(C,"E") S = "BEGIN first END BEGIN sEcond END BEGIN third END", % Here we fix this: "END" -> "|END" S := replace_string(S,"END","|END"), any_except_string_test(R,S,[]), println(R), fail, nl. go10 => true. % % Same as go10/0 but any_except_string_test2 use any_except_string % so we don't have to do the replace "END" -> "|END" hack. % go10b ?=> S = "BEGIN first END BEGIN sEcond END BEGIN third END", any_except_string_test2(R,S,[]), println(R), fail, nl. go10b => true. /* "Generator version" if IP_test. This is not completely correct... */ go11 ?=> member(N,1..100), println(n=N), bp.length(B,N), % B = "0.0.0.0", ip_test_gen(A,B,[]), println(a=A), println(b=B), nl, fail, nl. go11 => true. % % Generate a^nb^b and check the result. % go12 ?=> % Generates a{n}b{n} an_bn_test(X,[]), println(x=X), % Check the result an_bn_check(S,X,[]), println(s=S), nl, (X.length < 20 -> fail ; true), nl. go12 => true. % "Reverse" a^nb^b: Given the "parse tree" generate the strings go12b ?=> an_bn_check($[as("aa"),bs("bb")],X1,[]), println(x1=X1), an_bn_check($[as("aaaa"),bs("bbbb")],X2,[]), println(x2=X2), if not an_bn_check($[as("aaaa"),bs("bbbbb")],_X3,[]) then println(x3=ok) else println(x3=not_ok) end, nl, nl. go12b => true. % % Formal language: a^nb^2nc^3n % go13 ?=> an_bn_cn(Count,L,[]), succ2num_dcg(Count,N,_,[]), % convert succ -> number println(L=Count=N), an_bn_cn_test(S,L,[]), println(s=S), % println(n=succ2num(Count)), nl, (N < 7 -> fail ; true), nl, % number -> successor notation succ2num_dcg(SS,10,_,_), println(SS), nl. go13 => true. % % Balancing parenthesis, brackets, braces. % go14 ?=> Text1 = "This { is a test } of [ balancing parens] {of different kinds}, (This is the last one.) Ignore this text.", println(text1=Text1), bal_test(Matched1,Text1,[]), println(matched1=Matched1), nl, Text2 = "This {will be matched}. {This will not be matched since it's no well balanced.) ", println(text2=Text2), bal_test(Matched2,Text2,[]), println(matched2=Matched2), nl. go14 => true. % % Generating possible string of Khadaffi/Ghadaffi given a DCG. % https://stackoverflow.com/questions/5365283/regular-expression-to-search-for-gadaffi % Translated the regexp % \b[KGQ]h?add?h?af?fi\b % to a DCG. % % It generates the following 48 spellings: % [Kadafi,Kadaffi,Kadhafi,Kadhaffi,Kaddafi,Kaddaffi,Kaddhafi,Kaddhaffi,Khadafi,Khadaffi,Khadhafi,Khadhaffi,Khaddafi,Khaddaffi,Khaddhafi,Khaddhaffi,Gadafi,Gadaffi,Gadhafi,Gadhaffi,Gaddafi,Gaddaffi,Gaddhafi,Gaddhaffi,Ghadafi,Ghadaffi,Ghadhafi,Ghadhaffi,Ghaddafi,Ghaddaffi,Ghaddhafi,Ghaddhaffi,Qadafi,Qadaffi,Qadhafi,Qadhaffi,Qaddafi,Qaddaffi,Qaddhafi,Qaddhaffi,Qhadafi,Qhadaffi,Qhadhafi,Qhadhaffi,Qhaddafi,Qhaddaffi,Qhaddhafi,Qhaddhaffi] % % go15 ?=> DCG = (("K";"G";"Q"),("";"h"),"ad",("";"d"),("";"h"),"a",("";"f"),"fi"), All = dcg_generate(DCG), println(All), println(len=All.len), nl. go15 => true. % % US State/Territory/Associate codes: https://www.regexlib.com/REDetails.aspx?regexp_id=471 % """ % The RE match U.S. state abbreviation used by the U.S. Post Office. % """ % This also includes some Territory/Associate codes. % % The regexp from this site: % (?-i:A[LKSZRAEP]|C[AOT]|D[EC]|F[LM]|G[AU]|HI|I[ADLN]|K[SY]|LA|M[ADEHINOPST]|N[CDEHJMVY]| % O[HKR]|P[ARW]|RI|S[CD]|T[NX]|UT|V[AIT]|W[AIVY])$ % is not correct. The first part should instead be % A[LKSZR] % % Converting the original regepx generated the following 62 codes: % [AL,AK,AS,AZ,AR,AA,AE,AP,CA,CO,CT,DE,DC,FL,FM,GA,GU,HI,IA,ID,IL,IN,KS,KY,LA,MA,MD,ME,MH,MI, % MN,MO,MP,MS,MT,NC,ND,NE,NH,NJ,NM,NV,NY,OH,OK,OR,PA,PR,PW,RI,SC,SD,TN,TX,UT,VA,VI,VT,WA,WI,WV,WY] % But: % - AA is not correct! % - AE is not correct! % - AP is not correct! % % This is fixed below. % go16 ?=> % The codes from https://www.infoplease.com/us/postal-information/state-abbreviations-and-state-postal-codes CorrectCodes = ["AL","AK","AS","AZ","AR","CA","CO","CT","DE","DC","FL","FM","GA","GU","HI","ID","IL","IN", "IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MH","MP","MT","NE","NV","NH","NJ", "NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV", "WI","WY","PW","PR","VI"], DCG = ( % this is not correct: it yields AA,AE,AP which is not in the list above % ("A",("L";"K";"S";"Z";"R";"A";"E";"P")); ("A",("L";"K";"S";"Z";"R")); % corrected, skipping A, E, and P ("C",("A";"O";"T")); ("D",("E";"C")); ("F",("L";"M")); ("G",("A";"U")) ; "HI" ; ("I",("A";"D";"L";"N")) ; ("K",("S";"Y")) ; "LA" ; ("M",("A";"D";"E";"H";"I";"N";"O";"P";"S";"T")) ; ("N", ("C";"D";"E";"H";"J";"M";"V";"Y")); ("O",("H";"K";"R")) ; ("P",("A";"R";"W")) ; "RI"; ("S",("C";"D")) ; ("T",("N";"X")) ; "UT" ; ("V",("A";"I";"T")); ("W",("A";"I";"V";"Y")) ), All = dcg_generate(DCG), println(All), println(len=All.len), foreach(C in All) if not membchk(C,CorrectCodes) then println(incorrect=C) end end, foreach(C in CorrectCodes) if not membchk(C,All) then println(missing=C) end end, nl. go16 => true. % This is from an old Unicon program. % Given the text % "Adam’s birthday is February 28, 1969. He is no spring chicken." % Identify the date. % I normalized to an ISO Date. ;-) % go17 ?=> S = "Adam’s birthday is February 28, 1969. He is no spring chicken.", println(s=S), date_test([Year,Month,Day],S,[]), println([year=Year,month=Month,day=Day]), nl, S2 = "Hakan’s birthday is 1957-01-12. He is no spring chicken.", println(s2=S2), date_test([Year2,Month2,Day2],S2,[]), println([year2=Year2,month2=Month2,day2=Day2]), nl, % DD/MM/YY S3 = "Anna’s birthday is 1/16/57. She is no spring chicken.", println(s3=S3), date_test([Year3,Month3,Day3],S3,[]), println([year3=Year3,month3=Month3,day3=Day3]), nl, % DD/MM/YY or MM/DD/YY? S4 = "Anna’s birthday is 1-16-57. She is no spring chicken.", println(s4=S4), date_test([Year4,Month4,Day4],S4,[]), println([year4=Year4,month4=Month4,day4=Day4]), nl, S5 = "Anna’s birthday is 1.16.57. She is no spring chicken.", println(s5=S5), date_test([Year5,Month5,Day5],S5,[]), println([year5=Year5,month5=Month5,day5=Day5]), nl, SFail = "Peter says his birthday is February 31, 1969. He is lying.", println(sFail=SFail), if once(date_test([YearFail,MonthFail,DayFail],SFail,[])) then println([yearFail=YearFail,monthFail=MonthFail,dayFail=DayFail]) else println("Cannot find a valid date.") end, % We got some repetition with fail, % especially for SFail, but also for the DD MM YY.. formats % fail, nl. go17 => true. % DD/MM/YY or MM/DD/YY? go17b ?=> S = "Petra’s birthday is 1/3/19. She is no spring chicken.", println(s=S), date_test([Year,Month,Day],S,[]), println([year=Year,month=Month,day=Day]), fail, nl. go17b => true. % % The DCGs. % % Simple pattern test abracadabra_test1([M1,M2,M3]) --> any_chars(M1,"ab"), any(M2), "cad", any(M3). % Capture previous pattern (M1) abracadabra_test2([M1,M2]) --> any(M1), any(M2), any(M1). % Just testing some with upper/lower abracadabra_test3([M1,M2,M3]) --> some(M1,1,5), some(M2,1,5), some(M3,1,5). % Skipping anchoring of the string with any. abracadabra_test4([M1,M2,M3]) --> any, some(M1,1,5), some(M2,1,5), some(M3,1,5), any. % Require that pre and post are at leats one characters abracadabra_test5([M1,M2,M3]) --> some, some(M1,1,5), some(M2,1,5), some(M3,1,5), some. % Subwords % Two words word_test2([Word1,Word2],Words) --> any_member(Word1,Words), any_member(Word2,Words). % Three words word_test3([Word1,Word2,Word3],Words) --> any_member(Word1,Words), any_member(Word2,Words), any_member(Word3,Words). % IP test ip_test([M1,M2,M3,M4]) --> digits_len(M1,1,3), ".", digits_len(M2,1,3), ".", digits_len(M3,1,3), ".", digits_len(M4,1,3). digits_len(Digits,Lower,Upper) --> digits(Digits), { Digits.len >= Lower, Digits.len <= Upper}. % Very experimental. % Generator version of digits digits_gen([C|Rest]) --> [C], {member(C,"0123456789")}, digits_gen(Rest). digits_gen([]) --> []. digits_len_gen(Digits,Lower,Upper) --> digits_gen(Digits), { Digits.len >= Lower, Digits.len <= Upper}. ip_test_gen([M1Int,M2Int,M3Int,M4Int]) --> digits_len_gen(M1,1,3), ".", {(M1.len > 1 -> M1[1] != "0" ; true)}, digits_len_gen(M2,1,3), ".", digits_len_gen(M3,1,3), ".", digits_len_gen(M4,1,3), {M1Int=M1.to_int, M2Int=M2.to_int, M3Int=M3.to_int, M4Int=M4.to_int}. % Simple email verification % email_test([Name,Org,Domain]) --> some_except(Name,"@"), "@", some_except(Org,"."), ".", some(Domain,1,3). email_test([Name,Org,Domain]) --> some(Name), "@", some(Org), ".", some(Domain,2,3). % Parsing a webserver log parse_apachelog_list([Host,Date,Method, URL,HTTP,HTTPCode,Size,Referer,Browser]) --> any_except(Host," "), " - - ", "[", any_except(Date,"]"), "]", " ", "\"",any_except(MethodURLHTTP,"\""),"\"", % Heuristic for finding strange Method + URL + HTTP entries. % Note that when using -> ; in a code section, one have to % wrap it by (...). {( [1 : C in MethodURLHTTP, C==' '].len == 2 -> MethodURLHTTP.split() = [Method,URL,HTTP] ; Method="?",URL="?",HTTP="?" )}, " ", digits(HTTPCode), " ", digits(Size), " ", "\"", any_except(Referer,"\""), "\"", " ", % The browser entry might contain \" so I just take everything... any(Browser). % Return a map with the entries parse_apachelog_map(Map) --> parse_apachelog_list([Host,Date,Method,URL,HTTP,HTTPCode,Size,Referer,Browser]), {Map = new_map([host=Host,date=Date,method=Method, url=URL,http=HTTP,http_code=HTTPCode, size=Size,referer=Referer,browser=Browser])}. % Accept any characters except the string ExceptString. % Does NOT work! % any_except_string(String,ExceptString) --> some(String), % % {println(string=String)}, % {String != ExceptString}, % {println(string=String)}. % , % % any_except_string(Rest,ExceptString). % % any_except_string([],_ExceptString) --> []. % Note: The string must be preprocess so END is replaced with |END, % otherwise one cannot use any_except/2. any_except_string_test([S|Ss]) --> "BEGIN", " ", any_except(S,"|"), " ", "|END", " ", any_except_string_test(Ss). any_except_string_test([S]) --> "BEGIN", " ", any_except(S,"|"), " ", "|END". any_except_string_test([]) --> []. % % Same as any_except_string_test/1 except that it uses any_except_string/2 % any_except_string_test2_1(S) --> "BEGIN", " ", any_except_string(S,"END"), " ", "END". any_except_string_test2([S|Ss]) --> any_except_string_test2_1(S), " ", any_except_string_test2(Ss). any_except_string_test2([S]) --> any_except_string_test2_1(S). any_except_string_test2([]) --> []. % Formal language a^nb^n an_bn_test --> []. an_bn_test --> as, an_bn_test, bs. as --> "a". bs --> "b". % Check an_bn an_bn_check([as(As),bs(Bs)]) --> many_of(As,"a"), many_of(Bs,"b"), {As.len == Bs.len}. % Formal language: a^nb^2nc^3n an_bn_cn(Count) --> as2(Count), bs2(Count), cs2(Count). as2(0) --> []. as2(succ(Count)) --> "a", as2(Count). bs2(0) --> []. bs2(succ(Count)) --> "bb", bs2(Count). cs2(0) --> []. cs2(succ(Count)) --> "ccc", cs2(Count). % Checking a^nb^2nc^3n an_bn_cn_test([as(As),bs(Bs),cs(Cs)]) --> many_of(As,"a"), many_of(Bs,"b"), many_of(Cs,"c"), {2*As.len == Bs.len, 3*As.len == Cs.len}. % Just for fun: successor notation <=> numbers % "Plain Prolog" (reversible) succ2num(0,0). succ2num(succ(N),N2) :- succ2num(N,N1), N2=N1+1. % As a function (not reversible, it's a function) succ2num(0) = 0. succ2num(succ(N)) = N1+1 => succ2num(N,N1). % As a DCG (though we don't have any string to be parsed/recognized) % (reversible) succ2num_dcg(0,0) --> []. succ2num_dcg(succ(N),N2) --> succ2num_dcg(N,N1), {N2 = N1+1}. % Balanced parens: bal_test_one(Text) --> one_of(Begin,"([{"), {( Begin == '(' -> End = ")" ; Begin == '[' -> End = "]" ; Begin == '{' -> End = "}" ; true )}, any_except(Text,End), any(End). bal_test([Text|Texts]) --> any, bal_test_one(Text), any, bal_test(Texts). bal_test([Text]) --> bal_test_one(Text). bal_test([]) --> []. % tested in go17 % table date_test(Date) --> any, date(Date), any, ".". % "Original" date format: , Year date([Year,Month,Day]) --> month(Month), space, day(Day), ", ", year(Year), {check_date(Year, Month, Day)}. % ISO date: YYYY-MM-DD date([Year,Month,Day]) --> year(Year), "-", month_digits(Month), "-", day(Day), {check_date(Year, Month, Day)}. % Not ISO: DD/MM/YY, DD-MM-YY, DD.MM.YY, no leading 0 % Must be the same separator date([Year,Month,Day]) --> day2(Day), one_of(Sep,"-/."), month_digits(Month), [Sep], year2digits(Year), {check_date(Year, Month, Day)}. % Not ISO: MM/DD/YY, MM-DD-YY, MM.DD.YY, no leading 0 % Must be the same separator date([Year,Month,Day]) --> month_digits(Month), one_of(Sep,"-/."), day2(Day), [Sep], year2digits(Year), {check_date(Year, Month, Day)}. month(Month) --> any_member(Month,["January","February","March","April","May","June","July", "August","September","October","November","December"]). month_digits(Month) --> digits(Month), {Month != "", membchk(Month.to_int,1..12)}. % 2 digits day(Day) --> digits(Day), {Day.len=2}. % No leading 0 (for the ././.. dates day2(Day) --> digits(Day), {Day != "", Day[1] != '0'}. year(Year) --> digits(Year), {Year.len == 4}. year2digits(Year) --> digits(Year), {Year.len == 2}. % Check the date. % Sorry, I couldn't resist.... check_date(Year1,Month1,Day) :- Year1 != "", Day != "", Month1 != "", Month = cond(Month1.len>2, Month1.month_to_int, Month1.to_int), if Year1.len == 2 then % sigh! Year = ("20"++Year1).to_int ; Year = ("19"++Year1).to_int else Year = Year1.to_int end, Day.to_int <= max_days_in_month(Year, Month). % Convert string version of month to an int (1 based) month_to_int(Month) = I => L = ["January","February","March","April","May","June","July", "August","September","October","November","December"], nth(I,L,Month). % Is Year a leap year? leap_year(Year) => (Year mod 4 == 0, Year mod 100 != 0) ; Year mod 400 == 0. % How many days has a month? max_days_in_month(Year,Month) = Days => if member(Month, [1,3,5,7,8,10,12]) then Days = 31 elseif member(Month,[4,6,9,11]) then Days = 30 else if leap_year(Year) then Days = 29 else Days = 28 end end.