#!/usr/bin/python -u
# -*- coding: utf-8 -*-
#
#
# Isomorphic words:
#
#  Words that are translatable into others according to
#  number of occurences of characters ("profile") 
#

# Created by Hakan Kjellerstrand/hakank@gmail.com
# Thu May  6 17:27:37 2004
#
# The CGI version of this program was presented in the (swedish) blog post:
# "Isomorfa ord (Isomorphic Words)"
# http://www.hakank.org/webblogg/archives/000659.html
#
import sys, string, re, os

#
# Create a word structure, i.e. a list of the number of
# occurences of the specific characters
# E.g. The word structure for and inexact match is
#
#     "Håkan Kjellerstrand" is
#     [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# 
# For _exact_ match a completely different structure is used
# where the positions is relevant:
#     "Håkan Kjellerstrand" is
#     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 12, 13, 14, 12, 3, 4, 18]
#
# Here we keep a pointer of the first match of a character.
#
def word_structure(str, exact_match):

    hash = {}
    for s in str:
        hash[s] = 1 + hash.get(s,0)

    if not exact_match:
        l = hash.values()
        l.sort()

    else:
        l = [0]*len(str)
        pos = 0
        positions = {}
        for c in str:
            if c in positions:
                l[pos] = positions[c]
            else:
                l[pos] = pos
                positions[c] = pos

            pos += 1

    return (l, hash)


# and now the other part: make _one_ mapping from the source word to target
# arguments: the two word structures
#  Think partitions:
#    1 occurrences <-> 1 occurences
#    2  - "" -     <-> 2   - "" ---
#    n  - "" -     <-> n   - "" ---
#
# NOTE: This don't check for isomorphisms!
#
def partition(w_hash):
    chars = {}
    for key, value in w_hash.items():
        chars.setdefault(value, []).append(key)
    return chars


#
# Given two word structures, print the corresponding structures
# Note: no isomorphism checking is done here!
#
def print_partitions(w1_hash, w2_hash, print_groups, print_example_map, exact_match):
    w1 = partition(w1_hash)
    w2 = partition(w2_hash)
    partition_keys = w2.keys()

    for p in partition_keys:
        w1_str = "".join(w1[p])
        w2_str = "".join(w2[p])

        if print_groups:
            w1_str_pres = w1_str
            w2_str_pres = w2_str
            w1_str_pres = string.replace(w1_str_pres, " ", "' '")
            w2_str_pres = string.replace(w2_str_pres, " ", "' '")
            print "\t",

            plural = "s"
            if p == 1:
                plural = ""
            if exact_match:
                print "%s char%s: \"%s\" -> \"%s\"" % (p, plural,w1_str_pres, w2_str_pres)
            else:
                print "%s char%s: [%s] -> [%s]" % (p, plural, w1_str_pres, w2_str_pres)


            # if print_example_map:
            # print p,":"
            # for i in range(len(w1[p])):
            #    print w1_str[i], " ->; ", w2_str[i]


#
#
# check_isomorphism
# 
#
def check_isomorphism(w1, w2, w1_list, w2_list):

    if len(w1) == len(w2):
        if len(w_list) == len(w2_list) and w1_list == w2_list:
            return 1
        else:
            return 0

    else:
        return 0



#
#
# check_dict()
#
#
def check_dict(str, str_list, str_hash, file, exact_match=0, print_groups=0, print_example_map=1):

    str_len      = len(str)
    str_list_len = len(str_list)

    try:
        file = open(file)
    except IOError, err:
        raise AssertionError("Couldn't open %s for reading : %s" % (file, err.strerror))

    num_words = 0
    for w in file:
        # note: we don't strip the word until we have to,
        #       but since there are "\n" at end -> len(w) - 1
        if len(w)-1 == str_len:
            w = w.strip()
            w = w.lower()

            w_list, w_hash = word_structure(w, exact_match)

            if len(w_list) == str_list_len and str_list == w_list:
                num_words += 1
                print w
                
                if print_groups:
                    print_partitions(str_hash, w_hash, print_groups, print_example_map, exact_match)
                    print

    return num_words




#
#
# Main program
#
#
if __name__ == '__main__':


    # Testwords
    # str = "kjellerstrand"
    str = "isomorphic"
    other_str = ""

    print_example_map = 0
    print_groups = 1

    exact_match = 1
    lang = "eng"
    language_str = "english"


    if len(sys.argv) > 1:
        str = sys.argv[1]

    if len(sys.argv) > 2:
        exact_match = int(sys.argv[2])

    if len(sys.argv) > 3:
        lang = sys.argv[3]


    str_list, str_hash = word_structure(str, exact_match)
    exact_match_str = "yes";
    if exact_match == 0 :
        exact_match_str = "no"


    file = "/usr/dict/words"
    if lang == "swe":
        file = "swedish_words.txt" # Note: Replace this with a swedish dict file
        language_str = "swedish"

    print_groups_str = "yes"
    if not print_groups:
        print_groups_str = "no"


    print "Word: %s" % str
    print "Word structure of '%s': %s" % (str,str_list)
    print "Exact isomorphism: %s" % exact_match_str
    print "Language: %s" %  language_str
    print "Print mappings: %s" %  print_groups_str

    #
    # Check isomorphics with other word
    #
##  if other_str:
##          other_str_list, other_str_hash = word_structure(other_str, exact_match)
##          res = check_isomorphism(str, other_str, str_list, other_str_list)
##          other_check = "is not isomorphic"
##          if res:
##             other_check = "is isomorphic"
##          print "Check the other word: '%s' %s to '%s'" % (str, other_check, other_str)


    interesting_sequence = 1
    not_interesting_message = ""
    if exact_match:
        if str_list == range(len(str_list)):
            interesting_sequence = 0
    else:
        if str_list == [1] * len(str_list):
            interesting_sequence = 0

    if not interesting_sequence:
        print_groups = 0
        print "Note: This is a word structure with just unique characters. No mapping is shown.\n"



    num_words = check_dict(str, str_list, str_hash, file, exact_match, print_groups, print_example_map)
    print "\nIt was %s isomorphics words to '%s', with exact isomorphism: %s" % (num_words, str, exact_match_str)

