from collections import defaultdict

# A regular dict would also work here.  However, defaultdict allows you to:
#   [1] provide an appropriate initial default value for a key, and
#   [2] specify the data type of a key's value so that default can be built.
# This simplifies the grouping code below.

# Words with three or fewer, or eight or more, characters are skipped,
# so usable Jumble words have between 4 and 7 characters (inclusive).
_MIN_WORD_LENGTH = 4
_MAX_WORD_LENGTH = 7


def _is_jumble_candidate(word):
    """Return True if *word* passes the length, alphabet and case filters."""
    return (
        # The length test runs first, so an empty string never reaches word[0].
        _MIN_WORD_LENGTH <= len(word) <= _MAX_WORD_LENGTH
        and word.isalpha()
        and not word[0].isupper()
    )


def create_jumble_words_file(input_file, output_file):
    """Write the words from *input_file* that have no anagrams to *output_file*.

    Words are grouped by their sorted character signature.  For example, the
    signature of 'care' is 'acer', and the signatures of 'acre' and 'race'
    are also 'acer', so at the end of processing anagram_dict['acer'] is
    ['care', 'acre', 'race'] (in input order).  On the other hand, 'index' is
    the only word with the signature 'deinx', so anagram_dict['deinx'] is
    ['index'].  'index' is therefore a suitable word for the Jumble game,
    but none of 'acre', 'care' or 'race' is.

    Args:
        input_file: Path of a text file containing one word per line.
        output_file: Path of the file to create (or overwrite) with the
            usable Jumble words, one word per line, in the order in which
            they first appear in *input_file*.

    Raises:
        OSError: If *input_file* cannot be read or *output_file* cannot be
            written.
    """
    # anagram_dict maps a signature to the list of all words sharing it.
    anagram_dict = defaultdict(list)
    # Words already grouped; a repeated input line must not be mistaken for
    # an anagram of itself.
    seen = set()

    # [1] Read input and group words by sorted signature.
    with open(input_file, 'r') as f:
        for line in f:
            # Strip away possible whitespace (and the newline) around the word.
            word = line.strip()

            # Skip words with three or fewer or eight or more characters,
            # words containing non-alphabetic characters, words whose first
            # character is upper case, blank lines, and repeated words.
            if not _is_jumble_candidate(word) or word in seen:
                continue
            seen.add(word)

            # Signature: the word's characters sorted alphabetically.
            signature = ''.join(sorted(word))

            # Since we are using defaultdict, there is no need to check
            # whether this is the first word seen for the signature.
            anagram_dict[signature].append(word)

    # [2] Keep only words that have no anagrams
    #     (signature groups with exactly one word).
    unique_words = [
        words[0] for words in anagram_dict.values() if len(words) == 1
    ]

    # [3] Output the result (unique_words), one word per line.
    with open(output_file, 'w') as f:
        f.writelines(f"{word}\n" for word in unique_words)


# Usage: read the file 'words.txt' and output 'jumble_words.txt'
# for use in the Jumble game.
if __name__ == "__main__":
    create_jumble_words_file('words.txt', 'jumble_words.txt')