def word_to_keep(word, freq):
    """Return True if *word* qualifies for the hangman word list.

    A word qualifies when it is 5 to 9 characters long, contains at least
    four distinct characters, and has a frequency of more than 2000000.
    """
    return 5 <= len(word) <= 9 and len(set(word)) >= 4 and freq > 2000000


def select_words(lines):
    """Yield the words to keep from an iterable of ``word,frequency`` lines.

    Leading/trailing whitespace is stripped from each line, and lines that
    are empty after stripping are skipped. Every remaining line must contain
    exactly one comma separating the word from an integer frequency.

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            fields, or its frequency field is not an integer.
    """
    for line in lines:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at end of file) would
            # otherwise make the two-name unpacking below fail.
            continue
        word, freq = line.split(',')
        if word_to_keep(word, int(freq)):
            yield word


# Input file downloaded from
# https://www.kaggle.com/datasets/rtatman/english-word-frequency
input_filename = "unigram_freq.csv"
output_filename = "hangman_words.txt"


def main():
    """Read ``input_filename`` and write the kept words to ``output_filename``.

    The first line of the input is treated as a heading and discarded. Each
    kept word is written on its own line.
    """
    # The input is opened first so that a missing input file does not leave
    # a truncated output file behind; the `with` statement closes both files
    # even if parsing raises.
    with open(input_filename, 'r', encoding='utf-8') as infile, \
            open(output_filename, 'w', encoding='utf-8') as outfile:
        next(infile, None)  # throw away the heading line (line #1)
        outfile.writelines(word + '\n' for word in select_words(infile))


if __name__ == "__main__":
    main()