Source code for needles_haystack
#!/usr/bin/python3/needles_haystack.py
"""Python version of my stretch exercise based on Google tech writing exercise.
Console application that models finding needles in a haystack. Needles are words
entered by the user, and the haystack is a file containing plain unstuctured
text. User can set the number of words to enter, from 1 to 5.
"""
__author__ = """Kelli Wiseth (kelli [at] alameda [dash] tech [dash] lab [dot] com)"""
# 25-April-2020 reorganize using main() and functions in place of one long script
# Refactored code to reduce extraneous list, use dictionary to collect
# needle input and to count instances of the needle in the haystack
[docs]def haystack_builder(filename):
""" Opens specified file, reads each line, and tokenizes into individual strings.
Eliminates contractions, punctuation, parenthesis, and so on, and builds a
list named 'haystack' of each word, in sequence.
Args:
filename: The path and name of the text file to use as haystack.
Returns:
haystack: A list of each word (in order) in the text file.
"""
haystack = []
with open(filename) as textfile:
lines = textfile.readlines()
for line in lines:
nuline = line.strip().split()
clean_line = [chunk.strip('.,:;?!"()') for chunk in nuline]
for word in clean_line:
word = word.lower().strip()
haystack.append(word)
return haystack
[docs]def find_needles(needles_found, haystack):
""" Compares each word in the dictionary to each word in the haystack list.
If the needle matches the word in haystack, increments its counter value.
Args:
needles_found: A dictionary comprising each needle and its count.
haystack: A list containing all 'words' from the text file.
Returns:
needles_found: A dictionary of all needles updated with actual counts.
"""
for needle in needles_found:
for word in haystack:
if word == needle.lower().strip():
needles_found[needle] += 1
return needles_found
[docs]def main():
""" Handles input for the program and invokes the functions to perform
the search and prints the results.
"""
# 1. Welcome info about console program
print("=" * 50)
print("This console program accepts up to 5 words ('needles') \nto search for \
in a textfile ('haystack'). Specify your choices \nafter each prompt below. \
Include full path if 'filename' \nis not in the same path as this application.")
print("=" * 50)
# 2. Prompt for filename to use as haystack.
filename = input("Enter the filename to use as the haystack: ")
# 3. Validate input from user. Must be digit from 1 to 5, inclusive.
valid_entry = False # prime the while loop
while not valid_entry:
num_needles = input("How many needles do you want to look for (1 to 5)?: ")
if num_needles.isdigit() and int(num_needles) >= 1 and int(num_needles) <= 5:
num_needles = int(num_needles)
valid_entry = True
else:
print("Invalid entry. Please enter a number between 1 and 5. ")
# 4. Prompt user for each word (needle) to look for in the file (haystack).
# Build up dictionary of words and set counter for each to 0.
needles_found = {}
counter = 1
for word in range(0, num_needles):
print(str(counter), end=". ")
word = input("Enter the word: ")
needles_found[word.lower().strip()] = 0
counter += 1
# 5. Process file and words
haystack = haystack_builder(filename)
needles_found = find_needles(needles_found, haystack)
# 6. Loop through the updated dictionary and print results in the console.
print("=" * 50)
print("Your haystack contains these ", f"{len(needles_found)}", "needles:\n")
for needle in needles_found:
print("%-20s %25d" % (needle, needles_found[needle]))
if __name__ == '__main__':
main()