autocomplete.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import random
  2. import string
  3. from string import ascii_lowercase as alphabet #import the set of lowercase alphabets as a string ("abcdef....z")
  4. def extract(query):
  5. dataset=set() # initialize an empty dataset which will store usernames
  6. helper('',dataset,query,0) #call helper function to iteratively find users based on query
  7. return sorted(list(dataset)) #return the lexicographically sorted list of usernames
  8. def helper(prefix,dataset,query,start_at): #helper function takes a prefix, the current dataset, the query function and the letter at which to start searching
  9. for i in range(start_at,26): #iterate through letters starting from the specified letter until z
  10. results=query(prefix+alphabet[i]) # query prefix + letter and store results
  11. if prefix+alphabet[i] in results: #edge case: if the query itself is a username
  12. dataset.add(prefix+alphabet[i]) # then add it to the dataset (eg. "al")
  13. if len(results)==5: #if query results in 5 names, that means there are potentially more
  14. dataset.update(results) #add names to dataset
  15. start_at=alphabet.index(results[4][len(prefix)+1]) #define new starting value as the next letter after the prefix in the last word of results (eg. if last word is allen when "a" is queried, new start value = "l")
  16. helper(prefix+alphabet[i],dataset,query,start_at) #recursively perform helper function as long as there are 5 results, with updated prefix and start value
  17. else: #if there are < 5 results
  18. dataset.update(results) # add usernames to dataset
  19. return
  20. def test(num_words): #function to generate a dataset of random strings for testing purposes. Takes the number of strings to be generated as an argument
  21. database = [] #initialize empty list of names
  22. for i in range(num_words): #each iteration of the loop creates a random string
  23. word = ''.join(random.choice(alphabet) for i in range(random.randint(1,20))) #a string is created by joining a random number of letters between 1 and 20
  24. database.append(word) # newly generated string is added to database
  25. database = sorted(database) #lexicographically sorted database
  26. query = lambda prefix: [d for d in database if d.startswith(prefix)][:5]
  27. print (database)
  28. print (extract(query))
  29. assert extract(query) == database
  30. def main():
  31. # Runs the solution
  32. # Simple implementation of the autocomplete API
  33. database = ["abracadara", "al", "alice", "alicia", "allen", "alter", "altercation", "bob", "eve", "evening", "event", "eventually", "mallory"]
  34. query = lambda prefix: [d for d in database if d.startswith(prefix)][:5]
  35. assert extract(query) == database
  36. main()
  37. test(25) #tests generality of solution on 25 randomly generated strings