parser_1.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. def main(args):
  2. """Loop thru all the games and parse them."""
  3. if not os.path.isdir(args.dir):
  4. print ("The specified folder is not a directory.")
  5. sys.exit(1)
  6. NUMBER_OF_FILES = len(os.listdir(args.dir))
  7. if args.num_of_files:
  8. NUMBER_OF_FILES = args.num_of_files
  9. print ("Parsing", NUMBER_OF_FILES, "files")
  10. sql = None
  11. if not args.stdout:
  12. sql = sqlite3.connect(args.database)
  13. sql.execute("""PRAGMA foreign_keys = ON;""")
  14. sql.execute("""CREATE TABLE airdates(
  15. game INTEGER PRIMARY KEY,
  16. airdate TEXT
  17. );""")
  18. sql.execute("""CREATE TABLE documents(
  19. id INTEGER PRIMARY KEY AUTOINCREMENT,
  20. clue TEXT,
  21. answer TEXT
  22. );""")
  23. sql.execute("""CREATE TABLE categories(
  24. id INTEGER PRIMARY KEY AUTOINCREMENT,
  25. category TEXT UNIQUE
  26. );""")
  27. sql.execute("""CREATE TABLE clues(
  28. id INTEGER PRIMARY KEY AUTOINCREMENT,
  29. game INTEGER,
  30. round INTEGER,
  31. value INTEGER,
  32. FOREIGN KEY(id) REFERENCES documents(id),
  33. FOREIGN KEY(game) REFERENCES airdates(game)
  34. );""")
  35. sql.execute("""CREATE TABLE classifications(
  36. clue_id INTEGER,
  37. category_id INTEGER,
  38. FOREIGN KEY(clue_id) REFERENCES clues(id),
  39. FOREIGN KEY(category_id) REFERENCES categories(id)
  40. );""")
  41. for i, file_name in enumerate(glob(os.path.join(args.dir, "*.html")), 1):
  42. with open(os.path.abspath(file_name)) as f:
  43. parse_game(f, sql, i)
  44. if not args.stdout:
  45. sql.commit()
  46. print ("All done")