s3recon.py 6.1 KB
  1. #!/usr/bin/env python3
  2. from asyncio import get_event_loop, gather, Semaphore
  3. from collections import defaultdict
  4. from datetime import datetime
  5. from json import dumps
  6. from logging import getLogger, basicConfig, INFO
  7. from os import environ, cpu_count
  8. from pathlib import Path
  9. from random import choice
  10. from sys import path
  11. from warnings import filterwarnings
  12. import requests
  13. from mergedeep import merge
  14. from requests import RequestException
  15. from urllib3.exceptions import InsecureRequestWarning
  16. from yaml import safe_load as load
# When executed as a loose script (no package context), make the repository
# root importable so the absolute `s3recon` imports below resolve.
if not __package__:
    path.insert(0, str(Path(Path(__file__).parent.parent.parent)))

from s3recon import __version__
from s3recon.constants import useragent_list, format_list
from s3recon.mongodb import MongoDB, Hit, Access

# Requests are issued with verify=False; silence the resulting TLS warnings.
filterwarnings("ignore", category=InsecureRequestWarning)

cpus = cpu_count() or 1  # cpu_count() may return None; fall back to 1

logger = getLogger(__name__)
# TODO: opt to change log-level
basicConfig(format="%(message)s", level=INFO)
  27. def bucket_exists(url, timeout):
  28. exists = False
  29. public = False
  30. try:
  31. res = requests.head(
  32. url,
  33. headers={"User-Agent": choice(useragent_list)},
  34. verify=False,
  35. timeout=timeout,
  36. )
  37. # TODO: handle redirects
  38. status_code = res.status_code
  39. exists = status_code != 404
  40. public = status_code == 200
  41. except RequestException:
  42. pass
  43. return exists, public
  44. async def find_bucket(url, timeout, db, sem):
  45. async with sem:
  46. exists, public = bucket_exists(url, timeout)
  47. if exists:
  48. access = Access.PUBLIC if public else Access.PRIVATE
  49. access_key = repr(access)
  50. access_word = str(access).upper()
  51. logger.info(f"{access_key} {access_word} {url}")
  52. hit = Hit(url, access)
  53. if db and hit.is_valid():
  54. db.update({"url": url}, dict(hit))
  55. return Hit(url, access)
  56. return None
  57. def collect_results(hits):
  58. d = defaultdict(list)
  59. for hit in hits:
  60. url = hit.url
  61. access = repr(hit.access)
  62. d[access].append(url)
  63. return d.get(repr(Access.PRIVATE), []), d.get(repr(Access.PUBLIC), [])
  64. def read_config():
  65. config = {}
  66. config_hierarchy = [
  67. Path(Path(__file__).parent, "s3recon.yml"), # default
  68. Path(Path.home(), "s3recon.yaml"),
  69. Path(Path.home(), "s3recon.yml"),
  70. Path(Path.cwd(), "s3recon.yaml"),
  71. Path(Path.cwd(), "s3recon.yml"),
  72. Path(environ.get("S3RECON_CONFIG") or ""),
  73. ]
  74. for c in config_hierarchy:
  75. try:
  76. c = load(open(c, "r")) or {}
  77. merge(config, c)
  78. except (IOError, TypeError):
  79. pass
  80. return config
  81. def json_output_template(key, total, hits, exclude):
  82. return {} if exclude else {key: {"total": total, "hits": hits}}
def main(words, timeout, concurrency, output, use_db, only_public):
    """Permute bucket-name candidates from *words* and probe them concurrently.

    Args:
        words: iterable of base words to permute into bucket URLs.
        timeout: per-request HTTP timeout in seconds.
        concurrency: maximum number of in-flight probes (Semaphore size).
        output: writable file object for the JSON report, or None/falsy to skip.
        use_db: when True, persist hits via MongoDB (needs 'database' config).
        only_public: when True, exclude private buckets from the JSON report.
    """
    start = datetime.now()
    loop = get_event_loop()
    config = read_config()
    database = config.get("database")
    # Each list falls back to [""] so the product below still yields URLs
    # when a config key is absent or empty.
    regions = config.get("regions") or [""]
    separators = config.get("separators") or [""]
    environments = config.get("environments") or [""]
    # Cartesian product of every format template with all region/word/sep/env
    # combinations; a set dedupes permutations that render identically.
    url_list = {
        f.format(
            region=f"s3.{region}" if region else "s3",
            word=word,
            sep=sep if env else "",  # separator only used with an env suffix
            env=env,
        )
        for f in format_list
        for region in regions
        for word in words
        for sep in separators
        for env in environments
    }
    # NOTE(review): assumes config contains a 'database' mapping with
    # host/port whenever use_db is set — fails otherwise; confirm upstream.
    db = MongoDB(host=database["host"], port=database["port"]) if use_db else None
    sem = Semaphore(concurrency)
    tasks = gather(
        *[
            find_bucket(
                url,
                timeout,
                db,
                sem
            )
            for url in url_list
        ]
    )
    # find_bucket returns None for misses; filter(bool, ...) drops them.
    hits = filter(bool, loop.run_until_complete(tasks))
    private, public = collect_results(hits)
    if output:
        # Private section can be excluded via only_public; public never is.
        json_result = {
            **json_output_template(
                str(Access.PRIVATE), len(private), private, only_public
            ),
            **json_output_template(str(Access.PUBLIC), len(public), public, False),
        }
        output.write(dumps(json_result, indent=4))
        logger.info(f"Output written to file: {output.name}")
    stop = datetime.now()
    logger.info(f"Complete after: {stop - start}")
def cli():
    """Command-line entry point: parse arguments and invoke main()."""
    import argparse

    parser = argparse.ArgumentParser(
        # NOTE(review): this module has no docstring in view, so __doc__ is
        # presumably None here — confirm, or add a module docstring.
        description=__doc__,
        # Widen the help layout so option descriptions align in one column.
        formatter_class=lambda prog: argparse.HelpFormatter(
            prog, max_help_position=35, width=100
        ),
    )
    parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        metavar="file",
        help="write output to <file>",
    )
    parser.add_argument(
        "-d", "--db", action="store_true", help="write output to database"
    )
    parser.add_argument(
        "-p",
        "--public",
        action="store_true",
        help="only include 'public' buckets in the output",
    )
    parser.add_argument(
        "-t",
        "--timeout",
        type=int,
        metavar="seconds",
        default=30,
        help="http request timeout in <seconds> (default: 30)",
    )
    parser.add_argument(
        "-v", "--version", action="version", version=f"%(prog)s {__version__}"
    )
    parser.add_argument(
        "-c",
        "--concurrency",
        type=int,
        metavar="num",
        default=cpus,  # default: one request slot per CPU
        help=f"maximum <num> of concurrent requests (default: {cpus})",
    )
    # parser.add_argument("words", nargs="?", type=argparse.FileType("r"), default=stdin, help="list of words to permute")
    parser.add_argument(
        "word_list",
        nargs="+",
        type=argparse.FileType("r"),
        help="read words from one or more <word-list> files",
    )
    args = parser.parse_args()
    output = args.output
    db = args.db
    timeout = args.timeout
    concurrency = args.concurrency
    public = args.public
    # Flatten all word-list files into a deduplicated set of stripped lines.
    words = {l.strip() for f in args.word_list for l in f}
    # max(1, ...) clamps concurrency so the Semaphore size is always valid.
    main(words=words, timeout=timeout, concurrency=max(1, concurrency), output=output, use_db=db, only_public=public)
# Support direct execution as a script (in addition to the console entry point).
if __name__ == "__main__":
    cli()