#!/usr/bin/env python3
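# Module docstring added for clarity; the summary is inferred from this file,
# not taken from upstream docs.
"""Permute word lists into candidate S3 bucket URLs, probe each candidate
with a HEAD request, and record whether the bucket exists and whether it
is publicly readable."""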
from asyncio import get_event_loop, gather, Semaphore
from collections import defaultdict
from datetime import datetime
from json import dumps
from logging import getLogger, basicConfig, INFO
from os import environ, cpu_count
from pathlib import Path
from random import choice
from sys import path
from warnings import filterwarnings

import requests
from mergedeep import merge
from requests import RequestException
from urllib3.exceptions import InsecureRequestWarning
from yaml import safe_load as load

# Allow running straight from a source checkout without installing the package.
if not __package__:
    path.insert(0, str(Path(__file__).parent.parent.parent))

from s3recon import __version__
from s3recon.constants import useragent_list, format_list
from s3recon.mongodb import MongoDB, Hit, Access

# Unverified HTTPS requests are deliberate here (verify=False below), so
# silence the warning spam.
filterwarnings("ignore", category=InsecureRequestWarning)

cpus = cpu_count() or 1

logger = getLogger(__name__)
# TODO: opt to change log-level
basicConfig(format="%(message)s", level=INFO)
def bucket_exists(url, timeout):
    """HEAD the candidate URL and report (exists, public)."""
    exists = False
    public = False
    try:
        res = requests.head(
            url,
            headers={"User-Agent": choice(useragent_list)},
            verify=False,
            timeout=timeout,
        )
        # TODO: handle redirects
        # S3 answers 404 for a missing bucket, 403 for one that exists but
        # denies access, and 200 for one that is publicly listable.
        status_code = res.status_code
        exists = status_code != 404
        public = status_code == 200
    except RequestException:
        pass
    return exists, public
async def find_bucket(url, timeout, db, sem):
    async with sem:
        # requests is blocking, so run the probe in the default thread pool;
        # otherwise each HEAD would stall the event loop and the semaphore
        # would never bound more than one request at a time.
        exists, public = await get_event_loop().run_in_executor(
            None, bucket_exists, url, timeout
        )
        if exists:
            access = Access.PUBLIC if public else Access.PRIVATE
            access_key = repr(access)
            access_word = str(access).upper()
            logger.info(f"{access_key} {access_word} {url}")
            hit = Hit(url, access)
            if db and hit.is_valid():
                db.update({"url": url}, dict(hit))
            return hit
        return None
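# Note: the default executor used above caps its workers at
# min(32, cpu_count() + 4) on CPython 3.8+, so very large --concurrency
# values are ultimately throttled by the thread pool, not the semaphore.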
def collect_results(hits):
    d = defaultdict(list)
    for hit in hits:
        url = hit.url
        access = repr(hit.access)
        d[access].append(url)
    return d.get(repr(Access.PRIVATE), []), d.get(repr(Access.PUBLIC), [])
def read_config():
    config = {}
    # Later entries win: defaults shipped with the package, then the home
    # directory, then the cwd, then an explicit S3RECON_CONFIG path.
    config_hierarchy = [
        Path(Path(__file__).parent, "s3recon.yml"),  # default
        Path(Path.home(), "s3recon.yaml"),
        Path(Path.home(), "s3recon.yml"),
        Path(Path.cwd(), "s3recon.yaml"),
        Path(Path.cwd(), "s3recon.yml"),
        Path(environ.get("S3RECON_CONFIG") or ""),
    ]
    for c in config_hierarchy:
        try:
            with open(c, "r") as f:
                merge(config, load(f) or {})
        except (IOError, TypeError):
            pass
    return config
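# A minimal example of the YAML these files may contain, assuming the key
# names consumed in main() below (all values illustrative):
#
#   database:
#     host: 127.0.0.1
#     port: 27017
#   regions:
#     - us-east-1
#     - eu-west-1
#   separators:
#     - "-"
#     - "."
#   environments:
#     - dev
#     - staging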
def json_output_template(key, total, hits, exclude):
    return {} if exclude else {key: {"total": total, "hits": hits}}
def main(words, timeout, concurrency, output, use_db, only_public):
    start = datetime.now()
    loop = get_event_loop()

    config = read_config()
    database = config.get("database")
    regions = config.get("regions") or [""]
    separators = config.get("separators") or [""]
    environments = config.get("environments") or [""]

    # Expand every template in format_list across the cartesian product of
    # words, regions, separators, and environments; the separator is only
    # applied when an environment suffix is present.
    url_list = {
        f.format(
            region=f"s3.{region}" if region else "s3",
            word=word,
            sep=sep if env else "",
            env=env,
        )
        for f in format_list
        for region in regions
        for word in words
        for sep in separators
        for env in environments
    }
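    # For instance, a hypothetical template such as
    # "https://{word}{sep}{env}.{region}.amazonaws.com" would expand
    # word="acme", sep="-", env="dev", region="us-east-1" into
    # "https://acme-dev.s3.us-east-1.amazonaws.com"; the real templates
    # live in s3recon.constants.format_list.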
    db = MongoDB(host=database["host"], port=database["port"]) if use_db else None
    sem = Semaphore(concurrency)
    tasks = gather(*(find_bucket(url, timeout, db, sem) for url in url_list))
    hits = filter(bool, loop.run_until_complete(tasks))
    private, public = collect_results(hits)

    if output:
        json_result = {
            **json_output_template(
                str(Access.PRIVATE), len(private), private, only_public
            ),
            **json_output_template(str(Access.PUBLIC), len(public), public, False),
        }
        output.write(dumps(json_result, indent=4))
        logger.info(f"Output written to file: {output.name}")

    stop = datetime.now()
    logger.info(f"Complete after: {stop - start}")
def cli():
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=lambda prog: argparse.HelpFormatter(
            prog, max_help_position=35, width=100
        ),
    )
    parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        metavar="file",
        help="write output to <file>",
    )
    parser.add_argument(
        "-d", "--db", action="store_true", help="write output to database"
    )
    parser.add_argument(
        "-p",
        "--public",
        action="store_true",
        help="only include 'public' buckets in the output",
    )
    parser.add_argument(
        "-t",
        "--timeout",
        type=int,
        metavar="seconds",
        default=30,
        help="http request timeout in <seconds> (default: 30)",
    )
    parser.add_argument(
        "-v", "--version", action="version", version=f"%(prog)s {__version__}"
    )
    parser.add_argument(
        "-c",
        "--concurrency",
        type=int,
        metavar="num",
        default=cpus,
        help=f"maximum <num> of concurrent requests (default: {cpus})",
    )
    # parser.add_argument("words", nargs="?", type=argparse.FileType("r"), default=stdin, help="list of words to permute")
    parser.add_argument(
        "word_list",
        nargs="+",
        type=argparse.FileType("r"),
        help="read words from one or more <word-list> files",
    )
    args = parser.parse_args()

    # Deduplicate words across all supplied files, skipping blank lines.
    words = {line.strip() for f in args.word_list for line in f if line.strip()}
    main(
        words=words,
        timeout=args.timeout,
        concurrency=max(1, args.concurrency),
        output=args.output,
        use_db=args.db,
        only_public=args.public,
    )
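# Example invocations (word-list paths illustrative; assumes the package is
# importable as s3recon):
#
#   python -m s3recon words.txt
#   python -m s3recon -p -t 10 -c 8 -o results.json words.txt more-words.txt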
if __name__ == "__main__":
    cli()