123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- # -*- coding: utf-8 -*-
- import argparse
- import codecs
- import shutil
- import os
- import json
- import hashlib
- from operator import attrgetter
- import bleach
- import dateutil.parser
- import requests
- from jinja2 import Environment, FileSystemLoader
# Absolute path of the ``theme`` directory located next to this module.
# It is used as the Jinja2 template root and as the source of static assets.
THEME_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'theme',
)
class Author(object):
    """Person referenced by the data (post author, commenter or liker).

    :param data: mapping providing the ``id`` and ``name`` keys
    """

    def __init__(self, data):
        # Both keys are mandatory; a missing one raises KeyError.
        self.id, self.name = data['id'], data['name']
class Comment(object):
    """A comment attached to an entry.

    :param item: mapping describing one comment. ``id`` and ``created_time``
        are required; ``picture``, ``message``, ``from`` and ``likes`` are
        optional.
    """

    def __init__(self, item):
        self.id = item['id']
        self.picture = item.get('picture')
        # The message is passed through bleach.linkify so that URLs in the
        # text become HTML links; comments without text keep content None.
        self.content = bleach.linkify(item['message']) if 'message' in item else None
        # 'from' is treated as optional, exactly like Entry does, so that a
        # comment without author information does not abort the whole run.
        self.author = Author(item['from']) if 'from' in item else None
        self.date = dateutil.parser.parse(item['created_time'])
        self.likes = [Author(d) for d in item['likes']['data']] if 'likes' in item else []
class Entry(object):
    """A top-level post, together with its likes and comments.

    :param item: mapping describing one post. ``id`` and ``created_time``
        are required; ``picture``, ``message``, ``from``, ``likes`` and
        ``comments`` are optional.
    """

    def __init__(self, item):
        self.id = item['id']
        self.picture = item.get('picture')

        # Text is run through bleach.linkify; posts without text get None.
        if 'message' in item:
            self.content = bleach.linkify(item['message'])
        else:
            self.content = None

        if 'from' in item:
            self.author = Author(item['from'])
        else:
            self.author = None

        self.date = dateutil.parser.parse(item['created_time'])

        self.likes = []
        if 'likes' in item:
            self.likes = [Author(liker) for liker in item['likes']['data']]

        self.comments = []
        if 'comments' in item:
            self.comments = [Comment(raw) for raw in item['comments']['data']]
def render_template(output_path, tpl_name, filename, **options):
    """Render a theme template and write the result as a UTF-8 file.

    :param output_path: directory receiving the rendered file; it is created
        when it does not exist yet
    :param tpl_name: name of the template, looked up under ``THEME_PATH``
    :param filename: name of the file to write inside ``output_path``
    :param options: keyword arguments handed to the template as its context
    """
    env = Environment(loader=FileSystemLoader(THEME_PATH))
    output = env.get_template(tpl_name).render(**options)

    full_path = os.path.join(output_path, filename)
    # Nothing else guarantees that the output directory exists at this
    # point, so make sure the file can actually be created.
    target_dir = os.path.dirname(full_path)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    with codecs.open(full_path, 'w+', encoding='utf-8') as f:
        f.write(output)
def copy(source, destination):
    """Copy a file or a whole directory tree from source to destination.

    Taken from pelican.

    A file source is copied to the file path ``destination`` (missing parent
    directories are created). A directory source is copied recursively into
    the directory ``destination``. Nothing happens when the source is neither
    a file nor a directory, or when the destination of a directory copy
    exists but is not a directory.

    :param source: the source file or directory
    :param destination: the destination file or directory
    """
    src_root = os.path.abspath(os.path.expanduser(source))
    dst_root = os.path.abspath(os.path.expanduser(destination))

    # Single file: ensure the parent directory exists, then copy.
    if os.path.isfile(src_root):
        parent = os.path.dirname(dst_root)
        if not os.path.exists(parent):
            os.makedirs(parent)
        shutil.copy2(src_root, dst_root)
        return

    if not os.path.isdir(src_root):
        return

    if not os.path.exists(dst_root):
        os.makedirs(dst_root)
    if not os.path.isdir(dst_root):
        return

    for current, _subdirs, names in os.walk(src_root):
        target = os.path.join(dst_root, os.path.relpath(current, src_root))
        if not os.path.isdir(target):
            # os.walk is top-down, so the parent was created on an earlier
            # iteration and a plain mkdir is enough.
            os.mkdir(target)
        for name in names:
            src_file = os.path.join(current, name)
            if os.path.isfile(src_file):
                shutil.copy2(src_file, os.path.join(target, name))
def download(url, output_path):
    """Download ``url`` into ``output_path`` unless it is already there.

    The local file is named after the MD5 hex digest of the URL, so the same
    URL always maps to the same file and is fetched only once.

    :param url: address of the resource to fetch (text or bytes)
    :param output_path: directory receiving the file; created if missing
    :return: the name of the file (not its full path) inside ``output_path``.
        The name is returned even when the server did not answer with a 200
        status, in which case no file is written.
    """
    print("downloading %s" % url)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # hashlib only accepts bytes: encode text URLs first, otherwise
    # md5.update() raises TypeError on Python 3.
    url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
    filename = hashlib.md5(url_bytes).hexdigest()
    file_path = os.path.join(output_path, filename)

    if not os.path.exists(file_path):
        resp = requests.get(url, stream=True)
        try:
            if resp.status_code == 200:
                with open(file_path, 'wb') as f:
                    # Let urllib3 undo gzip/deflate while streaming the body.
                    resp.raw.decode_content = True
                    shutil.copyfileobj(resp.raw, f)
        finally:
            # A streamed response keeps its connection open until closed.
            resp.close()
    return filename
def get_attachments(item, image_path, token):
    """Fetch the attachments of ``item`` and download their pictures.

    Queries the Graph API ``/<item.id>/attachments`` endpoint. When the
    response contains a non-empty ``data`` list, ``item.pictures`` is set to
    the list of local file names of every picture found in those attachments.
    Otherwise ``item`` is left untouched.

    :param item: object exposing an ``id`` attribute; receives ``pictures``
    :param image_path: directory where pictures are stored
    :param token: Graph API access token
    """
    resp = requests.get(
        'https://graph.facebook.com/v2.12/%s/attachments' % item.id,
        params={'access_token': token},
    )
    # Decode the body once instead of re-parsing it for every access.
    payload = resp.json()
    attachments = payload.get('data') if isinstance(payload, dict) else None
    if not attachments:
        return

    # Collect the pictures of every attachment, not only the first one.
    # get_from_type may yield nothing for unsupported attachment types.
    pictures = []
    for attachment in attachments:
        pictures.extend(get_from_type(attachment, image_path) or [])
    item.pictures = pictures
def get_from_type(item, image_path):
    """Download the pictures of one attachment and return their file names.

    ``photo`` attachments yield a single file name. ``album`` attachments
    yield the file names of all their sub-attachments, recursively. Any other
    type yields an empty list, so callers can always iterate over the result.

    :param item: mapping describing one attachment
    :param image_path: directory where pictures are stored
    :return: list of file names as returned by ``download``
    """
    kind = item.get('type')
    if kind == 'photo':
        return [download(item['media']['image']['src'], image_path)]
    if kind == 'album':
        pictures = []
        for sub in item.get('subattachments', {}).get('data', []):
            # Sub-attachments that are not pictures contribute nothing
            # instead of breaking the whole album.
            pictures.extend(get_from_type(sub, image_path))
        return pictures
    return []
def parse_data(data):
    """Build an Entry for every item of ``data``, ordered by date.

    :param data: iterable of mappings, one per post
    :return: list of Entry objects sorted by ascending ``date``
    """
    return sorted((Entry(raw) for raw in data), key=attrgetter('date'))
def enhance_entries(entries, output_path, token):
    """Attach downloaded pictures to every entry.

    Pictures are stored in the ``pictures`` sub-directory of ``output_path``.

    :param entries: iterable of entries to process
    :param output_path: root directory of the generated archive
    :param token: Graph API access token
    """
    pictures_dir = os.path.join(output_path, 'pictures')
    for current in entries:
        get_attachments(current, pictures_dir, token)
def generate_archive(data, output_path, token):
    """Generate the archive index page from a JSON data file.

    Loads the posts, downloads their pictures and renders ``index.html``
    into ``output_path``.

    :param data: path of the JSON file containing the posts
    :param output_path: directory receiving the generated files
    :param token: Graph API access token used to fetch attachments
    """
    # Read the file explicitly as UTF-8: the default encoding of open()
    # depends on the platform locale and can garble or reject non-ASCII text.
    with codecs.open(data, 'r', encoding='utf-8') as f:
        data_json = json.load(f)
    entries = parse_data(data_json)
    enhance_entries(entries, output_path, token)
    render_template(output_path, 'index.html', 'index.html', entries=entries)
def copy_assets(output_path):
    """Copy the theme's static directories next to the generated pages.

    :param output_path: directory receiving the ``fonts`` and ``assets``
        directories taken from ``THEME_PATH``
    """
    for folder in ('fonts', 'assets'):
        copy(os.path.join(THEME_PATH, folder), os.path.join(output_path, folder))
def parse_args():
    """Parse the command-line options of the script.

    :return: namespace with the ``data``, ``output_path`` and ``token``
        attributes
    """
    # One (flag, keyword arguments) pair per supported option.
    option_table = (
        ('--data', {
            'dest': 'data',
            'default': 'data.json',
            'help': 'Location of the JSON file containing the data.',
        }),
        ('--output', {
            'dest': 'output_path',
            'default': 'output',
            'help': 'Path where to output the generated files.',
        }),
        ('--token', {
            'dest': 'token',
            'help': 'the access token from Facebook graph API.',
        }),
    )
    parser = argparse.ArgumentParser(description='Generate facebook group archive pages.')
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: build the archive pages, then copy the theme's
    # static files into the same output directory.
    cli = parse_args()
    destination = cli.output_path
    generate_archive(cli.data, destination, cli.token)
    copy_assets(destination)
    print('')
|