# __main__.py

import sys as _sys

from loguru import logger
from tqdm import tqdm as _tqdm

_sys.stdout.reconfigure(encoding='utf-8', errors='backslashreplace')

logger.remove()  # Remove the default console logger provided by Loguru.
# It is too noisy, with details more appropriate for file logging.
# Only INFO and higher-priority messages are shown on the console.
logger.add(lambda msg: _tqdm.write(msg, end=""), format="{message}", level="INFO")
# Create a logging sink that writes all messages at or above the TRACE level to a per-run logfile.
logger.add("file_{time}.log", level="TRACE", encoding="utf8")  # Explicit encoding needed to avoid file write errors.


@logger.catch(
    message=
    "Whoopsie! Looks like the script crashed! This shouldn't happen, although it often does haha :P\n"
    "Most of the time, you should move the last printed file (it should be down there somewhere) "
    "to some other folder, and continue\n"
    "\n"
    "If this doesn't help, and it keeps doing this after many such moves, you can check out the issues tab:\n"
    "https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/issues \n"
    "to see if anyone has a similar issue, or contact me another way:\n"
    "https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/blob/master/README.md#contacterrors \n",
    # Still tell the system that something bad happened
    onerror=lambda e: _sys.exit(1)
)  # Wraps the entire function in a trap that prints an enhanced traceback when an exception occurs.
def main():
    import argparse as _argparse
    import json as _json
    import os as _os
    import re as _re
    import shutil as _shutil
    import hashlib as _hashlib
    import functools as _functools
    from collections import defaultdict as _defaultdict
    from datetime import datetime as _datetime
    from datetime import timedelta as _timedelta
    from pathlib import Path as Path

    try:
        from google_photos_takeout_helper.__version__ import __version__
    except ModuleNotFoundError:
        from __version__ import __version__

    import piexif as _piexif
    from fractions import Fraction  # piexif requires some values to be stored as rationals
    import math

    if _os.name == 'nt':
        import win32_setctime as _windoza_setctime

    parser = _argparse.ArgumentParser(
        prog='Google Photos Takeout Helper',
        usage='google-photos-takeout-helper -i [INPUT TAKEOUT FOLDER] -o [OUTPUT FOLDER]',
        description=
        """This script takes all of your photos from Google Photos takeout,
        fixes their exif DateTime data (when they were taken) and file creation date,
        and then copies it all to one folder.
        """,
    )
    parser.add_argument('--version', action='version', version=f"%(prog)s {__version__}")
    parser.add_argument(
        '-i', '--input-folder',
        type=str,
        required=True,
        help='Input folder with all stuff from Google Photos takeout zip(s)'
    )
    parser.add_argument(
        '-o', '--output-folder',
        type=str,
        required=False,
        default='ALL_PHOTOS',
        help='Output folder in which all photos will be placed'
    )
    parser.add_argument(
        '--skip-extras',
        action='store_true',
        help='EXPERIMENTAL: Skips the extra photos, like those that end in "edited" or "EFFECTS".'
    )
    parser.add_argument(
        '--skip-extras-harder',  # Oh yeah, skip my extras harder daddy
        action='store_true',
        help='EXPERIMENTAL: Also skips extra photos like pic(1). Implies --skip-extras.'
    )
    parser.add_argument(
        '--guess-timestamp-from-filename',
        action='store_true',
        help="EXPERIMENTAL: If all reliable methods of identifying a timestamp for a photo fail, "
             "also search the filename for common date/time patterns (e.g. 20220101_123456)."
    )
    parser.add_argument(
        "--divide-to-dates",
        action='store_true',
        help="Create folders and subfolders based on the date the photos were taken"
    )
    parser.add_argument(
        '--albums',
        type=str,
        help="EXPERIMENTAL, MAY NOT WORK FOR EVERYONE: What kind of 'albums solution' you would like:\n"
             "'json' - written in a json file\n"
    )
    args = parser.parse_args()

    logger.info('Heeeere we go!')

    PHOTOS_DIR = Path(args.input_folder)
    FIXED_DIR = Path(args.output_folder)

    TAG_DATE_TIME_ORIGINAL = _piexif.ExifIFD.DateTimeOriginal
    TAG_DATE_TIME_DIGITIZED = _piexif.ExifIFD.DateTimeDigitized
    TAG_DATE_TIME = 306
    EXIF_DATETIME_FORMAT = '%Y:%m:%d %H:%M:%S'

    photo_formats = ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tif', '.tiff', '.svg', '.heic']
    video_formats = ['.mp4', '.gif', '.mov', '.webm', '.avi', '.wmv', '.rm', '.mpg', '.mpe', '.mpeg', '.mkv', '.m4v',
                     '.mts', '.m2ts']
    extra_formats = [
        '-edited', '-effects', '-smile', '-mix',  # EN/US
        '-edytowane',  # PL
        '-bearbeitet',  # DE
        '-bewerkt',  # NL
        # Add more "edited" flags in more languages if you want. They need to be lowercase.
    ]

    # Album multimap
    album_mmap = _defaultdict(list)
    # Duplicates-by-full-hash multimap
    files_by_full_hash = _defaultdict(list)
    # Holds all files that were renamed because their names clashed in the output folder
    rename_map = dict()
    _all_jsons_dict = _defaultdict(dict)

    # Statistics:
    s_removed_duplicates_count = 0
    s_copied_files = 0
    s_cant_insert_exif_files = []  # List of files where inserting exif failed
    s_date_from_folder_files = []  # List of files where date was set from folder name
    s_skipped_extra_files = []  # List of extra files ("-edited" etc) which were skipped
    s_no_json_found = []  # List of files for which we couldn't find a json
    s_no_date_at_all = []  # List of files where there was absolutely no option to set a correct date

    FIXED_DIR.mkdir(parents=True, exist_ok=True)
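
    # Generic directory walker: calls folder_function on every directory and
    # file_function on every file that passes filter_fun; anything else just gets logged.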
    def for_all_files_recursive(
            dir: Path,
            file_function=lambda fi: True,
            folder_function=lambda fo: True,
            filter_fun=lambda file: True
    ):
        for file in dir.rglob("*"):
            if file.is_dir():
                folder_function(file)
                continue
            elif file.is_file():
                if filter_fun(file):
                    file_function(file)
            else:
                logger.debug(f'Found something weird... {file}')

    # This is required, because windoza crashes when the timestamp is negative
    # https://github.com/joke2k/faker/issues/460#issuecomment-308897287
    # This (dynamically assigning a function) mayyy be a little faster than comparing it every time (?)
    datetime_from_timestamp = (lambda t: _datetime(1970, 1, 1) + _timedelta(seconds=int(t))) \
        if _os.name == 'nt' \
        else _datetime.fromtimestamp
    timestamp_from_datetime = (lambda dt: (dt - _datetime(1970, 1, 1)).total_seconds()) \
        if _os.name == 'nt' \
        else _datetime.timestamp

    def is_photo(file: Path):
        if file.suffix.lower() not in photo_formats:
            return False
        # Skip the extra photo files, like "edited" or "effects" variants. They're kinda useless.
        nonlocal s_skipped_extra_files
        if args.skip_extras or args.skip_extras_harder:  # if the file name includes anything from extra_formats, skip it
            for extra in extra_formats:
                if extra in file.name.lower():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        if args.skip_extras_harder:
            search = r"\(\d+\)\."  # we leave the period in so it doesn't catch folders
            if bool(_re.search(search, file.name)):
                # PICT0003(5).jpg -> PICT0003.jpg. The regex matches "(5)." and replaces it with "."
                plain_file = file.with_name(_re.sub(search, '.', file.name))
                # If the original exists, ignore the (1) file, ensuring there is only one copy of each file.
                if plain_file.is_file():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        return True

    def is_video(file: Path):
        if file.suffix.lower() not in video_formats:
            return False
        return True

    def chunk_reader(fobj, chunk_size=1024):
        """Generator that reads a file in chunks of bytes"""
        while True:
            chunk = fobj.read(chunk_size)
            if not chunk:
                return
            yield chunk
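
    # Hashes a file's contents with the given algorithm; with first_chunk_only=True only
    # the first 1024 bytes are hashed, which is enough to cheaply pre-filter duplicate candidates.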
    def get_hash(file: Path, first_chunk_only=False, hash_algo=_hashlib.sha1):
        hashobj = hash_algo()
        with open(file, "rb") as f:
            if first_chunk_only:
                hashobj.update(f.read(1024))
            else:
                for chunk in chunk_reader(f):
                    hashobj.update(chunk)
        return hashobj.digest()
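
    # For a folder that has an album metadata json, maps the album (folder) name to the names
    # its files ended up with in the output folder, using full hashes and rename_map to follow renames.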
    def populate_album_map(path: Path, filter_fun=lambda f: (is_photo(f) or is_video(f))):
        if not path.is_dir():
            raise NotADirectoryError('populate_album_map only handles directories, not files')
        meta_file_exists = find_album_meta_json_file(path)
        if meta_file_exists is None or not meta_file_exists.exists():
            return False

        # means that we are processing an album, so process it
        for file in path.rglob("*"):
            if not (file.is_file() and filter_fun(file)):
                continue
            file_name = file.name
            # If it's not in the output folder
            if not (FIXED_DIR / file.name).is_file():
                full_hash = None
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except Exception as e:
                    logger.debug(e)
                    logger.debug(f"populate_album_map - couldn't get hash of {file}")
                if full_hash is not None and full_hash in files_by_full_hash:
                    full_hash_files = files_by_full_hash[full_hash]
                    if len(full_hash_files) != 1:
                        logger.error("full_hash_files list should only have one entry after duplicate removal, bad state")
                        exit(-5)
                        return False
                    file_name = full_hash_files[0].name

            # check the rename map in case there was a name clash
            if str(file) in rename_map:
                file_name = rename_map[str(file)].name
            album_mmap[file.parent.name].append(file_name)

    # PART 3: removing duplicates
    # THIS IS PARTLY COPIED FROM STACKOVERFLOW
    # https://stackoverflow.com/questions/748675/finding-duplicate-files-and-removing-them
    #
    # We now use an optimized version linked from tfeldmann
    # https://gist.github.com/tfeldmann/fc875e6630d11f2256e746f67a09c1ae
    #
    # THANK YOU Todor Minakov (https://github.com/tminakov) and Thomas Feldmann (https://github.com/tfeldmann)
    #
    # NOTE: defaultdict(list) is a multimap, all init array handling is done internally
    # See: https://en.wikipedia.org/wiki/Multimap#Python
    #
    def find_duplicates(path: Path, filter_fun=lambda file: True):
        files_by_size = _defaultdict(list)
        files_by_small_hash = _defaultdict(list)

        for file in path.rglob("*"):
            if file.is_file() and filter_fun(file):
                try:
                    file_size = file.stat().st_size
                except (OSError, FileNotFoundError):
                    # not accessible (permissions, etc) - pass on
                    continue
                files_by_size[file_size].append(file)

        # For all files with the same file size, get their hash of the first 1024 bytes
        logger.info('Calculating small hashes...')
        for file_size, files in _tqdm(files_by_size.items(), unit='files-by-size'):
            if len(files) < 2:
                continue  # this file size is unique, no need to spend cpu cycles on it
            for file in files:
                try:
                    small_hash = get_hash(file, first_chunk_only=True)
                except OSError:
                    # the file access might've changed by the time execution got here
                    continue
                files_by_small_hash[(file_size, small_hash)].append(file)

        # For all files with the same hash of the first 1024 bytes, get their hash of the full
        # file - if more than one file lands on a hash here, they are certainly duplicates
        logger.info('Calculating full hashes...')
        for files in _tqdm(files_by_small_hash.values(), unit='files-by-small-hash'):
            if len(files) < 2:
                # the hash of the first 1k bytes is unique -> skip this file
                continue
            for file in files:
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except OSError:
                    # the file access might've changed by the time execution got here
                    continue
                files_by_full_hash[full_hash].append(file)

    # Removes all duplicates in the folder
    # ONLY RUN AFTER RUNNING find_duplicates()
    def remove_duplicates():
        nonlocal s_removed_duplicates_count
        # Now that we have populated the final multimap of absolute dups, we keep the first
        # (original) file and remove all the other duplicates
        for files in _tqdm(files_by_full_hash.values(), unit='duplicates'):
            if len(files) < 2:
                continue  # this hash is unique, no need to spend cpu cycles on it
            s_removed_duplicates_count += len(files) - 1
            # TODO reconsider which dup we delete now that we're searching globally?
            # Iterate over a copy so the list can safely be shrunk down to a single entry
            # (removing items from a list while iterating over it skips elements).
            for file in files[1:]:
                file.unlink()
                files.remove(file)
        return True

    # PART 1: Fixing metadata and date-related stuff

    # Returns a json dict
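    # It first tries the sidecar json named after the file (handling the "(1)" duplicate-counter quirk),
    # then falls back to scanning every json in the folder and matching on its "title" field.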
    def find_json_for_file(file: Path):
        parenthesis_regexp = r'\([0-9]+\)'
        parenthesis = _re.findall(parenthesis_regexp, file.name)
        if len(parenthesis) == 1:
            # Fix for images/videos named like IMG_1234(1).JPG with a json named IMG_1234.JPG(1).json
            stripped_filename = _re.sub(parenthesis_regexp, '', file.name)
            potential_json = file.with_name(stripped_filename + parenthesis[0] + '.json')
        else:
            potential_json = file.with_name(file.name + '.json')

        if potential_json.is_file():
            try:
                with open(potential_json, 'r', encoding="utf-8") as f:
                    json_dict = _json.load(f)
                return json_dict
            except:
                raise FileNotFoundError(f"Couldn't find json for file: {file}")

        nonlocal _all_jsons_dict
        # Check if we need to load this folder
        if file.parent not in _all_jsons_dict:
            for json_file in file.parent.rglob("*.json"):
                try:
                    with json_file.open('r', encoding="utf-8") as f:
                        json_dict = _json.load(f)
                        if "title" in json_dict:
                            # We found a JSON file with a proper title, store it under that name
                            _all_jsons_dict[file.parent][json_dict["title"]] = json_dict
                except:
                    logger.debug(f"Couldn't open json file {json_file}")

        # Check if we have found the JSON file among all the loaded ones in the folder
        if file.parent in _all_jsons_dict and file.name in _all_jsons_dict[file.parent]:
            # Great, we found a valid JSON file in this folder corresponding to this file
            return _all_jsons_dict[file.parent][file.name]
        else:
            nonlocal s_no_json_found
            s_no_json_found.append(str(file.resolve()))
            raise FileNotFoundError(f"Couldn't find json for file: {file}")

    # Returns date in 2019:01:01 23:59:59 format
    def get_date_from_folder_meta(dir: Path):
        file = find_album_meta_json_file(dir)
        if not file:
            logger.debug("Couldn't pull datetime from album meta")
            return None
        try:
            with open(str(file), 'r', encoding="utf-8") as fi:
                album_dict = _json.load(fi)
                # find_album_meta_json_file *should* give us a "safe" file
                time = int(album_dict["albumData"]["date"]["timestamp"])
                return datetime_from_timestamp(time).strftime(EXIF_DATETIME_FORMAT)
        except KeyError:
            logger.error(
                "get_date_from_folder_meta - json doesn't have the required fields "
                "- that probably means that either google fucked us again, or find_album_meta_json_file "
                "is seriously broken"
            )
        return None

    @_functools.lru_cache(maxsize=None)
    def find_album_meta_json_file(dir: Path):
        for file in dir.rglob("*.json"):
            try:
                with open(str(file), 'r', encoding="utf-8") as f:
                    dict = _json.load(f)
                    if "albumData" in dict:
                        return file
            except Exception as e:
                logger.debug(e)
                logger.debug(f"find_album_meta_json_file - Error opening file: {file}")
        return None

    def set_creation_date_from_str(file: Path, str_datetime):
        try:
            # Turns out exif can have different formats - YYYY:MM:DD, YYYY/..., YYYY-... etc
            # Let's just hope Americans don't use something like MM-DD-YYYY here
            # The replace of ': ' with ':0' fixes issues when the string reads like 2006:11:09 10:54: 1.
            # It replaces the extra whitespace with a 0 for proper parsing
            str_datetime = str_datetime.replace('-', ':').replace('/', ':').replace('.', ':') \
                .replace('\\', ':').replace(': ', ':0')[:19]
            timestamp = timestamp_from_datetime(
                _datetime.strptime(
                    str_datetime,
                    EXIF_DATETIME_FORMAT
                )
            )
            _os.utime(file, (timestamp, timestamp))
            if _os.name == 'nt':
                _windoza_setctime.setctime(str(file), timestamp)
        except Exception as e:
            raise ValueError(f"Error setting creation date from string: {str_datetime}") from e
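
    # Reads the EXIF DateTime tags in priority order and uses the first parsable one to set the
    # filesystem timestamps; raises IOError if none of them works.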
    def set_creation_date_from_exif(file: Path):
        try:
            # Why do you need to be like that, Piexif...
            exif_dict = _piexif.load(str(file))
        except Exception as e:
            raise IOError("Can't read file's exif!") from e
        tags = [['0th', TAG_DATE_TIME], ['Exif', TAG_DATE_TIME_ORIGINAL], ['Exif', TAG_DATE_TIME_DIGITIZED]]
        datetime_str = ''
        date_set_success = False
        for tag in tags:
            try:
                datetime_str = exif_dict[tag[0]][tag[1]].decode('UTF-8')
                set_creation_date_from_str(file, datetime_str)
                date_set_success = True
                break
            except KeyError:
                pass  # No such tag - continue searching :/
            except ValueError:
                logger.debug("Wrong date format in exif!")
                logger.debug(datetime_str)
                logger.debug(f"does not match {EXIF_DATETIME_FORMAT}")
        if not date_set_success:
            raise IOError('No correct DateTime in given exif')

    def set_file_exif_date(file: Path, creation_date):
        try:
            exif_dict = _piexif.load(str(file))
        except:  # Sorry, but Piexif is too unpredictable
            exif_dict = {'0th': {}, 'Exif': {}}

        creation_date = creation_date.encode('UTF-8')
        exif_dict['0th'][TAG_DATE_TIME] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_ORIGINAL] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_DIGITIZED] = creation_date

        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert exif!")
            logger.debug(e)
            nonlocal s_cant_insert_exif_files
            s_cant_insert_exif_files.append(str(file.resolve()))

    def get_date_str_from_json(json):
        return datetime_from_timestamp(
            int(json['photoTakenTime']['timestamp'])
        ).strftime(EXIF_DATETIME_FORMAT)

    # ========= THIS IS ALL GPS STUFF =========

    def change_to_rational(number):
        """Convert a number to a rational

        Keyword arguments: number
        return: tuple like (1, 2) - (numerator, denominator)
        """
        f = Fraction(str(number))
        return f.numerator, f.denominator

    # got this here https://github.com/hMatoba/piexifjs/issues/1#issuecomment-260176317
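    # Converts decimal degrees into the EXIF degrees/minutes/seconds form, each component
    # stored as a rational: [(deg, 1), (min, 1), (sec * 100, 100)].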
    def degToDmsRational(degFloat):
        min_float = degFloat % 1 * 60
        sec_float = min_float % 1 * 60
        deg = math.floor(degFloat)
        deg_min = math.floor(min_float)
        sec = round(sec_float * 100)
        return [(deg, 1), (deg_min, 1), (sec, 100)]

    def set_file_geo_data(file: Path, json):
        """
        Reads the geoData from google and saves it to the EXIF. This works assuming that the
        geodata looks something like -100.12093, 50.213143.
        Written by DalenW.
        :param file:
        :param json:
        :return:
        """
        # prevents crashes
        try:
            exif_dict = _piexif.load(str(file))
        except:
            exif_dict = {'0th': {}, 'Exif': {}}

        # Google sometimes puts a string here; treat any string as 0.0, otherwise cast to float
        def _str_to_float(num):
            if type(num) == str:
                return 0.0
            else:
                return float(num)

        # Falls back to geoDataExif if geoData wasn't set in the photos editor.
        # https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/pull/5#discussion_r531792314
        longitude = _str_to_float(json['geoData']['longitude'])
        latitude = _str_to_float(json['geoData']['latitude'])
        altitude = _str_to_float(json['geoData']['altitude'])

        # Prioritise geoData set from the GPhotos editor. If it's blank, fall back to geoDataExif
        if longitude == 0 and latitude == 0:
            longitude = _str_to_float(json['geoDataExif']['longitude'])
            latitude = _str_to_float(json['geoDataExif']['latitude'])
            altitude = _str_to_float(json['geoDataExif']['altitude'])

        # latitude >= 0: North latitude -> "N"
        # latitude < 0: South latitude -> "S"
        # longitude >= 0: East longitude -> "E"
        # longitude < 0: West longitude -> "W"
        if longitude >= 0:
            longitude_ref = 'E'
        else:
            longitude_ref = 'W'
            longitude = longitude * -1
        if latitude >= 0:
            latitude_ref = 'N'
        else:
            latitude_ref = 'S'
            latitude = latitude * -1

        # referenced from https://gist.github.com/c060604/8a51f8999be12fc2be498e9ca56adc72
        gps_ifd = {
            _piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0)
        }
        # skip it if it's empty
        if latitude != 0 or longitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSLatitudeRef: latitude_ref,
                _piexif.GPSIFD.GPSLatitude: degToDmsRational(latitude),
                _piexif.GPSIFD.GPSLongitudeRef: longitude_ref,
                _piexif.GPSIFD.GPSLongitude: degToDmsRational(longitude)
            })
        if altitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSAltitudeRef: 1,
                _piexif.GPSIFD.GPSAltitude: change_to_rational(round(altitude))
            })

        gps_exif = {"GPS": gps_ifd}
        exif_dict.update(gps_exif)
        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert geo exif!")
            # "local variable 'new_value' referenced before assignment" means that one of the GPS values is incorrect
            logger.debug(e)

    # ============ END OF GPS STUFF ============

    COMMON_DATETIME_PATTERNS = (
        # example: Screenshot_20190919-053857_Camera-edited.jpg
        (_re.compile(r'(?P<date>20\d{2}(01|02|03|04|05|06|07|08|09|10|11|12)[0-3]\d-\d{6})'),
         lambda m: _datetime.strptime(m.group('date'), '%Y%m%d-%H%M%S'),),
        # example: IMG_20190509_154733-edited.jpg, MVIMG_20190215_193501.MP4, IMG_20190221_112112042_BURST000_COVER_TOP.MP4
        (_re.compile(r'(?P<date>20\d{2}(01|02|03|04|05|06|07|08|09|10|11|12)[0-3]\d_\d{6})'),
         lambda m: _datetime.strptime(m.group('date'), '%Y%m%d_%H%M%S'),),
        # example: Screenshot_2019-04-16-11-19-37-232_com.google.a.jpg
        (_re.compile(r'(?P<date>20\d{2}-(01|02|03|04|05|06|07|08|09|10|11|12)-[0-3]\d-\d{2}-?\d{2}-?\d{2})'),
         lambda m: _datetime.strptime(m.group('date'), '%Y-%m-%d-%H-%M-%S'),),
    )
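
    # Last-resort date source: searches the filename for one of the patterns above and
    # returns the date formatted like EXIF, or None if nothing matches.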
    def guess_date_from_filename(file: Path):
        for regex, extractor in COMMON_DATETIME_PATTERNS:
            m = regex.search(file.name)
            if m:
                return extractor(m).strftime(EXIF_DATETIME_FORMAT)

    # Fixes ALL metadata - takes just the file and figures the rest out
    def fix_metadata(file: Path):
        # logger.info(file)
        has_nice_date = False
        try:
            set_creation_date_from_exif(file)
            has_nice_date = True
        except (_piexif.InvalidImageDataError, ValueError, IOError) as e:
            logger.debug(e)
            logger.debug(f'No exif for {file}')
        except IOError:
            logger.debug('No creation date found in exif!')

        try:
            google_json = find_json_for_file(file)
            date = get_date_str_from_json(google_json)
            set_file_geo_data(file, google_json)
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            has_nice_date = True
            return
        except FileNotFoundError as e:
            logger.debug(e)

        if has_nice_date:
            return True

        logger.debug(f'Trying to copy folder meta as date for {file}')
        date = get_date_from_folder_meta(file.parent)
        if date is not None:
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            nonlocal s_date_from_folder_files
            s_date_from_folder_files.append(str(file.resolve()))
            return True

        if args.guess_timestamp_from_filename:
            logger.debug(f'Searching the filename for common date/time patterns for {file}')
            date = guess_date_from_filename(file)
            if date is not None:
                set_file_exif_date(file, date)
                set_creation_date_from_str(file, date)
                return True

        logger.warning(f'There was literally no option to set a date on {file}')
        nonlocal s_no_date_at_all
        s_no_date_at_all.append(str(file.resolve()))
        return False

    # PART 2: Copy all photos and videos to the target folder

    # Makes a new name like 'photo(1).jpg' if a file with that name already exists
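    # Every clash is also recorded in rename_map, which populate_album_map consults later.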
    def new_name_if_exists(file: Path):
        new_name = file
        i = 1
        while True:
            if not new_name.is_file():
                return new_name
            else:
                new_name = file.with_name(f"{file.stem}({i}){file.suffix}")
                rename_map[str(file)] = new_name
                i += 1

    def copy_to_target(file: Path):
        if is_photo(file) or is_video(file):
            new_file = new_name_if_exists(FIXED_DIR / file.name)
            _shutil.copy2(file, new_file)
            nonlocal s_copied_files
            s_copied_files += 1
        return True

    def copy_to_target_and_divide(file: Path):
        creation_date = file.stat().st_mtime
        date = datetime_from_timestamp(creation_date)

        new_path = FIXED_DIR / f"{date.year}/{date.month:02}/"
        new_path.mkdir(parents=True, exist_ok=True)

        new_file = new_name_if_exists(new_path / file.name)
        _shutil.copy2(file, new_file)
        nonlocal s_copied_files
        s_copied_files += 1
        return True

    # xD python lambdas are shit - this is only because we can't do 2 commands, so we do them in arguments
    def _walk_with_tqdm(res, bar: _tqdm):
        bar.update()
        return res

    # Count *all* photo and video files - this is hacky, and we should use .rglob altogether instead of is_photo
    logger.info("Counting how many input files we have ahead...")
    _input_files_count = 0
    for ext in _tqdm(photo_formats + video_formats, unit='formats'):
        _input_files_count += len(list(PHOTOS_DIR.rglob(f'**/*{ext}')))
    logger.info(f'Input files: {_input_files_count}')

    logger.info('=====================')
    logger.info('Fixing files metadata and creation dates...')
    # tqdm progress bar stuff
    _metadata_bar = _tqdm(total=_input_files_count, unit='files')

    for_all_files_recursive(
        dir=PHOTOS_DIR,
        file_function=lambda f: _walk_with_tqdm(fix_metadata(f), _metadata_bar),
        # TODO (probably never, but should): Change this maybe to path.rglob
        filter_fun=lambda f: (is_photo(f) or is_video(f))
    )
    _metadata_bar.close()
    logger.info('=====================')

    logger.info('=====================')
    _copy_bar = _tqdm(total=_input_files_count, unit='files')
    if args.divide_to_dates:
        logger.info('Creating subfolders and dividing files based on date...')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(copy_to_target_and_divide(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f))
        )
    else:
        logger.info('Copying all files to one folder...')
        logger.info('(If you want, you can get them organized in folders based on year and month.'
                    ' Run with --divide-to-dates to do this)')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(copy_to_target(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f))
        )
    _copy_bar.close()
    logger.info('=====================')

    logger.info('=====================')
    logger.info('Finding duplicates...')
    find_duplicates(FIXED_DIR, lambda f: (is_photo(f) or is_video(f)))
    logger.info('Removing duplicates...')
    remove_duplicates()
    logger.info('=====================')

    if args.albums is not None:
        if args.albums.lower() == 'json':
            logger.info('=====================')
            logger.info('Populating the json file with albums...')
            logger.info('=====================')
            for_all_files_recursive(
                dir=PHOTOS_DIR,
                folder_function=populate_album_map
            )
            file = PHOTOS_DIR / 'albums.json'
            with open(file, 'w', encoding="utf-8") as outfile:
                _json.dump(album_mmap, outfile)
            logger.info(str(file))

    logger.info('')
    logger.info('DONE! FREEEEEDOOOOM!!!')
    logger.info('')

    logger.info("Final statistics:")
    logger.info(f"Files copied to target folder: {s_copied_files}")
    logger.info(f"Removed duplicates: {s_removed_duplicates_count}")

    logger.info(f"Files for which we couldn't find a json: {len(s_no_json_found)}")
    if len(s_no_json_found) > 0:
        with open(PHOTOS_DIR / 'no_json_found.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains a list of files for which no corresponding .json file was found\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_no_json_found))
        logger.info(f" - you have the full list in {f.name}")

    logger.info(f"Files where inserting new exif failed: {len(s_cant_insert_exif_files)}")
    if len(s_cant_insert_exif_files) > 0:
        logger.info("(This is not necessarily a bad thing - pretty much all videos fail, "
                    "and your photos probably have their original exif already)")
        with open(PHOTOS_DIR / 'failed_inserting_exif.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains a list of files where setting the right exif date failed\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_cant_insert_exif_files))
        logger.info(f" - you have the full list in {f.name}")

    logger.info(f"Files where the date was set from the name of the folder: {len(s_date_from_folder_files)}")
    if len(s_date_from_folder_files) > 0:
        with open(PHOTOS_DIR / 'date_from_folder_name.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains a list of files where the date was set from the name of the folder\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_date_from_folder_files))
        logger.info(f" - you have the full list in {f.name}")

    if args.skip_extras or args.skip_extras_harder:
        # Remove duplicates: https://www.w3schools.com/python/python_howto_remove_duplicates.asp
        s_skipped_extra_files = list(dict.fromkeys(s_skipped_extra_files))
        logger.info(f"Extra files that were skipped: {len(s_skipped_extra_files)}")
        with open(PHOTOS_DIR / 'skipped_extra_files.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains a list of extra files (ending with '-edited' etc) which were skipped because "
                    "you've used either --skip-extras or --skip-extras-harder\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_skipped_extra_files))
        logger.info(f" - you have the full list in {f.name}")

    if len(s_no_date_at_all) > 0:
        logger.info('')
        logger.info(f"!!! There were {len(s_no_date_at_all)} files where there was absolutely no way to set "
                    f"a correct date! They will probably appear at the top of the others, as their 'last modified' "
                    f"value is set to the moment of downloading your takeout :/")
        with open(PHOTOS_DIR / 'unsorted.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains a list of files where there was no way to set a correct date!\n")
            f.write("# You probably want to set their dates manually - but you can delete this if you want\n")
            f.write("\n".join(s_no_date_at_all))
        logger.info(f" - you have the full list in {f.name}")

    logger.info('')
    logger.info('Sooo... what now? You can see README.md for the nice G Photos alternatives I found and recommend')
    logger.info('')
    logger.info('If I helped you, you can consider donating to me: https://www.paypal.me/TheLastGimbus')
    logger.info('Have a nice day!')


if __name__ == '__main__':
    main()