test_file_name_formatter.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #!/usr/bin/env python3
  2. # coding=utf-8
  3. import platform
  4. import sys
  5. import unittest.mock
  6. from datetime import datetime
  7. from pathlib import Path
  8. from typing import Optional
  9. from unittest.mock import MagicMock
  10. import praw.models
  11. import pytest
  12. from bdfr.file_name_formatter import FileNameFormatter
  13. from bdfr.resource import Resource
  14. from bdfr.site_downloaders.base_downloader import BaseDownloader
  15. from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
  16. from bdfr.site_downloaders.self_post import SelfPost
  @pytest.fixture()
  def submission() -> MagicMock:
      """Provide a ``MagicMock`` standing in for a submission, with fixed attribute values."""
      fake_submission = MagicMock()
      # Identification of the post itself.
      fake_submission.id = '12345'
      fake_submission.title = 'name'
      fake_submission.link_flair_text = 'test_flair'
      fake_submission.score = 1000
      # Nested attributes are created on demand by the mock.
      fake_submission.author.name = 'person'
      fake_submission.subreddit.display_name = 'randomreddit'
      # Naive datetime converted to a POSIX timestamp.
      fake_submission.created_utc = datetime(2021, 4, 21, 9, 30, 0).timestamp()
      # Make the mock report ``praw.models.Submission`` as its class.
      fake_submission.__class__ = praw.models.Submission
      return fake_submission
  def do_test_string_equality(result: [Path, str], expected: str) -> bool:
      """Return whether ``str(result)`` ends with ``expected``.

      When ``platform.system()`` reports ``'Windows'``, ``expected`` is first passed
      through ``FileNameFormatter._format_for_windows``.
      """
      # NOTE(review): ``[Path, str]`` is a list literal, not a type; ``Union[Path, str]`` looks intended.
      on_windows = platform.system() == 'Windows'
      wanted_suffix = FileNameFormatter._format_for_windows(expected) if on_windows else expected
      return str(result).endswith(wanted_suffix)
  def do_test_path_equality(result: Path, expected: str) -> bool:
      """Return whether ``str(result)`` ends with ``expected`` rendered as a ``Path``.

      On Windows, ``expected`` is split on ``'/'`` and every part is passed through
      ``FileNameFormatter._format_for_windows`` before the path is rebuilt.
      """
      if platform.system() != 'Windows':
          expected_path = Path(expected)
      else:
          cleaned_parts = [FileNameFormatter._format_for_windows(piece) for piece in expected.split('/')]
          expected_path = Path(*cleaned_parts)
      return str(result).endswith(str(expected_path))
  @pytest.fixture(scope='session')
  def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
      """Session-scoped fixture returning the submission with ID ``lgilgt``."""
      submission_id = 'lgilgt'
      return reddit_instance.submission(id=submission_id)
  @pytest.mark.parametrize(
      ('test_format_string', 'expected'),
      (
          ('{SUBREDDIT}', 'randomreddit'),
          ('{REDDITOR}', 'person'),
          ('{POSTID}', '12345'),
          ('{UPVOTES}', '1000'),
          ('{FLAIR}', 'test_flair'),
          ('{DATE}', '2021-04-21T09:30:00'),
          ('{REDDITOR}_{TITLE}_{POSTID}', 'person_name_12345'),
      ),
  )
  def test_format_name_mock(test_format_string: str, expected: str, submission: MagicMock):
      """Format each scheme against the mocked submission and compare with the expected name."""
      formatter = FileNameFormatter(test_format_string, '', 'ISO')
      formatted_name = formatter._format_name(submission, test_format_string)
      assert do_test_string_equality(formatted_name, expected)
  @pytest.mark.parametrize(
      ('test_string', 'expected'),
      (
          ('', False),
          ('test', False),
          ('{POSTID}', True),
          ('POSTID', False),
          ('{POSTID}_test', True),
          ('test_{TITLE}', True),
          ('TITLE_POSTID', False),
      ),
  )
  def test_check_format_string_validity(test_string: str, expected: bool):
      """Check ``FileNameFormatter.validate_string`` against the expected verdict for each input."""
      is_valid = FileNameFormatter.validate_string(test_string)
      assert is_valid == expected
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(
      ('test_format_string', 'expected'),
      (
          ('{SUBREDDIT}', 'Mindustry'),
          ('{REDDITOR}', 'Gamer_player_boi'),
          ('{POSTID}', 'lgilgt'),
          ('{FLAIR}', 'Art'),
          ('{SUBREDDIT}_{TITLE}', 'Mindustry_Toxopid that is NOT humane >:('),
          ('{REDDITOR}_{TITLE}_{POSTID}', 'Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt'),
      ),
  )
  def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
      """Format each scheme against the ``reddit_submission`` fixture and compare with the expected name."""
      formatter = FileNameFormatter(test_format_string, '', '')
      formatted_name = formatter._format_name(reddit_submission, test_format_string)
      assert do_test_string_equality(formatted_name, expected)
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(
      ('format_string_directory', 'format_string_file', 'expected'),
      (
          ('{SUBREDDIT}', '{POSTID}', 'test/Mindustry/lgilgt.png'),
          ('{SUBREDDIT}', '{TITLE}_{POSTID}', 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt.png'),
          (
              '{SUBREDDIT}',
              '{REDDITOR}_{TITLE}_{POSTID}',
              'test/Mindustry/Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt.png',
          ),
      ),
  )
  def test_format_full(
      format_string_directory: str,
      format_string_file: str,
      expected: str,
      reddit_submission: praw.models.Submission,
  ):
      """Build a full path (directory scheme plus file scheme) under ``test`` and compare its tail."""
      formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
      resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
      destination_root = Path('test')
      full_path = formatter.format_path(resource, destination_root)
      assert do_test_path_equality(full_path, expected)
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(
      ('format_string_directory', 'format_string_file'),
      (
          ('{SUBREDDIT}', '{POSTID}'),
          ('{SUBREDDIT}', '{UPVOTES}'),
          ('{SUBREDDIT}', '{UPVOTES}{POSTID}'),
      ),
  )
  def test_format_full_conform(
      format_string_directory: str,
      format_string_file: str,
      reddit_submission: praw.models.Submission,
  ):
      """Call ``format_path`` for each scheme pair.

      The test contains no assertions; it passes as long as the call does not raise.
      """
      formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
      resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
      formatter.format_path(resource, Path('test'))
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(
      ('format_string_directory', 'format_string_file', 'index', 'expected'),
      (
          ('{SUBREDDIT}', '{POSTID}', None, 'test/Mindustry/lgilgt.png'),
          ('{SUBREDDIT}', '{POSTID}', 1, 'test/Mindustry/lgilgt_1.png'),
          ('{SUBREDDIT}', '{POSTID}', 2, 'test/Mindustry/lgilgt_2.png'),
          ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt_2.png'),
      ),
  )
  def test_format_full_with_index_suffix(
      format_string_directory: str,
      format_string_file: str,
      index: Optional[int],
      expected: str,
      reddit_submission: praw.models.Submission,
  ):
      """Build a full path while passing ``index`` to ``format_path`` and compare its tail."""
      formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
      resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
      indexed_path = formatter.format_path(resource, Path('test'), index)
      assert do_test_path_equality(indexed_path, expected)
  def test_format_multiple_resources():
      """Format four resources that share one title and check the resulting file names.

      Each mocked resource has the same URL, extension and submission title; the
      names returned by ``format_resource_paths`` are expected to be ``test_1.png``
      through ``test_4.png``.
      """
      mocks = []
      # The loop value is never used; only the number of resources (four) matters.
      for _ in range(4):
          new_mock = MagicMock()
          new_mock.url = 'https://example.com/test.png'
          new_mock.extension = '.png'
          new_mock.source_submission.title = 'test'
          new_mock.source_submission.__class__ = praw.models.Submission
          mocks.append(new_mock)
      test_formatter = FileNameFormatter('{TITLE}', '', 'ISO')
      results = test_formatter.format_resource_paths(mocks, Path('.'))
      # Each result's first element is the path; compare file names only, ignoring order.
      result_names = {str(res[0].name) for res in results}
      expected = {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'}
      assert result_names == expected
  @pytest.mark.parametrize(
      ('test_filename', 'test_ending'),
      (
          ('A' * 300, '.png'),
          ('A' * 300, '_1.png'),
          ('a' * 300, '_1000.jpeg'),
          ('😍💕✨' * 100, '_1.png'),
      ),
  )
  def test_limit_filename_length(test_filename: str, test_ending: str):
      """Check that over-long names are limited to 255 characters and 255 UTF-8 bytes."""
      limited = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.'))
      file_name = limited.name
      assert len(file_name) <= 255
      assert len(file_name.encode('utf-8')) <= 255
      # The whole path must also fit within the formatter's reported maximum path length.
      assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
      assert isinstance(limited, Path)
  @pytest.mark.parametrize(
      ('test_filename', 'test_ending', 'expected_end'),
      (
          ('test_aaaaaa', '_1.png', 'test_aaaaaa_1.png'),
          ('test_aataaa', '_1.png', 'test_aataaa_1.png'),
          ('test_abcdef', '_1.png', 'test_abcdef_1.png'),
          ('test_aaaaaa', '.png', 'test_aaaaaa.png'),
          ('test', '_1.png', 'test_1.png'),
          ('test_m1hqw6', '_1.png', 'test_m1hqw6_1.png'),
          ('A' * 300 + '_bbbccc', '.png', '_bbbccc.png'),
          ('A' * 300 + '_bbbccc', '_1000.jpeg', '_bbbccc_1000.jpeg'),
          ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'),
      ),
  )
  def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
      """Check that the limited name stays within 255 characters/bytes and keeps the expected tail."""
      limited = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.'))
      file_name = limited.name
      assert len(file_name) <= 255
      assert len(file_name.encode('utf-8')) <= 255
      assert file_name.endswith(expected_end)
      assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
  @pytest.mark.skipif(sys.platform == 'win32', reason='Test broken on windows github')
  def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
      """Format a path for a 500-character title and create it on disk.

      There are no assertions; the test passes when the directory and file can be
      created without an exception.
      """
      submission.id = 'BBBBBB'
      submission.title = 'A' * 500
      submission.author.name = 'test'
      submission.subreddit.display_name = 'test'
      formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
      resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg')
      target = formatter.format_path(resource, tmp_path)
      target.parent.mkdir(parents=True)
      target.touch()
  @pytest.mark.parametrize(
      ('test_name', 'test_ending'),
      (
          ('a', 'b'),
          ('a', '_bbbbbb.jpg'),
          ('a' * 20, '_bbbbbb.jpg'),
          ('a' * 50, '_bbbbbb.jpg'),
          ('a' * 500, '_bbbbbb.jpg'),
      ),
  )
  def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path):
      """Check name and total path limits when the name is placed under ``tmp_path``."""
      limited = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path)
      file_name = str(limited.name)
      assert len(file_name) <= 255
      # The same limit is applied to the encoded length in both encodings.
      assert len(file_name.encode('UTF-8')) <= 255
      assert len(file_name.encode('cp1252')) <= 255
      assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
  @pytest.mark.parametrize(
      ('test_string', 'expected'),
      (
          ('test', 'test'),
          ('test😍', 'test'),
          ('test.png', 'test.png'),
          ('test*', 'test'),
          ('test**', 'test'),
          ('test?*', 'test'),
          ('test_???.png', 'test_.png'),
          ('test_???😍.png', 'test_.png'),
      ),
  )
  def test_format_file_name_for_windows(test_string: str, expected: str):
      """Check the output of ``FileNameFormatter._format_for_windows`` for each input."""
      cleaned = FileNameFormatter._format_for_windows(test_string)
      assert cleaned == expected
  @pytest.mark.parametrize(
      ('test_string', 'expected'),
      (
          ('test', 'test'),
          ('test😍', 'test'),
          ('😍', ''),
      ),
  )
  def test_strip_emojies(test_string: str, expected: str):
      """Check the output of ``FileNameFormatter._strip_emojis`` for each input."""
      stripped = FileNameFormatter._strip_emojis(test_string)
      assert stripped == expected
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(('test_submission_id', 'expected'), (
      ('mfuteh', {
          'title': 'Why Do Interviewers Ask Linked List Questions?',
          'redditor': 'mjgardner',
      }),
  ))
  def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
      """Check selected entries of the name dictionary generated for a submission.

      Only the keys present in ``expected`` are compared; any other entries in the
      generated dictionary are ignored.
      """
      test_submission = reddit_instance.submission(id=test_submission_id)
      test_formatter = FileNameFormatter('{TITLE}', '', 'ISO')
      result = test_formatter._generate_name_dict_from_submission(test_submission)
      # Assert per key so a failure reports which entry differed instead of a bare ``False``.
      for key, expected_value in expected.items():
          assert result.get(key) == expected_value, f'unexpected value for key {key!r}'
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(('test_comment_id', 'expected'), (
      ('gsq0yuw', {
          'title': 'Why Do Interviewers Ask Linked List Questions?',
          'redditor': 'Doctor-Dapper',
          'postid': 'gsq0yuw',
          'flair': '',
      }),
  ))
  def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
      """Check selected entries of the name dictionary generated for a comment.

      Only the keys present in ``expected`` are compared; any other entries in the
      generated dictionary are ignored.
      """
      test_comment = reddit_instance.comment(id=test_comment_id)
      test_formatter = FileNameFormatter('{TITLE}', '', 'ISO')
      result = test_formatter._generate_name_dict_from_comment(test_comment)
      # Assert per key so a failure reports which entry differed instead of a bare ``False``.
      for key, expected_value in expected.items():
          assert result.get(key) == expected_value, f'unexpected value for key {key!r}'
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(
      ('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'),
      (
          ('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
          ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
      ),
  )
  def test_format_archive_entry_comment(
      test_file_scheme: str,
      test_folder_scheme: str,
      test_comment_id: str,
      expected_name: str,
      tmp_path: Path,
      reddit_instance: praw.Reddit,
  ):
      """Format a ``.json`` resource built from a comment and compare the end of the path."""
      comment = reddit_instance.comment(id=test_comment_id)
      formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
      archive_entry = Resource(comment, '', lambda: None, '.json')
      entry_path = formatter.format_path(archive_entry, tmp_path)
      assert do_test_string_equality(entry_path, expected_name)
  @pytest.mark.parametrize(
      ('test_folder_scheme', 'expected'),
      (
          ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
          ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
      ),
  )
  def test_multilevel_folder_scheme(
      test_folder_scheme: str,
      expected: str,
      tmp_path: Path,
      submission: MagicMock,
  ):
      """Check that a folder scheme containing ``/`` produces nested directories."""
      formatter = FileNameFormatter('{POSTID}', test_folder_scheme, 'ISO')
      resource = MagicMock()
      resource.extension = '.png'
      resource.source_submission = submission
      relative_path = formatter.format_path(resource, tmp_path).relative_to(tmp_path)
      assert do_test_path_equality(relative_path.parent, expected)
      # ``parents`` of the relative path should number one more than the folder levels.
      folder_levels = len(expected.split('/'))
      assert len(relative_path.parents) == (folder_levels + 1)
  @pytest.mark.parametrize(
      ('test_name_string', 'expected'),
      (
          ('test', 'test'),
          ('😍', '😍'),
          ('test😍', 'test😍'),
          ('test😍 ’', 'test😍 ’'),
          ('test😍 \\u2019', 'test😍 ’'),
          ('Using that real good [1\\4]', 'Using that real good [1\\4]'),
      ),
  )
  def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock):
      """Use each string as the submission title and compare the name formatted from ``{TITLE}``."""
      submission.title = test_name_string
      formatter = FileNameFormatter('{TITLE}', '', 'ISO')
      formatted_name = formatter._format_name(submission, '{TITLE}')
      assert do_test_string_equality(formatted_name, expected)
  @pytest.mark.parametrize(
      ('test_string', 'expected'),
      (
          ('test \\u2019', 'test ’'),
          ('My cat\\u2019s paws are so cute', 'My cat’s paws are so cute'),
      ),
  )
  def test_convert_unicode_escapes(test_string: str, expected: str):
      """Check the output of ``FileNameFormatter._convert_unicode_escapes`` for each input."""
      converted = FileNameFormatter._convert_unicode_escapes(test_string)
      assert converted == expected
  @pytest.mark.parametrize(
      ('test_datetime', 'expected'),
      (
          (datetime(2020, 1, 1, 8, 0, 0), '2020-01-01T08:00:00'),
          (datetime(2020, 1, 1, 8, 0), '2020-01-01T08:00:00'),
          (datetime(2021, 4, 21, 8, 30, 21), '2021-04-21T08:30:21'),
      ),
  )
  def test_convert_timestamp(test_datetime: datetime, expected: str):
      """Convert each datetime's timestamp with an ``'ISO'`` formatter and compare the string."""
      formatter = FileNameFormatter('{POSTID}', '', 'ISO')
      timestamp = test_datetime.timestamp()
      converted = formatter._convert_timestamp(timestamp)
      assert converted == expected
  @pytest.mark.parametrize(
      ('test_time_format', 'expected'),
      (
          ('ISO', '2021-05-02T13:33:00'),
          ('%Y_%m', '2021_05'),
          ('%Y-%m-%d', '2021-05-02'),
      ),
  )
  def test_time_string_formats(test_time_format: str, expected: str):
      """Convert one fixed moment using each time format and compare the resulting string."""
      formatter = FileNameFormatter('{TITLE}', '', test_time_format)
      moment = datetime(2021, 5, 2, 13, 33)
      converted = formatter._convert_timestamp(moment.timestamp())
      assert converted == expected
  def test_get_max_path_length():
      """Check that the reported maximum path length is one of 4096, 260 or 1024."""
      accepted_limits = (4096, 260, 1024)
      assert FileNameFormatter.find_max_path_length() in accepted_limits
  def test_windows_max_path(tmp_path: Path):
      """Check name limiting while ``platform.system`` and the max path length are patched.

      ``platform.system`` is patched to return ``'Windows'`` and
      ``FileNameFormatter.find_max_path_length`` to return 260.
      """
      windows_patch = unittest.mock.patch('platform.system', return_value='Windows')
      max_length_patch = unittest.mock.patch(
          'bdfr.file_name_formatter.FileNameFormatter.find_max_path_length',
          return_value=260,
      )
      with windows_patch, max_length_patch:
          limited = FileNameFormatter.limit_file_name_length('test' * 100, '_1.png', tmp_path)
          assert len(str(limited)) <= 260
          assert len(limited.name) <= (260 - len(str(tmp_path)))
  @pytest.mark.online
  @pytest.mark.reddit
  @pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), (
      ('gphmnr', YtdlpFallback, {'He has a lot to say today.mp4'}),
      ('d0oir2', YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}),
      ('jiecu', SelfPost, {'[deleted by user].txt'}),
  ))
  def test_name_submission(
      test_reddit_id: str,
      test_downloader: type[BaseDownloader],
      expected_names: set[str],
      reddit_instance: praw.Reddit,
  ):
      """Check the file names produced for the resources of a real submission.

      The submission is passed to ``test_downloader``, the resources it finds are
      formatted with a ``{TITLE}`` scheme, and the set of resulting file names is
      compared with ``expected_names``.
      """
      test_submission = reddit_instance.submission(id=test_reddit_id)
      test_resources = test_downloader(test_submission).find_resources()
      test_formatter = FileNameFormatter('{TITLE}', '', '')
      results = test_formatter.format_resource_paths(test_resources, Path('.'))
      # Each result's first element is the path; compare file names only, ignoring order.
      result_names = {r[0].name for r in results}
      assert result_names == expected_names