123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
- #!/usr/bin/env python3
- # coding=utf-8
import platform
import sys
import unittest.mock
from datetime import datetime
from pathlib import Path
from typing import Optional, Union
from unittest.mock import MagicMock

import praw.models
import pytest

from bdfr.file_name_formatter import FileNameFormatter
from bdfr.resource import Resource
from bdfr.site_downloaders.base_downloader import BaseDownloader
from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfr.site_downloaders.self_post import SelfPost
@pytest.fixture()
def submission() -> MagicMock:
    """Provide a mocked submission carrying fixed, known attribute values.

    The mock's ``__class__`` is replaced with ``praw.models.Submission`` so
    that ``isinstance`` checks against that class succeed.
    """
    mock_submission = MagicMock()
    mock_submission.id = '12345'
    mock_submission.title = 'name'
    mock_submission.author.name = 'person'
    mock_submission.subreddit.display_name = 'randomreddit'
    mock_submission.link_flair_text = 'test_flair'
    mock_submission.score = 1000
    created = datetime(2021, 4, 21, 9, 30, 0)
    mock_submission.created_utc = created.timestamp()
    mock_submission.__class__ = praw.models.Submission
    return mock_submission
def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
    """Report whether the string form of ``result`` ends with ``expected``.

    When ``platform.system()`` reports ``'Windows'``, ``expected`` is first
    passed through ``FileNameFormatter._format_for_windows`` before the
    comparison is made.

    Args:
        result: The path or string produced by the code under test.
        expected: The suffix that ``str(result)`` should end with.

    Returns:
        ``True`` if ``str(result)`` ends with the (possibly adjusted)
        expected string, otherwise ``False``.
    """
    if platform.system() == 'Windows':
        expected = FileNameFormatter._format_for_windows(expected)
    return str(result).endswith(expected)
def do_test_path_equality(result: Path, expected: str) -> bool:
    """Report whether ``str(result)`` ends with the path described by ``expected``.

    ``expected`` is a ``/``-separated string. On Windows each component is
    passed through ``FileNameFormatter._format_for_windows`` before the
    components are joined back into a ``Path``; elsewhere ``expected`` is
    converted to a ``Path`` directly.
    """
    if platform.system() != 'Windows':
        expected_path = Path(expected)
    else:
        components = [
            FileNameFormatter._format_for_windows(component)
            for component in expected.split('/')
        ]
        expected_path = Path(*components)
    return str(result).endswith(str(expected_path))
@pytest.fixture(scope='session')
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
    """Provide the submission with id ``lgilgt``, created once per test session."""
    submission_id = 'lgilgt'
    return reddit_instance.submission(id=submission_id)
@pytest.mark.parametrize(('test_format_string', 'expected'), [
    ('{SUBREDDIT}', 'randomreddit'),
    ('{REDDITOR}', 'person'),
    ('{POSTID}', '12345'),
    ('{UPVOTES}', '1000'),
    ('{FLAIR}', 'test_flair'),
    ('{DATE}', '2021-04-21T09:30:00'),
    ('{REDDITOR}_{TITLE}_{POSTID}', 'person_name_12345'),
])
def test_format_name_mock(test_format_string: str, expected: str, submission: MagicMock):
    """Check that each format key is filled in from the mocked submission."""
    formatter = FileNameFormatter(test_format_string, '', 'ISO')
    formatted_name = formatter._format_name(submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
@pytest.mark.parametrize(('test_string', 'expected'), [
    ('', False),
    ('test', False),
    ('{POSTID}', True),
    ('POSTID', False),
    ('{POSTID}_test', True),
    ('test_{TITLE}', True),
    ('TITLE_POSTID', False),
])
def test_check_format_string_validity(test_string: str, expected: bool):
    """Check ``FileNameFormatter.validate_string`` against known good and bad strings.

    Per the cases above, a string is expected to be valid only when it
    contains at least one brace-wrapped key.
    """
    is_valid = FileNameFormatter.validate_string(test_string)
    assert is_valid == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_format_string', 'expected'), [
    ('{SUBREDDIT}', 'Mindustry'),
    ('{REDDITOR}', 'Gamer_player_boi'),
    ('{POSTID}', 'lgilgt'),
    ('{FLAIR}', 'Art'),
    ('{SUBREDDIT}_{TITLE}', 'Mindustry_Toxopid that is NOT humane >:('),
    ('{REDDITOR}_{TITLE}_{POSTID}', 'Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt'),
])
def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
    """Check name formatting against the real submission from ``reddit_submission``."""
    formatter = FileNameFormatter(test_format_string, '', '')
    formatted_name = formatter._format_name(reddit_submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('format_string_directory', 'format_string_file', 'expected'), (
    # Every case must supply all three declared arguments: directory scheme,
    # file scheme and expected path. Supplying fewer makes pytest fail at
    # collection time.
    (
        '{SUBREDDIT}',
        '{POSTID}',
        'test/Mindustry/lgilgt.png',
    ),
    (
        '{SUBREDDIT}',
        '{TITLE}_{POSTID}',
        'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt.png',
    ),
    (
        '{SUBREDDIT}',
        '{REDDITOR}_{TITLE}_{POSTID}',
        'test/Mindustry/Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt.png',
    ),
))
def test_format_full(
        format_string_directory: str,
        format_string_file: str,
        expected: str,
        reddit_submission: praw.models.Submission):
    """Check the full path built from a directory scheme and a file scheme.

    A ``Resource`` for a ``.png`` URL is formatted relative to ``Path('test')``
    and the resulting path must end with ``expected``.
    """
    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    result = test_formatter.format_path(test_resource, Path('test'))
    assert do_test_path_equality(result, expected)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('format_string_directory', 'format_string_file'), [
    ('{SUBREDDIT}', '{POSTID}'),
])
def test_format_full_conform(
        format_string_directory: str,
        format_string_file: str,
        reddit_submission: praw.models.Submission):
    """Smoke test: ``format_path`` must complete without raising.

    No assertion is made on the returned path.
    """
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatter.format_path(resource, Path('test'))
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('format_string_directory', 'format_string_file', 'index', 'expected'), [
    ('{SUBREDDIT}', '{POSTID}', None, 'test/Mindustry/lgilgt.png'),
    ('{SUBREDDIT}', '{POSTID}', 1, 'test/Mindustry/lgilgt_1.png'),
    ('{SUBREDDIT}', '{POSTID}', 2, 'test/Mindustry/lgilgt_2.png'),
    ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt_2.png'),
])
def test_format_full_with_index_suffix(
        format_string_directory: str,
        format_string_file: str,
        index: Optional[int],
        expected: str,
        reddit_submission: praw.models.Submission,
):
    """Check that an index passed to ``format_path`` shows up as a ``_<index>`` suffix.

    With ``index`` set to ``None`` the expected file name carries no suffix.
    """
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatted_path = formatter.format_path(resource, Path('test'), index)
    assert do_test_path_equality(formatted_path, expected)
def test_format_multiple_resources():
    """Check that four identically named resources get distinct numbered names."""
    resources = []
    for _ in range(4):
        resource = MagicMock()
        resource.url = 'https://example.com/test.png'
        resource.extension = '.png'
        resource.source_submission.title = 'test'
        resource.source_submission.__class__ = praw.models.Submission
        resources.append(resource)

    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    formatted = formatter.format_resource_paths(resources, Path('.'))

    # The first element of each returned entry is the path; compare names only.
    produced_names = {str(entry[0].name) for entry in formatted}
    assert produced_names == {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'}
@pytest.mark.parametrize(('test_filename', 'test_ending'), [
    ('A' * 300, '.png'),
    ('A' * 300, '_1.png'),
    ('a' * 300, '_1000.jpeg'),
    ('😍💕✨' * 100, '_1.png'),
])
def test_limit_filename_length(test_filename: str, test_ending: str):
    """Check that over-long names are shortened to fit the length limits.

    The file name must be at most 255 characters and at most 255 UTF-8 bytes,
    and the whole path must not exceed ``find_max_path_length()``.
    """
    limited = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.'))
    file_name = limited.name
    assert len(file_name) <= 255
    assert len(file_name.encode('utf-8')) <= 255
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
    assert isinstance(limited, Path)
@pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), [
    ('test_aaaaaa', '_1.png', 'test_aaaaaa_1.png'),
    ('test_aataaa', '_1.png', 'test_aataaa_1.png'),
    ('test_abcdef', '_1.png', 'test_abcdef_1.png'),
    ('test_aaaaaa', '.png', 'test_aaaaaa.png'),
    ('test', '_1.png', 'test_1.png'),
    ('test_m1hqw6', '_1.png', 'test_m1hqw6_1.png'),
    ('A' * 300 + '_bbbccc', '.png', '_bbbccc.png'),
    ('A' * 300 + '_bbbccc', '_1000.jpeg', '_bbbccc_1000.jpeg'),
    ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'),
])
def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
    """Check that the trailing id-like segment and the ending survive shortening."""
    limited = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.'))
    file_name = limited.name
    assert len(file_name) <= 255
    assert len(file_name.encode('utf-8')) <= 255
    assert file_name.endswith(expected_end)
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
@pytest.mark.skipif(sys.platform == 'win32', reason='Test broken on windows github')
def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
    """Check that a path built from a 500-character title can be created on disk."""
    submission.id = 'BBBBBB'
    submission.title = 'A' * 500
    submission.author.name = 'test'
    submission.subreddit.display_name = 'test'

    resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg')
    formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
    target = formatter.format_path(resource, tmp_path)

    # Creating the directory and the file fails if the name is still too long.
    target.parent.mkdir(parents=True)
    target.touch()
@pytest.mark.parametrize(('test_name', 'test_ending'), [
    ('a', 'b'),
    ('a', '_bbbbbb.jpg'),
    ('a' * 20, '_bbbbbb.jpg'),
    ('a' * 50, '_bbbbbb.jpg'),
    ('a' * 500, '_bbbbbb.jpg'),
])
def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path):
    """Check the length limits when the file is placed under ``tmp_path``.

    The file name is measured in characters and in its UTF-8 and cp1252
    encodings; the full path is measured against ``find_max_path_length()``.
    """
    limited = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path)
    file_name = str(limited.name)
    assert len(file_name) <= 255
    assert len(file_name.encode('UTF-8')) <= 255
    assert len(file_name.encode('cp1252')) <= 255
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
@pytest.mark.parametrize(('test_string', 'expected'), [
    ('test', 'test'),
    ('test😍', 'test'),
    ('test.png', 'test.png'),
    ('test*', 'test'),
    ('test**', 'test'),
    ('test?*', 'test'),
    ('test_???.png', 'test_.png'),
    ('test_???😍.png', 'test_.png'),
])
def test_format_file_name_for_windows(test_string: str, expected: str):
    """Check ``_format_for_windows`` against the cases above.

    The cases expect ``*``, ``?`` and emoji characters to be removed.
    """
    cleaned = FileNameFormatter._format_for_windows(test_string)
    assert cleaned == expected
@pytest.mark.parametrize(('test_string', 'expected'), [
    ('test', 'test'),
    ('test😍', 'test'),
    ('😍', ''),
])
def test_strip_emojies(test_string: str, expected: str):
    """Check that ``_strip_emojis`` removes emoji and leaves plain text intact."""
    stripped = FileNameFormatter._strip_emojis(test_string)
    assert stripped == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_submission_id', 'expected'), [
    ('mfuteh', {
        'title': 'Why Do Interviewers Ask Linked List Questions?',
        'redditor': 'mjgardner',
    }),
])
def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check that the name dict built from a submission contains the expected entries.

    Only the keys listed in ``expected`` are compared; other keys are ignored.
    """
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    fetched_submission = reddit_instance.submission(id=test_submission_id)
    name_dict = formatter._generate_name_dict_from_submission(fetched_submission)
    assert all(name_dict.get(key) == value for key, value in expected.items())
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_comment_id', 'expected'), [
    ('gsq0yuw', {
        'title': 'Why Do Interviewers Ask Linked List Questions?',
        'redditor': 'Doctor-Dapper',
        'postid': 'gsq0yuw',
        'flair': '',
    }),
])
def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check that the name dict built from a comment contains the expected entries.

    Only the keys listed in ``expected`` are compared; other keys are ignored.
    """
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    fetched_comment = reddit_instance.comment(id=test_comment_id)
    name_dict = formatter._generate_name_dict_from_comment(fetched_comment)
    assert all(name_dict.get(key) == value for key, value in expected.items())
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'), [
    ('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
    ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
])
def test_format_archive_entry_comment(
        test_file_scheme: str,
        test_folder_scheme: str,
        test_comment_id: str,
        expected_name: str,
        tmp_path: Path,
        reddit_instance: praw.Reddit,
):
    """Check the ``.json`` path produced for a resource wrapping a comment."""
    fetched_comment = reddit_instance.comment(id=test_comment_id)
    archive_entry = Resource(fetched_comment, '', lambda: None, '.json')
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
    formatted_path = formatter.format_path(archive_entry, tmp_path)
    assert do_test_string_equality(formatted_path, expected_name)
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), [
    ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
    ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
])
def test_multilevel_folder_scheme(
        test_folder_scheme: str,
        expected: str,
        tmp_path: Path,
        submission: MagicMock,
):
    """Check that a ``/``-separated folder scheme yields nested directories."""
    resource = MagicMock()
    resource.source_submission = submission
    resource.extension = '.png'

    formatter = FileNameFormatter('{POSTID}', test_folder_scheme, 'ISO')
    relative_path = formatter.format_path(resource, tmp_path).relative_to(tmp_path)

    assert do_test_path_equality(relative_path.parent, expected)
    # One parent per folder level, plus the trailing '.' of a relative path.
    expected_parent_count = len(expected.split('/')) + 1
    assert len(relative_path.parents) == expected_parent_count
@pytest.mark.parametrize(('test_name_string', 'expected'), [
    ('test', 'test'),
    ('😍', '😍'),
    ('test😍', 'test😍'),
    ('test😍 ’', 'test😍 ’'),
    ('test😍 \\u2019', 'test😍 ’'),
    ('Using that real good [1\\4]', 'Using that real good [1\\4]'),
])
def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock):
    """Check that emoji in a title are kept when the name is formatted.

    The cases also cover a literal ``\\u2019`` escape, expected to become the
    character itself, and a lone backslash, expected to be left alone.
    """
    submission.title = test_name_string
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    formatted_name = formatter._format_name(submission, '{TITLE}')
    assert do_test_string_equality(formatted_name, expected)
@pytest.mark.parametrize(('test_string', 'expected'), [
    ('test \\u2019', 'test ’'),
    ('My cat\\u2019s paws are so cute', 'My cat’s paws are so cute'),
])
def test_convert_unicode_escapes(test_string: str, expected: str):
    """Check that literal ``\\uXXXX`` sequences are turned into their characters."""
    converted = FileNameFormatter._convert_unicode_escapes(test_string)
    assert converted == expected
@pytest.mark.parametrize(('test_datetime', 'expected'), [
    (datetime(2020, 1, 1, 8, 0, 0), '2020-01-01T08:00:00'),
    (datetime(2020, 1, 1, 8, 0), '2020-01-01T08:00:00'),
    (datetime(2021, 4, 21, 8, 30, 21), '2021-04-21T08:30:21'),
])
def test_convert_timestamp(test_datetime: datetime, expected: str):
    """Check that a timestamp is rendered as an ISO string under the ``'ISO'`` format."""
    formatter = FileNameFormatter('{POSTID}', '', 'ISO')
    rendered = formatter._convert_timestamp(test_datetime.timestamp())
    assert rendered == expected
@pytest.mark.parametrize(('test_time_format', 'expected'), [
    ('ISO', '2021-05-02T13:33:00'),
    ('%Y_%m', '2021_05'),
    ('%Y-%m-%d', '2021-05-02'),
])
def test_time_string_formats(test_time_format: str, expected: str):
    """Check timestamp rendering under ``'ISO'`` and strftime-style formats."""
    formatter = FileNameFormatter('{TITLE}', '', test_time_format)
    moment = datetime(2021, 5, 2, 13, 33)
    rendered = formatter._convert_timestamp(moment.timestamp())
    assert rendered == expected
def test_get_max_path_length():
    """Check that the detected maximum path length is one of the accepted values."""
    accepted_lengths = (4096, 260, 1024)
    max_length = FileNameFormatter.find_max_path_length()
    assert max_length in accepted_lengths
def test_windows_max_path(tmp_path: Path):
    """Check shortening when the platform reports Windows and a 260-character limit.

    Both ``platform.system`` and ``find_max_path_length`` are patched for the
    duration of the call under test.
    """
    patch_system = unittest.mock.patch('platform.system', return_value='Windows')
    patch_max_length = unittest.mock.patch(
        'bdfr.file_name_formatter.FileNameFormatter.find_max_path_length',
        return_value=260,
    )
    with patch_system, patch_max_length:
        limited = FileNameFormatter.limit_file_name_length('test' * 100, '_1.png', tmp_path)
    assert len(str(limited)) <= 260
    assert len(limited.name) <= (260 - len(str(tmp_path)))
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), (
    ('gphmnr', YtdlpFallback, {'He has a lot to say today.mp4'}),
    ('d0oir2', YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}),
    ('jiecu', SelfPost, {'[deleted by user].txt'}),
))
def test_name_submission(
        test_reddit_id: str,
        test_downloader: type[BaseDownloader],
        expected_names: set[str],
        reddit_instance: praw.reddit.Reddit,
):
    """Check the file names produced for resources found by a real downloader.

    Args:
        test_reddit_id: Id of the submission to fetch.
        test_downloader: Downloader class, instantiated with the submission.
        expected_names: Exact set of file names the formatter must produce.
        reddit_instance: Reddit client used to fetch the submission.
    """
    test_submission = reddit_instance.submission(id=test_reddit_id)
    test_resources = test_downloader(test_submission).find_resources()
    test_formatter = FileNameFormatter('{TITLE}', '', '')
    results = test_formatter.format_resource_paths(test_resources, Path('.'))
    # The first element of each returned entry is the path; compare names only.
    result_names = {entry[0].name for entry in results}
    assert result_names == expected_names