diff --git a/README.md b/README.md index 902911b..e562c11 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,7 @@ This tool automates the process of downloading photos from Google Photos albums * Be aware of Google’s Terms of Service before using this tool. * It simulates human actions, but Google might not be happy about someone using this. - -## Requirements - -* Python 3.11+ -* Selenium -* Chrome or Chromium + WebDriver (Auto-installed by Selenium if not found) +* Selenium auto-downloads the Chrome driver if not found, which can take up space. ## Installation @@ -37,4 +32,11 @@ This tool automates the process of downloading photos from Google Photos albums ## Usage +### CLI `gp-dl --album-urls ALBUM_URL ALBUM_URL2 --output-dir test --log-level info` + +### As a module +```py +from gp_dl import download_albums +successful_albums, failed_albums, album_times = download_albums(["ALBUM_URL", "ALBUM_URL2"], output_dir="test") +``` diff --git a/gp_dl/__init__.py b/gp_dl/__init__.py index d3ec452..c7132b6 100644 --- a/gp_dl/__init__.py +++ b/gp_dl/__init__.py @@ -1 +1,3 @@ -__version__ = "0.2.0" +from .lib import download_albums + +__version__ = "0.3.0" diff --git a/gp_dl/cli.py b/gp_dl/cli.py new file mode 100644 index 0000000..b88c1a9 --- /dev/null +++ b/gp_dl/cli.py @@ -0,0 +1,66 @@ +import argparse, logging, sys, time +from statistics import median +from .lib import download_albums + +BANNER = """ +██████ ██████ ██████ ██ +██ ██ ██ ██ ██ ██ +██ ███ ██████ █████ ██ ██ ██ +██ ██ ██ ██ ██ ██ +██████ ██ ██████ ███████ + +gp-dl — Google Photos Downloader +Download full-resolution albums from Google Photos using Selenium + +Author: csd4ni3l | GitHub: https://github.com/csd4ni3l +""" + +LOG_LEVELS = { + "DEBUG": logging.DEBUG, + "INFO": logging.INFO, + "ERROR": logging.ERROR, + "FATAL": logging.FATAL, + "QUIET": 999999999 +} + +def parse_cli_args(): + parser = argparse.ArgumentParser(description="Download full-res images from a Google Photos album using Selenium.") + parser.add_argument("--album-urls", nargs="+", required=True, help="Google Photos album URL(s)") + parser.add_argument("--output-dir", required=True, help="The directory to save downloaded albums") + parser.add_argument("--driver-path", default=None, help="Custom Chrome driver path") + parser.add_argument("--profile-dir", default=None, help="A Chrome user data directory for sessions, set this if you want to open non-shared links.") + parser.add_argument("--headless", action="store_true", help="Run Chrome headlessly") + parser.add_argument("--log-level", default="INFO", help="Specifies what to include in log output. Available levels: debug, info, error, fatal") + return parser.parse_args() + +def configure_logging(log_level: str): + if not log_level.upper() in LOG_LEVELS: + print(f"Invalid logging level: {log_level}") + sys.exit(1) + + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=LOG_LEVELS[log_level.upper()]) + for logger_to_disable in ["selenium", "urllib3"]: + logging.getLogger(logger_to_disable).propagate = False + logging.getLogger(logger_to_disable).disabled = True + +def run_cli(): + args = parse_cli_args() + + if not args.log_level.upper() == "QUIET": + print(BANNER) + + configure_logging(args.log_level) + + all_start = time.perf_counter() + + successful_albums, failed_albums, album_times = download_albums(args.album_urls, args.output_dir, args.driver_path, args.profile_dir, args.headless) + + logging.info("") + logging.info("===== DOWNLOAD STATISTICS =====") + logging.info(f"Total albums given: {len(args.album_urls)}") + logging.info(f"Successful albums ({len(successful_albums)}): {', '.join(successful_albums) or None}") + logging.info(f"Failed albums ({len(failed_albums)}): {', '.join(failed_albums) or 'None'}") + logging.info(f"Median time taken per album: {median(album_times or [0]):.2f} seconds") + logging.info(f"Average time taken per album: {sum(album_times or [0]) / len(album_times or [0]):.2f} seconds") + logging.info(f"Total time taken: {time.perf_counter() - all_start:.2f} seconds") + logging.info("================================") \ No newline at end of file diff --git a/gp_dl/main.py b/gp_dl/lib.py similarity index 52% rename from gp_dl/main.py rename to gp_dl/lib.py index 36c34e2..f1551a0 100644 --- a/gp_dl/main.py +++ b/gp_dl/lib.py @@ -1,43 +1,11 @@ -import os, time, argparse, logging, sys from selenium.webdriver import Chrome, ChromeService from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import TimeoutException from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -from statistics import median from zipfile import ZipFile - -BANNER = """ -██████ ██████ ██████ ██ -██ ██ ██ ██ ██ ██ -██ ███ ██████ █████ ██ ██ ██ -██ ██ ██ ██ ██ ██ -██████ ██ ██████ ███████ - -gp-dl — Google Photos Downloader -Download full-resolution albums from Google Photos using Selenium - -Author: csd4ni3l | GitHub: https://github.com/csd4ni3l -""" - -LOG_LEVELS = { - "DEBUG": logging.DEBUG, - "INFO": logging.INFO, - "ERROR": logging.ERROR, - "FATAL": logging.FATAL, - "QUIET": 999999999 -} - -def parse_args(): - parser = argparse.ArgumentParser(description="Download full-res images from a Google Photos album using Selenium.") - parser.add_argument("--album-urls", nargs="+", required=True, help="Google Photos album URL(s)") - parser.add_argument("--output-dir", required=True, help="The directory to save downloaded albums") - parser.add_argument("--driver-path", default=None, help="Custom Chrome driver path") - parser.add_argument("--profile-dir", default=None, help="A Chrome user data directory for sessions, set this if you want to open non-shared links.") - parser.add_argument("--headless", action="store_true", help="Run Chrome headlessly") - parser.add_argument("--log-level", default="INFO", help="Specifies what to include in log output. Available levels: debug, info, error, fatal") - return parser.parse_args() +import os, time, logging def setup_driver(driver_path=None, profile_dir=None, headless=True): chrome_options = Options() @@ -71,41 +39,53 @@ def find_crdownload_file(): for file in os.listdir("gp_temp"): if file.endswith(".crdownload"): return file + +def download_albums( + album_urls: list[str], + output_dir: str, + driver_path: str | None = None, + profile_dir: str | None = None, + headless: bool = False, +) -> tuple[list[str], list[str], list[float]]: + """ + 1) Download full-resolution images from one or more Google Photos albums using Selenium. -def configure_logging(log_level: str): - if not log_level.upper() in LOG_LEVELS: - print(f"Invalid logging level: {log_level}") - sys.exit(1) + 2) Return lists of successful and failed album names, as well as download durations. - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=LOG_LEVELS[log_level.upper()]) - for logger_to_disable in ["selenium", "urllib3"]: - logging.getLogger(logger_to_disable).propagate = False - logging.getLogger(logger_to_disable).disabled = True + :type album_urls: list[str] + :param album_urls: One or more Google Photos album URLs to download images from. -def run_cli(): - args = parse_args() - - if not args.log_level.upper() == "QUIET": - print(BANNER) + :type output_dir: str + :param output_dir: Directory path where the downloaded albums will be saved. - configure_logging(args.log_level) - driver = setup_driver(profile_dir=args.profile_dir, headless=args.headless) + :type driver_path: str | None + :param driver_path: Path to a custom Chrome WebDriver binary. If None, Selenium will download it or choose the default system ChromeDriver. + + :type profile_dir: str | None + :param profile_dir: Path to a Chrome user data directory. Use this to access private albums (non-shared links). + + :type headless: bool + :param headless: Whether to run Chrome in headless mode. Defaults to False. + + :returns: A tuple containing the names of the successful albums, names of the albums that failed to download, and the durations it took to download each album. + :rtype: tuple[list[str], list[str], list[float]] + """ + + driver = setup_driver(driver_path=driver_path, profile_dir=profile_dir, headless=headless) if not os.path.exists("gp_temp") or not os.path.isdir("gp_temp"): logging.info("Creating gp_temp directory to temporarily store the downloaded zip files.") os.makedirs("gp_temp", exist_ok=True) - if not os.path.exists(args.output_dir) or not os.path.isdir(args.output_dir): + if not os.path.exists(output_dir) or not os.path.isdir(output_dir): logging.fatal("Invalid output directory. Please supply a valid and existing directory.") return failed_albums = [] successful_albums = [] - total_albums = len(args.album_urls) - all_start = time.perf_counter() album_times = [] - for album_url in args.album_urls: + for album_url in album_urls: album_start = time.perf_counter() driver.get(album_url) @@ -150,30 +130,22 @@ def run_cli(): zip_file = find_zip_file() time.sleep(0.1) - logging.debug(f"Zip file downloaded, extracting to {args.output_dir}") + logging.debug(f"Zip file downloaded, extracting to {output_dir}") with ZipFile(f"gp_temp/{zip_file}") as opened_file: - opened_file.extractall(args.output_dir) + opened_file.extractall(output_dir) logging.debug("Deleting zip file...") os.remove(f"gp_temp/{zip_file}") - logging.info(f"Succesfully extracted zip file to {args.output_dir}") + logging.info(f"Succesfully extracted zip file to {output_dir}") successful_albums.append(album_title) album_times.append(time.perf_counter() - album_start) + + logging.debug("Removing temporary gp_temp directory.") + os.removedirs("gp_temp") - logging.debug("Removing temporary gp_temp directory.") - os.removedirs("gp_temp") + driver.quit() - logging.info("") - logging.info("===== DOWNLOAD STATISTICS =====") - logging.info(f"Total albums given: {total_albums}") - logging.info(f"Successful albums ({len(successful_albums)}): {', '.join(successful_albums) or None}") - logging.info(f"Failed albums ({len(failed_albums)}): {', '.join(failed_albums) or 'None'}") - logging.info(f"Median time taken per album: {median(album_times or [0]):.2f} seconds") - logging.info(f"Average time taken per album: {sum(album_times or [0]) / len(album_times or [0]):.2f} seconds") - logging.info(f"Total time taken: {time.perf_counter() - all_start:.2f} seconds") - logging.info("================================") - - driver.quit() \ No newline at end of file + return successful_albums, failed_albums, album_times \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e07888c..6eb90f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "gp-dl" -version = "0.2.0" +version = "0.3.0" description = "A Python-based Google Photos downloader built with Selenium." readme = "README.md" requires-python = ">=3.11" @@ -18,7 +18,7 @@ classifiers = [ ] [project.scripts] -gp-dl = "gp_dl.main:run_cli" +gp-dl = "gp_dl.cli:run_cli" [build-system] requires = ["setuptools>=61.0"] diff --git a/uv.lock b/uv.lock index c6f8cf0..9435022 100644 --- a/uv.lock +++ b/uv.lock @@ -39,7 +39,7 @@ wheels = [ [[package]] name = "gp-dl" -version = "0.2.0" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "selenium" },