feat: Fetch all album from an account

* add user and password parameters (not used if profile_dir is set)
* remove mandatory album_url args
* auto login with user and password
* add locale for FR and GOOGLE_LANG (default en) environment variable to switch
* add CHROME_BINARY whenever need to specify chrome binary path
* add headless compatibilty for WSL
* fix missing gp_temp after first loop
* fix unclickable "More option" button by fetching "Share" button first
This commit is contained in:
Adi3000
2025-08-14 21:31:58 +02:00
parent a79d3c96da
commit 8f0a6909e2
5 changed files with 145 additions and 19 deletions
+116 -14
View File
@@ -4,15 +4,48 @@ from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from zipfile import ZipFile
import os, time, logging
import os, time, logging, json
from pathlib import Path
__driver__ = None
WEB_DRIVER_WAIT = int(os.getenv("WEB_DRIVER_WAIT","10"))
WSL_INSIDE = os.getenv("WSL_INSIDE", False)
CHROME_BINARY = os.getenv("CHROME_BINARY","")
GOOGLE_LANG = os.getenv("GOOGLE_LANG","en")
def load_translation(locale):
file_path = Path(__file__).parent / Path("locales") / f"{locale}.json"
with open(file_path, "r", encoding="utf-8") as f:
return json.load(f)
__labels = load_translation(GOOGLE_LANG)
def get_driver(driver_path=None, profile_dir=None, headless=True):
global __driver__
if __driver__ is None:
logging.info(f"Initialize driver with driver {driver_path} and profile ({profile_dir} (headless={headless}))...")
__driver__ = setup_driver(driver_path, profile_dir, headless)
return __driver__
def reset_driver ():
global __driver__
__driver__ = None
def setup_driver(driver_path=None, profile_dir=None, headless=True):
chrome_options = Options()
if CHROME_BINARY:
logging.info(f"Use binary <{CHROME_BINARY}>")
chrome_options.binary_location = CHROME_BINARY
if profile_dir:
chrome_options.add_argument(f"--user-data-dir={profile_dir}")
if headless:
chrome_options.add_argument("--headless")
if WSL_INSIDE:
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--no-sandbox")
else:
chrome_options.add_argument("--headless")
prefs = {
"download.prompt_for_download": False,
@@ -39,7 +72,74 @@ def find_crdownload_file():
for file in os.listdir("gp_temp"):
if file.endswith(".crdownload"):
return file
def login(
user: str ,
password: str,
driver_path: str | None = None, headless=True):
driver = get_driver(driver_path=driver_path,headless=headless)
driver.get("https://photos.google.com/login")
usernameFieldPath = "identifierId"
usernameNextButtonPath = "identifierNext"
passwordFieldPath = "Passwd"
passwordNextButtonPath = "passwordNext"
usernameField = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.ID, usernameFieldPath)))
time.sleep(1)
usernameField.send_keys(user)
usernameNextButton = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.ID, usernameNextButtonPath)))
usernameNextButton.click()
passwordField = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.NAME, passwordFieldPath)))
time.sleep(1)
passwordField.send_keys(password)
passwordNextButton = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.ID, passwordNextButtonPath)))
passwordNextButton.click()
def list_albums(
profile_dir: str | None = None,
user: str | None = None,
password: str | None = None,
driver_path: str | None = None,
headless=True):
driver = get_driver(driver_path=driver_path,headless=headless)
if profile_dir is None:
if user and password:
login(user=user, password=password, driver_path=driver_path, headless=headless)
else:
logging.fatal("Neither profile_dir nor user and password has been defined, cannot fetch your albums.")
return
driver.get("https://photos.google.com/albums")
try:
album_div = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[aria-label="{albums}"'.format_map(__labels))))
except TimeoutException:
logging.error("Could not find the '{albums}' section in time.".format_map(__labels))
logging.error(f"Check if GOOGLE_LANG (value={GOOGLE_LANG}, default en) is set to your language and available.")
logging.info("Continuing with next album URL.")
failed_albums.append(album_title)
raise
links = album_div.find_elements(By.TAG_NAME, "a")
album_links = [link.get_attribute("href") for link in links]
return album_links
def download_all_albums(
profile_dir: str | None = None,
user: str | None = None,
password: str | None = None,
driver_path: str | None = None,
headless=True):
album_urls = list_albums(driver_path=driver_path, headless=headless)
download_albums(album_urls, output_dir, driver_path, profile_dir, headless)
def download_albums(
album_urls: list[str],
output_dir: str,
@@ -71,11 +171,7 @@ def download_albums(
:rtype: tuple[list[str], list[str], list[float]]
"""
driver = setup_driver(driver_path=driver_path, profile_dir=profile_dir, headless=headless)
if not os.path.exists("gp_temp") or not os.path.isdir("gp_temp"):
logging.info("Creating gp_temp directory to temporarily store the downloaded zip files.")
os.makedirs("gp_temp", exist_ok=True)
driver = get_driver(driver_path=driver_path, profile_dir=profile_dir, headless=headless)
if not os.path.exists(output_dir) or not os.path.isdir(output_dir):
logging.fatal("Invalid output directory. Please supply a valid and existing directory.")
@@ -88,6 +184,10 @@ def download_albums(
for album_url in album_urls:
album_start = time.perf_counter()
if not os.path.exists("gp_temp") or not os.path.isdir("gp_temp"):
logging.info("Creating gp_temp directory to temporarily store the downloaded zip files.")
os.makedirs("gp_temp", exist_ok=True)
driver.get(album_url)
album_title = driver.title.split(" -")[0]
@@ -96,21 +196,23 @@ def download_albums(
logging.debug("Waiting for menu button...")
try:
menu_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@aria-label="More options"]')))
share_buttons = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.element_to_be_clickable((By.XPATH, "//*[@aria-label=\"{share}\"]".format_map(__labels))))
except TimeoutException:
logging.error("Could not find the 'more options' button in time.")
logging.error("Could not find the '{share}' button in time.".format_map(__labels))
logging.error(f"Check if GOOGLE_LANG (value={GOOGLE_LANG}, default en) is set to your language and available.")
logging.info("Continuing with next album URL.")
failed_albums.append(album_title)
continue
logging.debug("Clicking menu button...")
share_buttons.send_keys(Keys.TAB)
menu_button = driver.execute_script("return document.activeElement")
menu_button.click()
logging.debug("Waiting for download all button...")
try:
download_all_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@aria-label="Download all"]')))
download_all_button = WebDriverWait(driver, WEB_DRIVER_WAIT).until(EC.presence_of_element_located((By.XPATH, '//*[@aria-label="{download}"]'.format_map(__labels))))
except TimeoutException:
logging.error("Could not find the 'download all' button in time.")
logging.error("Could not find the '{download}' button in time.".format_map(__labels))
logging.info("Continuing with next album.")
failed_albums.append(album_title)
continue