# Code

Using Python to scrape the original images from a Bilibili UP's album:

```python
# -*- coding: utf-8 -*-
import os
import requests
import json
import time
import logging
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Suppress the InsecureRequestWarning emitted when verify=False is used
urllib3.disable_warnings()

# Create a session and attach a retry strategy
session = requests.Session()
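# Up to 5 retries on the listed 5xx statuses, with exponentially growing waits between attempts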
retry = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)

def get_headers():
    return {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": "https://space.bilibili.com/",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        "Origin": "https://space.bilibili.com"
    }

def get_urls(uid, page=0):
    pic_list = []
    while True:
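        # doc_list is paginated: each call returns up to page_size (30) of the UP's posts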
        url = f"https://api.vc.bilibili.com/link_draw/v1/doc/doc_list?uid={uid}&page_num={page}&page_size=30&biz=all"
        try:
            response = session.get(url, headers=get_headers(), verify=False, timeout=10)
            response.raise_for_status()  # Raise HTTPError for 4xx/5xx responses

            dic = response.json()
            logger.debug(f"API Response: {dic}")

            if not dic.get("data"):
                logger.warning(f"No 'data' key in response for page {page}")
                break

            items = dic["data"].get("items", [])
            if not items:
                logger.info(f"No more items found after page {page}")
                break

            for item in items:
                # A post can carry several pictures; collect every img_src, not just the first
                for picture in item.get("pictures", []):
                    img_src = picture.get("img_src")
                    if img_src:
                        pic_list.append(img_src)
                        logger.info(f"Found image: {img_src}")

            page += 1
            time.sleep(2)  # Throttle so the API is not hit too frequently

        except requests.RequestException as e:
            logger.error(f"Request failed: {e}")
            break
        except json.JSONDecodeError:
            logger.error("Failed to parse JSON response")
            break
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            break

    return pic_list

def save_pic(pic_list, file_path="./Images"):
    if not os.path.exists(file_path):
        os.makedirs(file_path)
        logger.info(f"Created directory: {file_path}")

    for i, pic_url in enumerate(pic_list, 1):
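        # Fetch the full-size image through the same retrying session; verify=False mirrors get_urls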
        try:
            response = session.get(pic_url, headers=get_headers(), verify=False, timeout=10)
            response.raise_for_status()

            # Take the extension from the URL path only, ignoring any query string
            ext = pic_url.split("?")[0].split(".")[-1]
            file_name = f"{i}.{ext}"
            file_path_full = os.path.join(file_path, file_name)

            with open(file_path_full, "wb") as f:
                f.write(response.content)

            logger.info(f"Downloaded: {file_name}")
            time.sleep(1)  # Brief pause between downloads

        except requests.RequestException as e:
            logger.error(f"Failed to download {pic_url}: {e}")
        except Exception as e:
            logger.error(f"Unexpected error while saving {pic_url}: {e}")

def main():
    uid = 610540264  # The UP's user id: the trailing digits of their space.bilibili.com URL
    logger.info(f"Starting to fetch images for UID: {uid}")

    pic_list = get_urls(uid)
    logger.info(f"Total images found: {len(pic_list)}")

    if pic_list:
        save_pic(pic_list)
        logger.info("Download completed")
    else:
        logger.warning("No images found to download")

if __name__ == "__main__":
    main()
```
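
The UID is hard-coded in `main()`. If you would rather pass it on the command line, here is a minimal sketch built on the script's own `get_urls`, `save_pic`, and `logger` (the `--out` flag is an added convenience, not part of the original):

```python
import argparse

def cli():
    # Hypothetical CLI wrapper around the functions defined above
    parser = argparse.ArgumentParser(description="Download a Bilibili UP's album images")
    parser.add_argument("uid", type=int, help="trailing digits of the space.bilibili.com URL")
    parser.add_argument("--out", default="./Images", help="output directory")
    args = parser.parse_args()

    pic_list = get_urls(args.uid)
    logger.info(f"Total images found: {len(pic_list)}")
    if pic_list:
        save_pic(pic_list, file_path=args.out)

if __name__ == "__main__":
    cli()
```

Run it as, e.g., `python crawler.py 610540264 --out ./Images`.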

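`save_pic` downloads one image per second. For a large album, a small thread pool can overlap the network waits; this is a sketch assuming the script's `session`, `get_headers`, and `logger` are in scope, and it keeps `max_workers` low so the server is not hammered:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

def download_one(index, pic_url, file_path):
    # Hypothetical worker: fetch one image and write it to disk
    response = session.get(pic_url, headers=get_headers(), verify=False, timeout=10)
    response.raise_for_status()
    ext = pic_url.split("?")[0].split(".")[-1]
    full_path = os.path.join(file_path, f"{index}.{ext}")
    with open(full_path, "wb") as f:
        f.write(response.content)
    return full_path

def save_pic_concurrent(pic_list, file_path="./Images", workers=4):
    os.makedirs(file_path, exist_ok=True)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(download_one, i, url, file_path)
                   for i, url in enumerate(pic_list, 1)]
        for fut in as_completed(futures):
            try:
                logger.info(f"Downloaded: {fut.result()}")
            except Exception as e:
                logger.error(f"Download failed: {e}")
```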
