根据本地txt文件内链接下载数据

前言

在 headers 中填入网站的 Cookie,指定文件保存目录 save_directory、最大线程数 max_threads 以及包含下载链接的文件 input_file,运行即可。

代码

import os
import threading
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# HTTP request headers sent with every download request.
# The Cookie carries the site session, so downloads work while logged in.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
    'Referer': 'https://mineleak.pro/',
    'Cookie': '******'  # Replace with your own Cookie
}

# Directory where downloaded files are saved.
save_directory = r'S:\Users\26370\Desktop\Down'

# Maximum number of downloads running concurrently.
max_threads = 3

# Create the save directory if it does not already exist.
os.makedirs(save_directory, exist_ok=True)

def download_file(download_url):
    """Download one file from `download_url` into `save_directory`.

    The filename is taken from the Content-Disposition response header when
    present, otherwise from the last path segment of the URL. Files that
    already exist locally are skipped. All failures are printed, never raised,
    so one bad link does not abort the other downloads.
    """
    try:
        # Fetch the file; relies on the module-level `headers` (Cookie) for auth.
        response = requests.get(download_url, headers=headers, timeout=60)

        if response.status_code == 200:
            # Prefer the server-supplied filename.
            content_disposition = response.headers.get('Content-Disposition')
            if content_disposition:
                filename = content_disposition.split('filename=')[1].strip('"')
            else:
                # BUG FIX: the original fallback urljoin(download_url, '/')
                # resolves to the site root, whose basename is always '' —
                # use the URL's own last path segment instead, with a default
                # for URLs that have no path.
                filename = os.path.basename(urlsplit(download_url).path) or 'download.bin'

            # The header value is untrusted input: strip any directory parts
            # so a malicious 'filename="..\\evil"' cannot escape save_directory.
            filename = os.path.basename(filename)

            # Build the full local path for this file.
            file_path = os.path.join(save_directory, filename)

            # Skip files that were already downloaded.
            if os.path.exists(file_path):
                print(f"文件 '{filename}' 已存在,跳过下载。")
                return

            # Write the response body to disk.
            with open(file_path, 'wb') as f:
                f.write(response.content)

            print(f"文件 '{filename}' 下载完成,保存至 '{file_path}'")
        else:
            print(f"下载失败,HTTP响应码:{response.status_code}")
    except Exception as e:
        # Best-effort: report and continue with the remaining downloads.
        print(f"下载失败:{str(e)}")

# Path of the text file containing one resource-page URL per line.
input_file = 'download_links copy.txt'  # Replace with your own link list

# Collect the valid resource links, appending the '/download' suffix
# that turns a resource page URL into its direct download URL.
download_links = []
with open(input_file, 'r', encoding='utf-8') as f:
    for line in f:
        if '/resources/' in line:
            download_links.append(line.strip() + '/download')

# Download concurrently with a bounded pool. Unlike starting threads in
# batches and joining the whole batch (which idles every slot until the
# slowest download finishes), the pool keeps up to `max_threads` downloads
# running at all times. The `with` block waits for all tasks to complete.
with ThreadPoolExecutor(max_workers=max_threads) as executor:
    # list() forces iteration so every submitted download actually runs.
    list(executor.map(download_file, download_links))

print("所有文件下载完成。")