一、功能描述
前提条件:pip3 install beautifulsoup4 requests(argparse 是 Python 标准库,无需安装)
使用 Python 执行脚本,传入两个参数,第一个参数为番号(不要求大小写),第二个参数为 代理地址(missav被墙了,必须通过代理访问)。
二、源码
import sys
import argparse
import requests
from bs4 import BeautifulSoup
def get_video_url(av_code, proxy_url):
    """Search missav.com for an AV code and return matching video page URLs.

    Args:
        av_code: The AV code to search for (case-insensitive; lower-cased
            before building the search URL).
        proxy_url: Proxy server URL applied to both HTTP and HTTPS
            (missav is blocked in some regions, so a proxy is required).

    Returns:
        A de-duplicated list of ``href`` values from ``<a>`` tags whose
        ``alt`` attribute contains the lower-cased AV code.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status (e.g. a Cloudflare 403).
    """
    # NOTE: the original also sent 'Content-Type: application/json', which is
    # meaningless on a body-less GET request and has been dropped.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    # Route both schemes through the supplied proxy.
    proxies = {
        "http": proxy_url,
        "https": proxy_url
    }
    # missav search URLs use lower-case codes.
    av_code = av_code.lower()
    url = f"https://missav.com/cn/search/{av_code}"
    # Fix: add a timeout so a dead proxy cannot hang forever, and fail
    # loudly on HTTP errors instead of silently parsing an error page
    # into an empty result list.
    response = requests.get(url, proxies=proxies, headers=header, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Set comprehension de-duplicates hrefs whose alt text contains the code.
    unique_links = {
        a_tag.get('href')
        for a_tag in soup.find_all('a')
        if a_tag.get('alt') and av_code in a_tag.get('alt')
    }
    return list(unique_links)
def get_magnet_links(link, proxy_url):
    """Fetch a video page and print its magnet/download table rows.

    Args:
        link: URL of the video detail page to scrape.
        proxy_url: Proxy server URL applied to both HTTP and HTTPS.

    Side effects:
        Prints one list per table row (hrefs and cell text), filtering out
        "keepshare.org" entries and cells whose text is the literal "下载".
        Prints a not-found message when the expected table is absent.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    # Route both schemes through the supplied proxy.
    proxies = {
        "http": proxy_url,
        "https": proxy_url
    }
    # Fix: timeout so a dead proxy cannot hang forever; raise on HTTP
    # errors instead of scraping an error page.
    response = requests.get(link, proxies=proxies, headers=header, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # The magnet table on missav pages carries class "min-w-full".
    target_table = soup.find('table', class_='min-w-full')
    if target_table is None:
        print("未找到包含磁力链接的表格")
        return
    for row in target_table.find_all('tr'):
        # Accumulate this row's hrefs and cell text.
        data = []
        for col in row.find_all('td'):
            # Download links are the <a rel="nofollow"> tags in each cell.
            # Fix: loop variable renamed so it no longer shadows the
            # `link` parameter.
            for anchor in col.find_all('a', rel='nofollow'):
                href = anchor['href']
                # Skip keepshare.org mirror links.
                if "keepshare.org" not in href:
                    data.append(href)
            # Keep the cell's text too, minus the "下载" button label
            # and keepshare mirrors.
            text = col.get_text(strip=True)
            if text != "下载" and "keepshare.org" not in text:
                data.append(text)
        print(data)
def main(av_code, proxy_url):
    """Search for the AV code, then print magnet links for every result page."""
    for video_page in get_video_url(av_code, proxy_url):
        get_magnet_links(video_page, proxy_url)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="MissAV Spider Script")
    parser.add_argument("av_code", help="The AV code to search for.")
    parser.add_argument("proxy_url", help="The URL of the proxy server.")
    args = parser.parse_args()
    main(args.av_code, args.proxy_url)三、效果截图

四、可执行文件
Windows:
                            
                            
3 条评论
使用了代理,也加了请求头,missav报403错误.访问其他外网网站也没问题
这个和代理的ip有关系,missav前面套了cf,有部分ip是通过不了cf的反爬策略的,可以直接用我的api
使用了代理,也加了请求头,missav报403错误.访问其他外网网站也没问题