验证代理可用性

你知唔知我系靓仔

验证代理可用性

因为爬取到的代理可用性较低,

有效期大多只有几分钟,所以需要在使用前进行验证。

输出格式为(代理地址|国家代码|出口 IP):

socks4://1.1.1.1:1080|VN|1.1.1.1
socks5://1.1.1.1:7891|CN|1.1.1.1
http://1.1.1.1:4003|VN|1.1.1.1

支持http/https/socks4/socks5

from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from tqdm import tqdm

# Timeout in seconds passed to requests.get for every proxy check; a single
# number is used by requests for both the connect and the read phase.
TIMEOUT = 15

def validate_proxy(proxy):
    """Check whether a proxy works by fetching https://ipinfo.io/ through it.

    Args:
        proxy: Proxy URL such as ``socks5://1.1.1.1:7891`` or
            ``http://1.1.1.1:4003``. The same URL is used for both HTTP and
            HTTPS traffic. Surrounding whitespace is ignored.

    Returns:
        ``"<proxy>|<country>|<ip>"`` when the request succeeds and the reply
        is a JSON object; country and ip are the reply's ``country`` and
        ``ip`` fields, or ``"N/A"`` when a field is absent. ``None`` when the
        proxy is blank, the request fails, or the reply is not a JSON object.
    """
    proxy = proxy.strip() if proxy else ""
    if not proxy:
        # An empty proxy URL is falsy, so requests would not route the
        # request through any proxy and a blank entry would look "working".
        return None

    proxies = {"http": proxy, "https": proxy}
    try:
        response = requests.get("https://ipinfo.io/", proxies=proxies, timeout=TIMEOUT)
        response.raise_for_status()
        ip_info = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return None

    if not isinstance(ip_info, dict):
        # A misbehaving proxy may answer with valid JSON of the wrong shape.
        return None

    country_code = ip_info.get("country", "N/A")
    ip_address = ip_info.get("ip", "N/A")
    return f"{proxy}|{country_code}|{ip_address}"

def validate_proxies(proxies):
    """Validate proxies concurrently and append each working one to ok.txt.

    Args:
        proxies: Iterable of proxy URL strings. Surrounding whitespace is
            stripped and blank entries are skipped.

    Returns:
        List of the non-empty strings returned by ``validate_proxy``, in the
        order the proxies were given.
    """
    candidates = [entry for entry in (proxy.strip() for proxy in proxies) if entry]
    if not candidates:
        return []

    with ThreadPoolExecutor(max_workers=50) as executor:
        futures = [executor.submit(validate_proxy, proxy) for proxy in candidates]
        # Consume futures as they finish so that one slow proxy does not hold
        # back the progress bar or the appends to ok.txt.
        finished = tqdm(
            as_completed(futures),
            total=len(futures),
            desc="Validating Proxies",
            unit="proxy",
        )
        for future in finished:
            result = future.result()
            if result:
                # Append immediately so hits survive an interrupted run.
                with open("ok.txt", "a") as file:
                    file.write(result + "\n")

    # Every future is done here; rebuild the list in submission order.
    return [result for result in (future.result() for future in futures) if result]

def main():
    """Read proxy.txt, de-duplicate its entries and validate them.

    Each line of proxy.txt is treated as one proxy URL. The entries are
    passed to ``validate_proxies``, which appends the working ones to ok.txt.
    """
    with open("proxy.txt", "r") as file:
        # Strip before de-duplicating: otherwise "x\n" and a final "x"
        # without a newline count as two different proxies. dict.fromkeys
        # removes duplicates while keeping the file's order.
        stripped_lines = (line.strip() for line in file)
        proxies = list(dict.fromkeys(entry for entry in stripped_lines if entry))

    validate_proxies(proxies)

# Run the validation only when this file is executed as a script.
if __name__ == "__main__":
    main()