帮我生成一个根据用户输入问题在下列网站进行类案检索:https://law.wkinfo.com.cn/ ;https://cn.bing.com/ 找出类似案例的python代码
下面给出一份可直接运行的最小可行示例(Python 3.8+)。
它支持两个检索源:威科先行(需登录 Cookie)与 Bing 国内版。
Bing 部分使用 requests-html 的浏览器模式自动渲染页面并抓取前 10 条结果。运行前请确保:
pip install requests-html requests beautifulsoup4
如有需要,再装 lxml:pip install lxml
case_search.py:

import re
import sys
import json
import time
import requests
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from urllib.parse import quote_plus
# ---------- Helpers ----------
def print_banner():
    """Print the CLI startup banner (tool name and supported sources)."""
    sep = "=" * 60
    for text in (
        sep,
        "类案检索小工具 v0.1",
        "支持威科先行(需 Cookie)与 Bing 国内版",
        sep,
    ):
        print(text)
# ---------- Wolters Kluwer (威科先行) ----------
# Search endpoint of the Wolters Kluwer China legal database.
WK_URL = "https://law.wkinfo.com.cn/search/searchList"
# Base request headers: a desktop Chrome User-Agent so the request looks
# like a real browser. NOTE(review): authenticated requests also need a
# "Cookie" header copied from a logged-in browser session.
HEADERS_WK = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/124.0 Safari/537.36",
}
def search_wk(query: str, cookie: str, top_k: int = 10) -> list[dict]:
    """Search the Wolters Kluwer case database for *query*.

    Args:
        query: The user's question / keywords.
        cookie: The full Cookie string copied from a logged-in browser session.
        top_k: Maximum number of results to request (page size).

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, possibly empty.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    # Build a per-request header dict instead of mutating the shared
    # module-level HEADERS_WK (the original leaked the cookie into a
    # global, which is not re-entrant and persists across calls).
    headers = dict(HEADERS_WK)
    headers["Cookie"] = cookie.strip()
    params = {
        "keyword": query,
        "openSearch": "true",
        "sortType": "date_desc",
        "isAdv": "false",
        "group": "case",  # restrict the search to case documents only
        "pageSize": top_k,
        "pageIndex": 1,
    }
    resp = requests.get(WK_URL, headers=headers, params=params, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    items = []
    # NOTE(review): selectors assume the current result-page markup — verify
    # against a live response if this starts returning empty lists.
    for box in soup.select("div.result-list div.search-result-item"):
        title_tag = box.select_one("a.title-link")
        if not title_tag:
            continue
        # Skip anchors without an href instead of raising KeyError.
        href = title_tag.get("href")
        if not href:
            continue
        title = title_tag.get_text(strip=True)
        link = "https://law.wkinfo.com.cn" + href
        items.append({"title": title, "url": link})
    return items
# ---------- Bing (China edition) ----------
# Search endpoint of the China edition of Bing; results pages may require
# JavaScript rendering, handled below via requests-html.
BING_URL = "https://cn.bing.com/search"
def search_bing(query: str, top_k: int = 10) -> list[dict]:
    """Search the China edition of Bing for case-law pages about *query*.

    Bing may serve a JS-rendered page, so the requests-html browser mode
    (headless Chromium) is used to render before scraping.

    Args:
        query: The user's question / keywords.
        top_k: Maximum number of organic results to collect.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, possibly empty.
    """
    session = HTMLSession()
    try:
        # Quote the query to look more like a human search, and bias the
        # results toward case documents with extra keywords.
        keyword = quote_plus(f'"{query}" 案例 判决')
        url = f"{BING_URL}?q={keyword}&ensearch=0&FORM=QBLH"
        r = session.get(url, timeout=15)
        # Wait for JS rendering to complete.
        r.html.render(timeout=15)
        items = []
        for li in r.html.find("li.b_algo", limit=top_k):
            a = li.find("h2 a", first=True)
            if not a:
                continue
            href = a.attrs.get("href")
            if href:
                items.append({"title": a.text, "url": href})
        return items
    finally:
        # Always shut down the session (and its headless Chromium process);
        # the original leaked it on every call.
        session.close()
# ---------- CLI ----------
def _print_results(results: list[dict]) -> None:
    """Print a numbered title/URL list, one blank line between entries."""
    for idx, it in enumerate(results, 1):
        print(f"{idx}. {it['title']}\n {it['url']}\n")


def main():
    """CLI entry point: prompt for a question, search both sources,
    print the merged results, and optionally save them as JSON."""
    print_banner()
    query = input("请输入待检索的问题:").strip()
    if not query:
        print("问题为空,程序结束")
        sys.exit(0)
    print("\n【1】Bing 国内版检索……")
    try:
        bing_results = search_bing(query)
    except Exception as e:
        # Best-effort: a network/rendering failure on one source should not
        # abort the whole run.
        print("Bing 抓取失败:", e)
        bing_results = []
    print("\n【2】威科先行检索……")
    cookie = input("请粘贴威科登录后的 Cookie(整段复制即可,若有隐私顾虑可留空跳过):").strip()
    wk_results = []
    if cookie:
        try:
            wk_results = search_wk(query, cookie)
        except Exception as e:
            print("威科抓取失败:", e)
    # Summary output (duplicated loops factored into _print_results).
    print("\n===== Bing 结果 =====")
    _print_results(bing_results)
    print("===== 威科先行结果 =====")
    _print_results(wk_results)
    # Optional: persist both result lists as JSON.
    save = input("是否保存为 result.json?(y/n): ").strip().lower()
    if save == "y":
        with open("result.json", "w", encoding="utf-8") as f:
            json.dump({"bing": bing_results, "wk": wk_results}, f, ensure_ascii=False, indent=2)
        print("已保存至 result.json")


if __name__ == "__main__":
    main()
运行方式:python case_search.py
威科 Cookie:登录后从浏览器整段复制(很长,通常包含 SESSION、TOKEN 等字段)。
首次调用渲染时,requests-html 会自动下载约 100 MB 的 Chromium;下载完成后即可离线渲染。