之前一直使用的聚磁力最近开始收费了,当然也不贵,而且还挺好用的,支持的源也很多。
但是因为用的比较少,收费模式是包月的,所以就想找个替代品了,网上搜了下没找到其他的。
于是写了个爬虫,跑起来没问题。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| import requests from bs4 import BeautifulSoup
class MagnetApi:
    """Scrape magnet links for a keyword from clzz1053.buzz search results.

    Usage: construct with a search keyword and a logger, then call
    scrape_data(); results accumulate in self.data_list, sorted by size
    (largest first).
    """

    def __init__(self, keyword, ilogger):
        """
        Args:
            keyword: search term spliced into the site's search URL.
            ilogger: logger-like object exposing .info().
        """
        self.logger = ilogger
        self.keyword = keyword
        # Site URL pattern: search-<keyword>-1-0-1.html is page 1 with the
        # default sort order.
        self.url = "https://clzz1053.buzz/search-" + keyword + "-1-0-1.html"
        self.data_list = []

    @staticmethod
    def get_size(string):
        """Normalize a human-readable size string to a number-of-GB string.

        " GB" suffixes are stripped, " MB" values are converted to GB with
        two decimals, and anything else is returned unchanged (so the
        result is not guaranteed to be numeric).
        """
        if " GB" in string:
            return string.replace(" GB", "")
        if " MB" in string:
            return "{:.2f}".format(float(string.replace(" MB", "")) / 1024)
        return string

    @classmethod
    def _sort_key(cls, item):
        """Numeric sort key for one scraped entry.

        Returns 0.0 when the size is missing or unparseable — the original
        code crashed with ValueError on float('') for entries lacking a
        size span.
        """
        try:
            return float(cls.get_size(item['size']))
        except ValueError:
            return 0.0

    def scrape_data(self):
        """Fetch the search page and collect magnet/size/name entries.

        Appends one dict per result box to self.data_list, then sorts the
        list by size, largest first, and logs the result count.
        """
        # Timeout so a stalled site cannot hang the caller forever.
        response = requests.get(self.url, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        for element in soup.find_all('div', class_='ssbox'):
            # bs4 calls the predicate with href=None for anchors without
            # the attribute; guard it or .startswith raises TypeError.
            magnet_anchor = element.find(
                'a',
                href=lambda href: href is not None
                and href.startswith('magnet:?xt=urn:btih:'))
            if magnet_anchor is None:
                # Result box without a magnet link: skip instead of
                # crashing on None.get('href').
                continue
            size_element = element.find('span', class_='lightColor')
            size = size_element.text.strip() if size_element else ""
            name = element.find('a').text.strip()
            self.data_list.append({
                "magnet": magnet_anchor.get('href'),
                "size": size,
                "name": name,
            })
        self.data_list = sorted(self.data_list, key=self._sort_key, reverse=True)
        self.logger.info("Scraped data for keyword: " + self.keyword + " and got: " + str(len(self.data_list)) + " results")
|