1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
| import requests from lxml import etree import time from fake_useragent import UserAgent import click import os import sys import threading import queue
def output():
    """Return the output directory path, creating the directory if needed.

    The directory is named ``output`` and is located next to this script.
    The returned string ends with the platform path separator, so callers
    can append a file name by plain string concatenation.
    """
    # abspath() keeps the result anchored to the script's location even when
    # __file__ is relative (dirname would then be empty or CWD-dependent).
    out_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
    os.makedirs(out_dir, exist_ok=True)
    return out_dir + os.sep


class getSub(threading.Thread):
    """Worker thread that scrapes Baidu ``site:`` result pages for host names.

    Attributes:
        page: queue of result offsets (the ``pn`` query parameter) to fetch;
            shared between all workers.
        site: the domain that is passed to the ``site:`` search operator.
        f: writable text stream that receives one line per result.
    """

    # Seconds to wait for a single HTTP response before giving up on it.
    TIMEOUT = 10

    # Shared by all workers so lines written by different threads to the
    # common output stream do not interleave.
    _write_lock = threading.Lock()

    def __init__(self, page, site, f):
        super().__init__()
        self.page = page
        self.site = site
        self.f = f

    def run(self):
        """Process result offsets from the shared queue until it is drained."""
        while True:
            # A separate empty()-check followed by a blocking get() can hang
            # forever when another worker takes the last item in between,
            # so fetch without blocking and stop on queue.Empty instead.
            try:
                offset = self.page.get_nowait()
            except queue.Empty:
                return
            self.getSubdomain(offset)

    def getSubdomain(self, P):
        """Fetch one Baidu result page and report the host of every hit.

        Args:
            P: result offset, sent as the ``pn`` query parameter.

        For each ``<h3><a>`` link on the page the link is requested so that
        redirects are followed, and the host part of the final URL is printed
        together with the link text, both to ``self.f`` and to stdout.
        Network failures are reported on stderr and the affected page or
        link is skipped.
        """
        header = {'User-Agent': UserAgent().random}
        with requests.Session() as session:
            try:
                html = session.get(
                    "https://www.baidu.com/s",
                    # requests percent-encodes the values ("site:" -> "site%3A").
                    params={"wd": "site:" + self.site, "pn": P},
                    headers=header,
                    timeout=self.TIMEOUT,
                )
            except requests.RequestException as exc:
                print("failed to fetch result page pn=%s: %s" % (P, exc),
                      file=sys.stderr)
                return
            html.encoding = 'utf-8'
            tree = etree.HTML(html.text)
            if tree is None:
                # Defensive: nothing parseable came back, so there are no links.
                return

            # Iterate the anchors themselves so each href stays paired with
            # its own title, even if some anchor carries no href.
            for anchor in tree.xpath('//h3/a'):
                href = anchor.get('href')
                if not href:
                    continue
                try:
                    # The final URL after redirects is the real target.
                    sub = session.get(href, headers=header,
                                      timeout=self.TIMEOUT).url
                except requests.RequestException as exc:
                    print("failed to resolve %s: %s" % (href, exc),
                          file=sys.stderr)
                    continue
                finally:
                    # Pause between link requests, successful or not.
                    time.sleep(1)

                # "scheme://host/path" -> "host"
                host = sub.split('//', 1)[-1].split('/', 1)[0]
                line = "%s%s :\t\t%s" % (" ", host, anchor.text)
                with self._write_lock:
                    print(line, file=self.f)
                    print(line, file=sys.stdout)
@click.command()
@click.option('-d',default='default',help='the target domain')
def main(d):
    """Collect subdomains of the target domain from Baidu search results."""
    # NOTE: click displays the docstring above as the command's --help text.
    if d == 'default':
        # No domain was supplied: point the user at the built-in help and stop.
        use=''' use 'getSubdomain.py --help' to get more help '''
        print(use)
        return

    # Queue the result offsets (0, 10, 20, 30) for the workers to consume.
    page = queue.Queue()
    for offset in range(0, 40, 10):
        page.put(offset)

    thread_count = 3
    # The context manager guarantees the results file is flushed and closed
    # once every worker has finished, even if starting or joining fails.
    with open(output()+d+'.txt','a',encoding='utf-8') as f:
        threads = [getSub(page, d, f) for _ in range(thread_count)]
        for worker in threads:
            worker.start()
        for worker in threads:
            worker.join()

    # Message reads: "Done! File saved to output\<domain>.txt".
    print("完成!文件已存入%s" % ('output\\' + d + '.txt'))
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__=='__main__': main()
|