批量快速获取Title、server信息工具

使用方法:将需要获取的网站链接列表保存在工具目录下的1.txt中,运行脚本。此脚本会实时输出在控制台中,运行完毕后所有信息全部输出到本地生成的output.xls文件中。如果有建议可以评论提交本人修改。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
'''
title快速获取工具
保存需要获取的ip到本地txt文件直接运行即可
'''
import random
import re
import threading
import time

import requests
import xlwt

# Optional proxy setting (e.g. route through a local Burp instance); disabled by default.
#proxies = {'http': 'http://127.0.0.1:8080'}
ThreadList = [] # every worker thread created for this run
runList = [] # threads currently in flight (used to cap concurrency)
FileList = [] # target URLs read from 1.txt, one per line
OutList = [] # collected [url, title, server] rows destined for output.xls
lock = threading.Lock()  # guards OutList and console printing across workers

# Random User-Agent picker
def requests_headers():
    """Return a fresh HTTP header dict with a randomly selected User-Agent.

    A new dict is built on every call, so callers may mutate the result
    without affecting later requests.
    """
    user_agent_pool = (
        'Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0',
        'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1 ; x64; en-US; rv:1.9.1b2pre) Gecko/20081026 Firefox/3.1b2pre',
        'Opera/10.60 (Windows NT 5.1; U; zh-cn) Presto/2.6.30 Version/10.60',
        'Opera/8.01 (J2ME/MIDP; Opera Mini/2.0.4062; en; U; ssr)',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; ; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr; rv:1.9.2.4) Gecko/20100523 Firefox/3.6.4 ( .NET CLR 3.5.30729)',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/528.16 (KHTML, like Gecko) Version/4.0 Safari/528.16',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5',
    )
    return {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': random.choice(user_agent_pool),
        'Upgrade-Insecure-Requests': '1',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        "Referer": "http://www.baidu.com/link?url=www.so.com&url=www.soso.com&&url=www.sogou.com",
    }

# Worker thread: fetch one page and record its title/server info
class MyThread(threading.Thread):
    """Fetch a single URL and append [url, title, server] to the shared OutList."""

    def __init__(self, ip):
        threading.Thread.__init__(self)
        self.ip = ip  # target URL; scheme is prepended in run() if missing

    def run(self):
        # Bare hosts from 1.txt get a plain http:// scheme.
        if self.ip[0:4] != 'http':
            self.ip = 'http://' + self.ip
        try:
            headers = requests_headers()
            # BUG FIX: proxies must be a mapping or None — the original passed
            # False, which makes requests raise (silently swallowed below).
            r = requests.get(url=self.ip, headers=headers, timeout=5, proxies=None)
            if r.text.find('charset=utf-8') >= 0:
                r.encoding = 'utf-8'
            # Robustness: a page without <title> previously raised IndexError
            # and the whole result was dropped; record an empty title instead.
            titles = re.findall(r'<title>(.*?)</title>', r.text)
            title = titles[0] if titles else ''
            # Robustness: Server header is optional — avoid a KeyError.
            server = r.headers.get('Server', '')
            # `with lock` guarantees release even if print/append raises;
            # the original acquire(timeout=1) ignored a failed acquire and
            # could call release() on a lock it never held.
            with lock:
                print([self.ip, title, server])
                OutList.append([self.ip, title, server])
        except Exception:
            # Best-effort scanner: unreachable/invalid hosts are skipped.
            pass


def OpenFile():
    """Read target URLs (one per line) from 1.txt, then launch the scan.

    Uses a context manager so the file is closed even if reading fails;
    the original left the handle open on any exception.
    """
    with open('1.txt', 'r', encoding='utf-8') as f:
        for line in f:
            url = line.strip()
            if url:  # skip blank lines instead of queueing empty targets
                FileList.append(url)
    GetInfo(FileList)

def GetInfo(FileList):
    """Scan every URL with at most 20 concurrent worker threads, then export.

    FileList: list of URL strings to probe.
    """
    for url in FileList:
        ThreadList.append(MyThread(url))
    taskNum = 0  # number of threads currently believed to be running
    for t in ThreadList:
        # BUG FIX: setDaemon() is deprecated; assign the attribute instead.
        t.daemon = True
        t.start()
        runList.append(t)
        taskNum += 1
        # Throttle: block while 20 threads are already in flight.
        while taskNum > 19:
            # BUG FIX: iterate a copy — the original removed items from
            # runList while iterating it, skipping every other finished
            # thread. isAlive() was also removed in Python 3.9.
            for done in runList[:]:
                if not done.is_alive():
                    runList.remove(done)
                    taskNum -= 1
            time.sleep(0.05)  # avoid a hot busy-wait pegging one core
    # BUG FIX: wait for the final batch of daemon threads; the original
    # could write output.xls before the last <=20 results arrived.
    for t in runList:
        t.join()
    OutPut(OutList)

def OutPut(OutList):
    """Write the collected [url, title, server] rows to output.xls.

    Row 0 holds the column headers; data rows start at row 1.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('result')
    for col in range(0, 3):
        worksheet.col(col).width = 10000
    worksheet.write(0, 0, label='网站链接')
    worksheet.write(0, 1, label='网站标题')
    worksheet.write(0, 2, label='Server信息')
    #worksheet.write(0,3, label='Web指纹')
    # BUG FIX: the original iterated range(1, len(OutList)) and therefore
    # never wrote OutList[0] — the first scanned site was silently lost.
    for row, output in enumerate(OutList, start=1):
        worksheet.write(row, 0, label=output[0])
        worksheet.write(row, 1, label=output[1])
        worksheet.write(row, 2, label=output[2])
    workbook.save('output.xls')

def main():
    """Program entry point: read targets and run the scan pipeline."""
    OpenFile()

if "__main__" == __name__:
main()