|
# -*- coding: UTF-8 -*-
import os

import requests
import tqdm
-
-
def configs(search, page, number):
    """Build the endpoint URL and query parameters for one image-search request.

    :param search: keyword, sent as both ``queryWord`` and ``word``
    :param page: page index; the ``pn`` offset sent is ``str(60 * page)``
    :param number: value sent unchanged as ``rn``
    :return: ``(url, params)`` tuple, where ``params`` is a dict of query fields
    """
    endpoint = 'https://image.baidu.com/search/acjson'
    offset = str(page * 60)
    # Pairs are listed in the order the fields should appear in the dict.
    fields = [
        ("tn", "resultjson_com"),
        ("logid", "11555092689241190059"),
        ("ipn", "rj"),
        ("ct", "201326592"),
        ("is", ""),
        ("fp", "result"),
        ("queryWord", search),
        ("cl", "2"),
        ("lm", "-1"),
        ("ie", "utf-8"),
        ("oe", "utf-8"),
        ("adpicid", ""),
        ("st", "-1"),
        ("z", ""),
        ("ic", "0"),
        ("hd", ""),
        ("latest", ""),
        ("copyright", ""),
        ("word", search),
        ("s", ""),
        ("se", ""),
        ("tab", ""),
        ("width", ""),
        ("height", ""),
        ("face", "0"),
        ("istype", "2"),
        ("qc", ""),
        ("nc", "1"),
        ("fr", ""),
        ("expermode", ""),
        ("force", ""),
        ("pn", offset),
        ("rn", number),
        ("gsm", "1e"),
        ("1617626956685", ""),
    ]
    query = dict(fields)
    return endpoint, query
-
-
def loadpic(number, page):
    """Download up to ``number`` thumbnails, starting at result page ``page``.

    Reads the module-level names ``search``, ``header`` and ``path`` (assigned
    in the ``__main__`` block), so they must exist before this is called.

    :param number: how many images to download; nothing is requested when it
        is zero or negative
    :param page: index of the first result page to request; advanced by one
        after every page
    :return: None
    :raises ValueError: if a response cannot be parsed as JSON even after the
        GBK retry
    """
    with tqdm.tqdm(total=number) as bar:
        while number > 0:
            url, params = configs(search, page, number)
            response = requests.get(url, headers=header, params=params)
            try:
                result = response.json()
            except ValueError:
                # Retry with the body decoded as GBK. Setting ``encoding``
                # makes ``json()`` parse ``response.text`` with that codec;
                # merely calling ``content.decode('gbk')`` would be discarded.
                response.encoding = 'gbk'
                result = response.json()

            # The final element of ``data`` is skipped on purpose — presumably
            # the API appends an empty placeholder entry (TODO confirm).
            entries = (result.get('data') or [])[:-1]
            url_list = [
                entry['thumbURL'] for entry in entries if entry.get('thumbURL')
            ]
            if not url_list:
                # No usable results on this page: stop instead of requesting
                # further pages forever.
                break

            # Never save more than the number of images still wanted.
            batch = url_list[:number]
            for i, img_url in enumerate(batch):
                getImg(img_url, 60 * page + i, path)
                bar.update(1)
            number -= len(batch)
            page += 1
    print("\nfinish!")
-
-
def getImg(url, idx, path):
    """Download one image and save it as ``<path>/img_<idx + 1>.jpg``.

    Uses the module-level ``header`` dict as the request headers.

    :param url: URL of the image to download
    :param idx: image number; the file name uses ``idx + 1``
    :param path: folder the image file is written into (must already exist)
    """
    img = requests.get(url, headers=header)
    if not img.ok:
        # An HTTP error body is not image data; do not save it as a .jpg.
        return
    target = path + '/img_' + str(idx + 1) + '.jpg'
    # ``with`` closes the file even if the write fails part-way.
    with open(target, 'wb') as file:
        file.write(img.content)
-
-
if __name__ == '__main__':
    # Crawl settings. ``search``, ``path`` and ``header`` are read as
    # module-level globals by loadpic() and getImg(), so the names must stay.
    search = '戴口罩侧脸'
    number = 2000
    path = './爬取戴口罩'
    # Create the output folder up front; open() in getImg() does not create
    # missing directories and would fail on the first image otherwise.
    os.makedirs(path, exist_ok=True)
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    # Result page to start crawling from.
    page = 0
    loadpic(number, page)
-
-
|