分类 Python 下的文章 - youto`blog

标签搜索

Acha

累计撰写 77 篇文章
累计收到 1 条评论

搜索到 7 篇与的结果

2021-03-15
模拟登录古诗文网 """ 作者：acha 时间：2021-2-16 功能：模拟登录古诗文网 """ import requests from lxml import etree from 爬虫.chaojiying_Python.chaojiying import Chaojiying_Client # 请求头 headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36" } # 创建好session对象 sess = requests.Session() # 处理动态变化的请求参数 # 1.解析出本次登录页面对应的验证码图片地址 login_url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx' page_text = sess.get(url=login_url, headers=headers).text tree = etree.HTML(page_text) # 解析出了验证码图片的地址 img_path = 'https://so.gushiwen.org' + tree.xpath('//*[@id="imgCode"]/@src')[0] img_data = sess.get(url=img_path, headers=headers).content # 请求到了图片数据 # 将图片保存到本地存储 with open('./code.jpg', 'wb') as fp: fp.write(img_data) # 将动态变化的请求参数从页面源码中解析出来 __VIEWSTATE = tree.xpath('//*[@id="__VIEWSTATE"]/@value')[0] __VIEWSTATEGENERATOR = tree.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value')[0] # 识别验证码 def imgcode(file_path): chaojiying = Chaojiying_Client('用户名', '密码', '软件ID') im = open(file_path, 'rb').read() code = (chaojiying.PostPic(im, 1004)['pic_str']) print(code) return code # 获取验证码 code_result = imgcode('code.jpg') # 古诗文网 url post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx' # 动态参数 data = { "__VIEWSTATE": "lG3WvIKiDx5sEmj8IeYT6LmF1reN9ep/Q2b+U7W2RCMdA2JF5F9NRvaEfIepywyrCTFVIlRHGtorc6dkttOZ0GCzQsQPFdpLeB2kDD6J+vXb/BvqhxWtwSJ+02I=", "__VIEWSTATEGENERATOR": "C93BE1AE", "from: http": "//so.gushiwen.cn/user/collect.aspx", "email": "wz.0527@qq.com", "pwd": "qwerqwer", "code": code_result, "denglu": "登录", } # 模拟登录的请求 response = sess.post(url=post_url, headers=headers, data=data) # 登录成功后页面的源码数据 page_text = response.text # 保存网页 with open('gushiwen.html', 'w', encoding='utf-8') as fp: fp.write(page_text)
- 2021年03月15日
- 602 阅读
- 0 评论
- 3 点赞
2021-03-15
查询化妆品许可证信息 """ 作者：acha 时间：2021-2-15 功能：查询化妆品许可证信息 """ import requests # 药监局许可证信息数据列表 URL url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList' # 药监局许可证具体数据列表 URL url_id = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById' # 请求头 headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64)" " AppleWebKit/537.36 (KHTML, like Gecko)" " Chrome/87.0.4280.141" " Safari/537.36" } # 获取网页 json 数据 def get_page_text(c_url, c_data): # 请求体 resqonse = requests.post(url=c_url, headers=headers, data=c_data) # 获取数据 page_text = resqonse.json() # 返回数据 json return page_text for i in range(10): # 动态参数需要处理变量页码 data = { "on": " true", "page": str(i), "pageSize": " 15", "productName": " ", "conditionType": "1", "applyname": " ", "applysn": " " } # 获取企业id page_id = get_page_text(url, data) # 生成 id 列表 lst_id = page_id['list'] # 输出页面码 print(data['page']) # 遍历 id列表返回许可证详细信息 for row in lst_id: # 动态参数企业id data_id = {'id': row['ID']} # 获取许可证详细 result = get_page_text(url_id, data_id) # 打印获取信息 print(result)
- 2021年03月15日
- 562 阅读
- 0 评论
- 2 点赞

1
2

Acha

77 文章数

1 评论量

人生倒计时

标签云