菜鸟IT的博客 >> Python
获取重定向之后的Url,然后再爬取数据。
# Normalise the tracking number first: coerce it to str and force upper case.
TrackingNo_of_JT_INT = str(TrackingNo_X).upper()
# ——————————————————————
# Obtain a cookie string for the tracking-info endpoint via the project helper
# (presumably issues a POST, judging by the helper's name — confirm).
Url_Cookies2 = "http://www.kichisen-exp.com.cn/CenterWeb/trackingInfo.asp"
Cookies_Post = C18_Class_Headers_Make.Headers_Make().Cookie_for_Headers_No_JiaMi_Post(Url_Post=Url_Cookies2, TimeoutN=6)
print("获取Cookies【2】:", Cookies_Post)
# Sample of a cookie obtained this way:
# ASPSESSIONIDSSTDTCRD=EPCBOFPDEEAACJLLOADPDLCK;
# Build the lookup URL from the normalised tracking number. The original code
# concatenated the raw TrackingNo_X here, which left the upper-casing above
# unused and raised TypeError whenever TrackingNo_X was not already a str.
Url_Cookies1 = "http://www.kichisen-exp.com.cn/CenterWeb/StepPage2.asp?Source=" + TrackingNo_of_JT_INT + "&Key="
# Obtain a second cookie string for the lookup URL via the project helper
# (presumably a GET, judging by the helper's name — confirm).
Cookies_Get = C18_Class_Headers_Make.Headers_Make().Cookie_for_Headers_No_JiaMi_Get(Url_Get=Url_Cookies1, TimeoutN=6)
print("获取Cookies【1】:", Cookies_Get)
# Request headers shared by the two GET requests that follow. The values mimic
# a browser navigation originating from the site's index page.
Headers_1 = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
# 'Connection': 'keep-alive',
'Connection': 'close',
# 'Content-Length': '22', # No need to add this one.
'Content-Type': 'application/x-www-form-urlencoded',
# Only Cookies_Post is sent; Cookies_Get is not referenced in these headers.
'Cookie': Cookies_Post,
'Host': 'www.kichisen-exp.com.cn',
'Origin': 'http://www.kichisen-exp.com.cn',
'Referer': 'http://www.kichisen-exp.com.cn/CenterWeb/index.asp',
'Upgrade-Insecure-Requests': '1',
# User-Agent value supplied by the project's UserAgentRandom helper.
'User-Agent': C3_Class_UserAgentRandom.UserAgentRandom().GetUserAgent_by_random(),
}
# Request the lookup URL with the headers built above.
# verify=False disables TLS certificate verification; the URL built in this
# script is plain http, so it only matters if a redirect leads to https.
Req_1 = requests.get(url=Url_Cookies1, headers=Headers_1, timeout=10, verify=False)
# Set the response encoding to avoid garbled text
Req_1.encoding = "utf-8"
# Print the response status code
print("●--------√(1)--------获取响应状态码:", Req_1.status_code)
# URL of the response actually received, i.e. the post-redirect URL.
# NOTE(review): requests follows redirects by default for GET, so Req_1 should
# already contain the redirected page and the second GET below may be
# redundant — confirm against the server's behaviour before removing it.
Url_2 = Req_1.url
print("●--------√(2)--------获取重定向之后的Url:", Url_2)
# Fetch the post-redirect URL again with the same headers.
Req_2 = requests.get(url=Url_2, headers=Headers_1, timeout=10, verify=False)
# Set the response encoding to avoid garbled text
Req_2.encoding = "utf-8"
# Build a BeautifulSoup object from the page source using the lxml parser
Soup_1 = BeautifulSoup(Req_2.text, "lxml")
print("●--------√(3)--------获取重定向之后的Url的页面源码:", Soup_1)
# Collect every <table> element found in the parsed page.
Soup_Table_1=Soup_1.find_all("table")
print("●--------√(4)--------提取table代码的捕获结果:",Soup_Table_1)
菜鸟IT博客[2023.01.06-21:52] 访问:401