# 网络爬虫
# 1、模拟登录
提示:需要下载与本机 Google Chrome 版本相符的 chromedriver,并在脚本中配置其路径。
# -*- coding: UTF-8 -*-
"""Log in to the VSS site headlessly and capture the order-print page source.

Side effects: opens a headless Chrome via chromedriver, performs a login,
navigates to the print page, and leaves the rendered HTML in the module-level
variable ``html`` for the parsing step below.
"""
import io
import sys

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Force UTF-8 on stdout so non-ASCII page text prints correctly on any console.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# Headless Chrome; the chromedriver binary must match the installed Chrome
# version.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Selenium 4 removed the `executable_path`/`chrome_options` keywords —
# pass a Service object and `options=` instead.
browser = webdriver.Chrome(
    service=Service('/Users/liuhuan/opt/chromedriver'),
    options=chrome_options,
)

# Login page.
url = r'http://vss.bhgmarketplace.com/Account/Login.aspx'
browser.get(url)
# Implicit wait: every element lookup below retries for up to 3 s while the
# page's JS finishes rendering.
browser.implicitly_wait(3)

# Fill in credentials.
# NOTE(review): hard-coded credentials — move to environment variables or a
# config file before sharing this script.
username = browser.find_element(By.NAME, 'ctl00$MainContent$LoginUser$UserName')
username.send_keys('c10000299')
password = browser.find_element(By.NAME, 'ctl00$MainContent$LoginUser$Password')
password.send_keys('zhsx')

# Submit the login form.
login_button = browser.find_element(By.NAME, 'ctl00$MainContent$LoginUser$LoginButton')
login_button.click()

# Navigate to the order-print page; the login session cookie is reused.
browser.get('http://vss.bhgmarketplace.com/Order/print2.aspx')
browser.implicitly_wait(3)
btnSearch = browser.find_element(By.NAME, 'ctl00$MainContent$btnQueryyes')
btnSearch.click()

# Keep the rendered page source for the table-parsing step below.
# (`page_source` is already a str; the original encode/decode round-trip
# was a no-op.)
html = browser.page_source
browser.quit()
# 2、解析html-table数据
#解析table数据
# Parse the result table out of the saved page source (`html` from the
# scraping step above).
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html.parser')

# The ASP.NET GridView renders with class "vssgridview"; take the first match.
table = soup.find_all(attrs={'class': 'vssgridview'})
for row in table[0].select('tr'):
    for cell in row.select('td'):
        inputs = cell.select('input')
        # Cells containing an <input> (checkbox/button) — print its `name`
        # attribute so it can be used in a later form post-back.
        if inputs:
            print(inputs[0].attrs['name'])
        # Print every cell's text content.
        # NOTE(review): the original paste lost its indentation; this
        # reconstruction assumes the text print applies to every cell,
        # not only cells containing an <input> — confirm against the
        # original script.
        print(cell.text)