Python selenium模擬網頁點擊爬蟲交管12123違章數據
在上一篇文章《Python教程—模擬網頁點擊爬蟲定位系統》中,講解了怎麼通過模擬點擊方式爬取車輛定位數據;本次介紹怎麼以模擬點擊方式進入交管12123爬取車輛違章數據。本文直接講解過程,使用的命令解釋見上一篇文章。本文與《Python教程—模擬網頁點擊爬蟲定位系統》一樣,都是企業中實際的爬蟲案例,如果之後想進入車企行業可以做個瞭解。
準備工具:Spyder、selenium庫、Google Chrome瀏覽器及對應版本的chromedriver.exe
效果
註:分享此案例目的是為了幫助同行解放雙手,更好管理企業資產。本文程序已刪除網址、賬號密碼。該網址比較麻煩的一點是:開始點擊登錄的時候網頁可能會有其他彈窗出現,使得原有路徑改變,程序會因為找不到對應路徑而報錯,重新執行程序即可。除了模擬點擊登錄,還可以通過Cookie直接登錄網頁,這種方式可以繞過登錄的繁瑣步驟。
調用庫
from selenium import webdriver import time import csv import datetime from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait import math import xlrd
讀取需要查詢的車牌號
# Load the workbook that lists the vehicles to query.
# NOTE(review): xlrd 2.0+ only reads .xls files; opening an .xlsx workbook
# needs xlrd < 2.0 (or a list saved as .xls) - confirm the installed version.
data = xlrd.open_workbook('cheliang.xlsx')
table = data.sheets()[0]  # first worksheet; the query loop reads it row by row
nrows = table.nrows       # number of rows, i.e. number of vehicles to query
創建瀏覽器,打開網頁
# Start Chrome and open the target page.
opt = webdriver.ChromeOptions()         # browser launch options
# For a windowless run, add: opt.add_argument('--headless')
driver = webdriver.Chrome(options=opt)  # create the browser object
driver.maximize_window()
print("正在打開網頁")
driver.get('')                          # target URL was removed by the author
依次點擊單位登錄、輸入賬號、密碼、點擊驗證碼填寫區域觸發圖片、勾選、輸入驗證碼、點擊登錄
# Log in through the "unit login" form: account, password, captcha, tick the
# checkbox, submit. The fixed sleeps give the page time to render each step.
# find_element(By.XPATH, ...) is used because the find_element_by_* helpers
# were removed from Selenium 4.
LOGIN_FORM = "/html/body/div[4]/div/div[1]/div/div[2]/form[1]"

time.sleep(3)
print("點擊單位登錄")
time.sleep(3)
driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div/div[2]/div[2]/button").click()
time.sleep(3)

print("正在填寫賬號")
elem = driver.find_element(By.XPATH, LOGIN_FORM + "/div[1]/div/input")
elem.clear()        # drop any pre-filled value
elem.send_keys("")  # account was removed by the author
time.sleep(1)

print("正在填寫密碼")
elem = driver.find_element(By.XPATH, LOGIN_FORM + "/div[2]/div/input")
elem.clear()
elem.send_keys("")  # password was removed by the author
time.sleep(1)

print("正在查看驗證碼")
# Clicking the captcha field makes the captcha image appear.
driver.find_element(By.XPATH, LOGIN_FORM + "/div[3]/div/input").click()
print("請輸入驗證碼")
yanzhengma = input()  # the operator reads the image and types the code
time.sleep(1)
driver.find_element(By.XPATH, LOGIN_FORM + "/div[4]/div/label/input").click()  # tick the checkbox
time.sleep(1)

elem = driver.find_element(By.XPATH, LOGIN_FORM + "/div[3]/div/input")
elem.clear()
elem.send_keys(str(yanzhengma))
time.sleep(1)

print("正在登陸")
driver.find_element(By.XPATH, LOGIN_FORM + "/div[5]/button").click()
點擊違法查詢,設置查詢時間
# Open the violation-query page and set the query start date.
# The login button is NOT clicked again here: the login step already
# submitted the form, and a second click would fail once the page changes.
time.sleep(3)
driver.find_element(By.XPATH, "/html/body/div[4]/div/div[1]/ul/li[5]/a").click()  # violation query menu
time.sleep(1)
driver.find_element(
    By.XPATH,
    "/html/body/div[3]/div/div[2]/div[1]/div[2]/form/div[1]/div/div[1]/span/i",
).click()  # open the date picker

# NOTE(review): the original indentation was lost; it is assumed that only
# the header-arrow click repeats three times - confirm against the page.
for _ in range(3):
    time.sleep(0.5)
    driver.find_element(By.XPATH, "/html/body/div[6]/div[4]/table/thead/tr/th[1]/i").click()
time.sleep(0.5)
driver.find_element(By.XPATH, "/html/body/div[6]/div[4]/table/tbody/tr/td/span[1]").click()
time.sleep(0.5)
driver.find_element(By.XPATH, "/html/body/div[6]/div[3]/table/tbody/tr[2]/td[1]").click()
循環依次查詢每個車牌違章信息,每次都需要清空上次輸入,填寫本次查詢車牌,識別有多少條數據,共多少頁,每頁最多展示10條,最後一頁有多少條數據
# The CSV file is named after the start time; R collects the rows of every vehicle.
wenjian = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S') + '.csv'
R = []

QUERY_FORM = "/html/body/div[3]/div/div[2]/div[1]/div[2]/form"

for ii in range(0, nrows):
    rowValues = table.row_values(ii)  # cells of the current worksheet row
    print('正在讀取第'+str(ii+1)+'輛車')

    # Replace the previous plate with the current one.
    time.sleep(0.5)
    elem = driver.find_element(By.XPATH, QUERY_FORM + "/div[3]/div/input")
    elem.clear()
    elem.send_keys(*rowValues)  # send_keys takes the values as separate arguments
    time.sleep(0.1)
    driver.find_element(By.XPATH, QUERY_FORM + "/div[4]/button").click()  # run the query
    time.sleep(0.5)

    # Total number of violations shown above the result table.
    result = int(driver.find_element(
        By.XPATH, "/html/body/div[3]/div/div[2]/div[2]/div[1]/div/p/span").text)
    a = math.ceil(result/10)  # number of result pages (10 rows per page)
    b = result % 10           # rows on the last page; 0 means the last page is full
讀取列表中的數據,其中扣分和罰款需要點擊”查看詳情”,從彈窗中讀取數據
# Read one row of the result table. `j` is the 1-based row index on the
# current page and `R` is the list collecting every scraped row; both come
# from the enclosing loops of the full script.
row_xpath = "//table[@id='my-msg-list']/tbody/tr[" + str(j) + "]"

# Columns 1-7 are read straight from the table.
result = [
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
        (By.XPATH, row_xpath + "/td[" + str(col) + "]"))).text
    for col in range(1, 8)
]

# Penalty points and fine are only shown in the "view details" dialog.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, row_xpath + "/td[8]/a"))).click()
time.sleep(1)
for field in (7, 8):
    result.append(WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
        (By.XPATH, "//form[@class='form-horizontal']/div[" + str(field) + "]/span[2]"))).text)
R.append(result)

# Close the dialog before moving on to the next row.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, "//div[@class='modal-footer ui_modal']/button"))).click()
time.sleep(0.5)
每讀取一輛車的數據就寫入表格中
# Rewrite the whole CSV after each vehicle so finished results are already on
# disk if a later query fails. `wenjian` (timestamped file name) and `R`
# (all rows so far) are created before the query loop.
with open(wenjian, 'w', encoding='utf-8', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerows(R)
完整代碼
"""Scrape traffic-violation records for a list of vehicles with Selenium.

Vehicles are read from ``cheliang.xlsx``, the login form is filled in through
the browser (the captcha is typed on the console), each vehicle is queried in
turn and every violation row is written to a CSV file named after the start
time. The target URL and the credentials were removed by the author and must
be filled in before running.
"""
import csv
import datetime
import math
import time

import xlrd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

PLATE_WORKBOOK = 'cheliang.xlsx'
TARGET_URL = ''   # removed by the author
ACCOUNT = ''      # removed by the author
PASSWORD = ''     # removed by the author
PAGE_SIZE = 10    # the result table shows at most 10 rows per page
WAIT_SECONDS = 10

UNIT_LOGIN_BUTTON = "/html/body/div[1]/div[2]/div/div[2]/div[2]/button"
LOGIN_FORM = "/html/body/div[4]/div/div[1]/div/div[2]/form[1]"
VIOLATION_MENU = "/html/body/div[4]/div/div[1]/ul/li[5]/a"
QUERY_FORM = "/html/body/div[3]/div/div[2]/div[1]/div[2]/form"
PICKER_PREVIOUS = "/html/body/div[6]/div[4]/table/thead/tr/th[1]/i"
PICKER_FIRST_SPAN = "/html/body/div[6]/div[4]/table/tbody/tr/td/span[1]"
PICKER_DAY_CELL = "/html/body/div[6]/div[3]/table/tbody/tr[2]/td[1]"
TOTAL_COUNT = "/html/body/div[3]/div/div[2]/div[2]/div[1]/div/p/span"
RESULT_ROW = "//table[@id='my-msg-list']/tbody/tr[{row}]"
# Penalty points and fine are only visible in the "view details" dialog.
DETAIL_FIELDS = (
    "//form[@class='form-horizontal']/div[7]/span[2]",
    "//form[@class='form-horizontal']/div[8]/span[2]",
)
DETAIL_CLOSE = "//div[@class='modal-footer ui_modal']/button"


def find(driver, xpath):
    """Return the element at *xpath*; raises immediately if it is absent.

    ``find_element(By.XPATH, ...)`` replaces ``find_element_by_xpath``,
    which was removed from Selenium 4.
    """
    return driver.find_element(By.XPATH, xpath)


def wait_clickable(driver, xpath):
    """Wait up to WAIT_SECONDS for the element at *xpath* to be clickable."""
    return WebDriverWait(driver, WAIT_SECONDS).until(
        EC.element_to_be_clickable((By.XPATH, xpath)))


def fill(driver, xpath, *values):
    """Clear the input at *xpath* and type *values* into it."""
    field = find(driver, xpath)
    field.clear()
    field.send_keys(*values)


def rows_per_page(total, page_size=PAGE_SIZE):
    """Return the number of rows on each result page for *total* records.

    An empty list is returned for ``total == 0`` so that vehicles without
    violations are skipped instead of waiting for rows that never appear.
    """
    page_count = math.ceil(total / page_size)
    return [min(page_size, total - page * page_size) for page in range(page_count)]


def open_browser():
    """Start Chrome maximised and open the target page."""
    opt = webdriver.ChromeOptions()
    # For a windowless run, add: opt.add_argument('--headless')
    driver = webdriver.Chrome(options=opt)
    driver.maximize_window()
    print("正在打開網頁")
    driver.get(TARGET_URL)
    return driver


def login(driver):
    """Fill in the unit-login form; the captcha is read from the console."""
    time.sleep(3)
    print("點擊單位登錄")
    time.sleep(3)
    find(driver, UNIT_LOGIN_BUTTON).click()
    time.sleep(3)

    print("正在填寫賬號")
    fill(driver, LOGIN_FORM + "/div[1]/div/input", ACCOUNT)
    time.sleep(1)

    print("正在填寫密碼")
    fill(driver, LOGIN_FORM + "/div[2]/div/input", PASSWORD)
    time.sleep(1)

    print("正在查看驗證碼")
    captcha_input = LOGIN_FORM + "/div[3]/div/input"
    find(driver, captcha_input).click()  # clicking the field makes the image appear
    print("請輸入驗證碼")
    yanzhengma = input()
    time.sleep(1)
    find(driver, LOGIN_FORM + "/div[4]/div/label/input").click()  # tick the checkbox
    time.sleep(1)
    fill(driver, captcha_input, str(yanzhengma))
    time.sleep(1)

    print("正在登陸")
    find(driver, LOGIN_FORM + "/div[5]/button").click()


def open_violation_query(driver):
    """Open the violation-query page and set the query start date."""
    time.sleep(3)
    find(driver, VIOLATION_MENU).click()
    time.sleep(1)
    find(driver, QUERY_FORM + "/div[1]/div/div[1]/span/i").click()  # open the date picker
    # NOTE(review): the original indentation was lost; it is assumed that only
    # the header-arrow click repeats three times - confirm against the page.
    for _ in range(3):
        time.sleep(0.5)
        find(driver, PICKER_PREVIOUS).click()
    time.sleep(0.5)
    find(driver, PICKER_FIRST_SPAN).click()
    time.sleep(0.5)
    find(driver, PICKER_DAY_CELL).click()


def read_row(driver, row):
    """Return the nine fields of result row *row* (1-based) on the current page."""
    row_xpath = RESULT_ROW.format(row=row)
    record = [
        wait_clickable(driver, "{}/td[{}]".format(row_xpath, col)).text
        for col in range(1, 8)
    ]
    wait_clickable(driver, row_xpath + "/td[8]/a").click()  # open the details dialog
    time.sleep(1)
    record.extend(wait_clickable(driver, xpath).text for xpath in DETAIL_FIELDS)
    wait_clickable(driver, DETAIL_CLOSE).click()  # close it before the next row
    time.sleep(0.5)
    return record


def scrape_vehicle(driver, row_values):
    """Query one vehicle and return all of its violation rows.

    *row_values* is the worksheet row for the vehicle; its cells are typed
    into the plate field.
    """
    time.sleep(0.5)
    fill(driver, QUERY_FORM + "/div[3]/div/input", *row_values)
    time.sleep(0.1)
    find(driver, QUERY_FORM + "/div[4]/button").click()
    time.sleep(0.5)
    total = int(find(driver, TOTAL_COUNT).text)

    records = []
    page_sizes = rows_per_page(total)
    for page, row_count in enumerate(page_sizes, start=1):
        for row in range(1, row_count + 1):
            records.append(read_row(driver, row))
        if page < len(page_sizes):
            # NOTE(review): the link text must match the page exactly; the
            # site may render it in Simplified Chinese - confirm.
            driver.find_element(By.LINK_TEXT, "下一頁").click()
            time.sleep(0.5)
    return records


def write_csv(path, rows):
    """Write *rows* to *path*, replacing any previous content."""
    with open(path, 'w', encoding='utf-8', newline='') as fp:
        csv.writer(fp).writerows(rows)


def main():
    """Run the whole scrape: load vehicles, log in, query each, save the CSV."""
    # NOTE(review): xlrd 2.0+ only reads .xls files; an .xlsx workbook needs
    # xlrd < 2.0 - confirm the installed version.
    table = xlrd.open_workbook(PLATE_WORKBOOK).sheets()[0]
    driver = open_browser()
    try:
        login(driver)
        open_violation_query(driver)
        # The output file is named after the start time.
        wenjian = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S') + '.csv'
        R = []
        for ii in range(table.nrows):
            print('正在讀取第'+str(ii+1)+'輛車')
            R.extend(scrape_vehicle(driver, table.row_values(ii)))
            time.sleep(0.5)
            # Rewrite the file after every vehicle so finished results are
            # already on disk if a later query fails.
            write_csv(wenjian, R)
    finally:
        driver.quit()  # always release the browser and chromedriver process


if __name__ == "__main__":
    main()
到此這篇關於Python selenium模擬網頁點擊爬蟲交管12123違章數據的文章就介紹到這了,更多相關Python selenium模擬點擊爬蟲內容請搜索WalkonNet以前的文章或繼續瀏覽下面的相關文章,希望大家以後多多支持WalkonNet!
推薦閱讀:
- Python編程使用Selenium模擬淘寶登錄實現過程
- python自動化測試selenium核心技術處理彈框
- Python selenium 實例之通過 selenium 查詢禪道是否有任務或者BUG
- python爬蟲實現最新12306搶票
- Python爬蟲實戰之用selenium爬取某旅遊網站