Python: scraping 公投 (referendum) results with Selenium and BeautifulSoup select

from io import StringIO

import pandas
import requests
from bs4 import BeautifulSoup
from selenium import webdriver


def getRef(url):
    """Fetch one result page and return its vote tables as a single DataFrame.

    The page at *url* is downloaded with ``requests`` and every HTML table in
    it is parsed with ``pandas.read_html``. Two of those tables are kept:

    * table index 2 (``votes``): row 1 becomes the header, rows 0, 1, 3 and 4
      are dropped;
    * table index 3 (``totalvotes``): row 0 becomes the header, rows 0, 2 and
      3 are dropped.

    The remaining rows are placed side by side, and the text of the first
    ``<b>`` element on the page is stored in a ``'投票地區'`` column.

    NOTE(review): the table indices and row labels are hard-coded for the
    layout of this particular site -- confirm against a live page before
    reusing elsewhere.

    Args:
        url: Absolute URL of a result page.

    Returns:
        pandas.DataFrame: the kept rows of both tables concatenated
        column-wise, plus the ``'投票地區'`` column.

    Raises:
        requests.RequestException: the download failed, timed out, or the
            server answered with an error status.
        ValueError, IndexError, KeyError, AttributeError: the page does not
            have the expected tables, rows, or ``<b>`` element.
    """
    # A timeout keeps one unresponsive page from hanging the whole run, and
    # raise_for_status() stops error pages from being parsed as results.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    page_soup = BeautifulSoup(res.text, 'lxml')

    # Recent pandas versions deprecate passing literal HTML to read_html;
    # a file-like wrapper is accepted by old and new versions alike.
    dfa = pandas.read_html(StringIO(res.text))

    votes = dfa[2]
    votes.columns = votes.loc[1]  # row 1 holds the column captions
    votes.drop([0, 1, 3, 4], inplace=True)
    votes.reset_index(drop=True, inplace=True)

    totalvotes = dfa[3]
    totalvotes.columns = totalvotes.loc[0]  # row 0 holds the column captions
    totalvotes.drop([0, 2, 3], inplace=True)
    totalvotes.reset_index(drop=True, inplace=True)

    # Both frames were re-indexed from 0, so they align row by row.
    m = pandas.concat([votes, totalvotes], axis=1)
    area = page_soup.select_one('b').text
    m['投票地區'] = area
    return m


# Render the landing page in a real browser, then collect the anchors that
# sit inside <div id="item..."> elements.
driver = webdriver.Chrome('chromedriver')
try:
    driver.get('http://referendum.2018.nat.gov.tw/pc/zh_TW/01/63000000100000000.html')
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # Only the page source is needed from here on; close the browser so the
    # Chrome process is not left running.
    driver.quit()
links = soup.select('div[id^=item] a')

# Single-page run of the parser against the landing page itself.
getRef('http://referendum.2018.nat.gov.tw/pc/zh_TW/01/63000000100000000.html')

domain = 'http://referendum.2018.nat.gov.tw/pc/zh_TW'
results = []
# Only the first 12 links are processed; iterate over `links` to fetch all.
for ele in links[0:12]:
    url = domain + ele.get('href').strip('.')
    try:
        results.append(getRef(url))
    except Exception as exc:
        # Best-effort crawl: report the page that failed (and why), then
        # carry on with the remaining links.
        print(url, exc)

pandas.concat(results)

Comments

Popular posts from this blog

VMware ESXi 備份 ghettoVCB.sh

VMware ESXi Restore - ghettoVCB-restore.sh - 異機還原