# Python: scraping the 2018 referendum (公投) results pages
import pandas
from bs4 import BeautifulSoup
from selenium import webdriver

# Render the referendum overview page in a real browser and capture its HTML.
# Chrome() is called without arguments: recent Selenium 4 releases no longer
# accept a positional driver path, while older releases already default to the
# 'chromedriver' executable found on PATH.
driver = webdriver.Chrome()
try:
    driver.get('http://referendum.2018.nat.gov.tw/pc/zh_TW/01/63000000100000000.html')
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # Always shut the browser down so that a failed page load does not leave
    # a Chrome/chromedriver process behind.
    driver.quit()

# Every <a> inside a <div> whose id starts with "item"; the href of each
# anchor is turned into a result-page URL further down in the script.
links = soup.select('div[id^=item] a')
def _promote_header(table, header_row, drop_rows):
    """Use row *header_row* of *table* as its column labels, then drop *drop_rows*.

    The column labels are assigned on *table* itself; the returned frame holds
    the remaining rows, re-indexed from 0.
    """
    table.columns = table.loc[header_row]
    return table.drop(drop_rows).reset_index(drop=True)


def getRef(url, timeout=30):
    """Download one result page and return its two data tables side by side.

    The third and fourth HTML tables on the page (positions 2 and 3 of
    ``pandas.read_html``) are cleaned up, concatenated column-wise, and tagged
    with a ``投票地區`` column holding the text of the page's first ``<b>``
    element.

    Args:
        url: Address of the result page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``pandas.DataFrame`` combining both tables plus the ``投票地區`` column.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        IndexError: The page contains fewer than four tables.
        KeyError: A table lacks one of the row labels that are dropped.
        AttributeError: The page contains no ``<b>`` element.
    """
    # Imported locally so the function works even though the surrounding
    # script never imports these modules at the top of the file.
    from io import StringIO

    import requests

    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    html = res.text

    # read_html wants a file-like object; passing the raw markup string
    # directly is deprecated in current pandas releases.
    tables = pandas.read_html(StringIO(html))

    # Table 2: row 1 carries the column labels; rows 0, 1, 3 and 4 are dropped.
    votes = _promote_header(tables[2], header_row=1, drop_rows=[0, 1, 3, 4])
    # Table 3: row 0 carries the column labels; rows 0, 2 and 3 are dropped.
    totalvotes = _promote_header(tables[3], header_row=0, drop_rows=[0, 2, 3])

    m = pandas.concat([votes, totalvotes], axis=1)

    # Label every row with the text of the first <b> element on the page.
    soup = BeautifulSoup(html, 'lxml')
    m['投票地區'] = soup.select_one('b').text
    return m
# Try the scraper once on the overview page itself (the result is not kept).
getRef('http://referendum.2018.nat.gov.tw/pc/zh_TW/01/63000000100000000.html')

domain = 'http://referendum.2018.nat.gov.tw/pc/zh_TW'
results = []
# Only the first 12 links are scraped; iterate over `links` instead of the
# slice to process every link that was found.
for ele in links[:12]:
    href = ele.get('href')
    if not href:
        # An anchor without an href cannot be turned into a URL; skip it
        # instead of failing inside the error handler.
        continue
    # Dots at either end of the href are removed before it is appended to
    # the site prefix.
    url = domain + href.strip('.')
    try:
        results.append(getRef(url))
    except Exception as exc:
        # Best effort: report the page that failed (and why), then carry on
        # with the remaining links. KeyboardInterrupt is no longer swallowed.
        print(f'{url} ({exc!r})')

# pandas.concat raises on an empty list, so fall back to an empty frame when
# no page could be scraped.
combined = pandas.concat(results) if results else pandas.DataFrame()
Comments
Post a Comment