抓三大法人的網頁資料
TEST OK
import requests
import pandas as pd
import sys
import sqlite3 as lite
from bs4 import BeautifulSoup as bs
from datetime import date,datetime, timedelta
reload(sys)
sys.setdefaultencoding('utf-8')
payload = {
'qdate':'105/05/06',
'select2':'ALL',
'sorting':'by_issue'
}
def getTWDate(dt):
year = int(dt.strftime('%Y')) - 1911
monthdate = dt.strftime('%m/%d')
ymd = '{}/{}'.format(year, monthdate)
return ymd
def getTradingVolume(dt):
payload['qdate'] = getTWDate(dt)
res = requests.post('http://www.twse.com.tw/ch/trading/fund/T86/T86.php', data=payload)
soup = bs(res.text, 'html5lib')
tbl = soup.select('#tbl-sortable')[0]
dfs = pd.read_html(tbl.prettify('utf-8'), encoding='utf-8')
stockdf = dfs[0]
stockdf['ymd'] = dt
return stockdf
payload['select2'] = '24'
dfs = []
currenttime = datetime.now()
for i in range(1,30):
dt = currenttime.date() - timedelta(days = i)
dfs.append(getTradingVolume(dt))
stockdf = pd.concat(dfs, ignore_index=True)
with lite.connect('finance.sqlite') as db:
stockdf.to_sql(name='trading_volume', index=False, con=db, if_exists='replace')
with lite.connect('finance.sqlite') as db:
df = pd.read_sql_query('SELECT count(1) FROM trading_volume;', db)
print df
'''
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from datetime import date,datetime, timedelta
payload = {
'qdate':'105/05/06',
'select2':'ALL',
'sorting':'by_issue'
}
res = requests.post('http://www.twse.com.tw/ch/trading/fund/T86/T86.php', data=payload)
#print res.text
soup = bs(res.text, 'html5lib')
tbl = soup.select('#tbl-sortable')[0]
dfs = pd.read_html(tbl.prettify('utf-8'), encoding='utf-8')
stockdf = dfs[0]
#print stockdf.head()
#print stockdf
'''
查詢 sqlite
import pandas as pd
import sys
import sqlite3 as lite
reload(sys)
sys.setdefaultencoding('utf-8')
with lite.connect('finance.sqlite') as db:
df = pd.read_sql_query('SELECT "證券 名稱" FROM trading_volume;', db)
print df
抓三大法人統計表,出處來自大數學堂http://course.largitdata.com/course/31/
只能說david是神,有機會看到要拜
第一步是抓三大法人的網頁內容
import requests
res = requests.get('http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date=104%2F07%2F22&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701')
res.encoding = 'big5'
print res.text
用beautifulsoup分析
import requests
from bs4 import BeautifulSoup
res = requests.get('http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date=104%2F07%2F22&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701')
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
print soup.select('.board_trad')
只抓tr裡面的td
import requests
from bs4 import BeautifulSoup
res = requests.get('http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date=104%2F07%2F22&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701')
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
for tr in soup.select('.board_trad tr')[2:]:
for td in tr.select('td'):
print td.text
最後的整理
import requests
from bs4 import BeautifulSoup
res = requests.get('http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date=104%2F07%2F22&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701')
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
for tr in soup.select('.board_trad tr')[2:]:
td = tr.select('td')
print td[0].text,td[1].text,td[2].text,td[3].text
時間str是整理成字串
from datetime import date,timedelta
today = date.today()
for i in range(1,10):
today = today + timedelta(days=-1)
print str(today).split('-')
設定多組的時間,'-'.join代表插入'-'且只顯示括號內的值
from datetime import date,timedelta
today = date.today()
for i in range(1,10):
today = today + timedelta(days=-1)
dayary = str(today).split('-')
print '-'.join([str(int(dayary[0]) - 1911), dayary[1],dayary[2]])
抓取多天的數據
import requests
from bs4 import BeautifulSoup
from datetime import date,timedelta
url = 'http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date={0}&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701'
def getTradeValue(dt):
res = requests.get(url.format(dt))
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
for tr in soup.select('.board_trad tr')[2:]:
td = tr.select('td')
print td[0].text,td[1].text,td[2].text,td[3].text
today = date.today()
for i in range(1,10):
today = today + timedelta(days=-1)
dayary = str(today).split('-')
dt = '%2F'.join([str(int(dayary[0]) - 1911), dayary[1],dayary[2]])
getTradeValue(dt)
加上日期的資訊
import requests
from bs4 import BeautifulSoup
from datetime import date,timedelta
url = 'http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date={0}&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701'
def money_conversion(input_ele):
return int(''.join(input_ele.split(',')))
def getTradeValue(today):
dayary = str(today).split('-')
dt = '%2F'.join([str(int(dayary[0]) - 1911), dayary[1],dayary[2]])
res = requests.get(url.format(dt))
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
for tr in soup.select('.board_trad tr')[2:]:
td = tr.select('td')
print td[0].text, money_conversion(td[1].text),money_conversion(td[2].text),money_conversion(td[3].text),today
today = date.today()
for i in range(1,10):
today = today + timedelta(days=-1)
getTradeValue(today)
用sqlite存到檔案,檔案要寫絕對路徑
import sqlite3 as lite
con = lite.connect('c:/finace.sqlite')
cur = con.cursor()
cur.execute("select * from InvestorTradingValue")
ret = cur.fetchone()
print ret
con.close()
存檔資料及取檔資料(mark處是取檔資料)
import sqlite3 as lite
con = lite.connect('c:/finace.sqlite')
cur = con.cursor()
cur.execute("select * from InvestorTradingValue")
ret = cur.fetchone()
print ret
con.close()
存到sqlite中
import requests
from bs4 import BeautifulSoup
from datetime import date,timedelta
import sqlite3 as lite
con.close()
url = 'http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U.php?report1=day&input_date={0}&mSubmit=%ACd%B8%DF&yr=2015&w_date=20150720&m_date=20150701'
sql = "insert into InvestorTradingValue(item,total_buy,totol_sell,difference,date) values(?,?,?,?,?)"
def money_conversion(input_ele):
return int(''.join(input_ele.split(',')))
def getTradeValue(cur,today):
dayary = str(today).split('-')
dt = '%2F'.join([str(int(dayary[0]) - 1911), dayary[1],dayary[2]])
res = requests.get(url.format(dt))
res.encoding = 'big5'
soup = BeautifulSoup (res.text)
for tr in soup.select('.board_trad tr')[2:]:
td = tr.select('td')
ret = [td[0].text, money_conversion(td[1].text),money_conversion(td[2].text),money_conversion(td[3].text),today]
cur.execute(sql,ret)
con = lite.connect('c:/finace.sqlite')
cur = con.cursor()
today = date.today()
for i in range(1,10):
today = today + timedelta(days=-1)
getTradeValue(cur,today)
con.commit()
con.close()