# Select the stock IDs of companies with share capital of NT$5 billion (50億) or more

 # coding=UTF-8
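# Use the website's stock-screening filter to pre-select stock IDs by share capital.
# The original note says "NT$5 billion (50億) or more"; NOTE(review): VAL_S/VAL_E below
# currently request the range 8-15 (億元) instead - confirm which one is intended.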
import math
import requests
import re  #正規表示式
from bs4 import BeautifulSoup
# Download the screening-result page.
# Browser-like User-Agent header sent with the request.
header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

# Share-capital filter range. The URL's FILTER_ITEM0 parameter decodes to
# "股本 (億元)", i.e. the values are in units of 100 million NT$.
VAL_S = 8     # lower bound of the share-capital range
VAL_E = 15    # upper bound of the share-capital range

# Custom-filter query (MARKET_CAT / INDUSTRY_CAT are URL-encoded Chinese labels);
# only the two range bounds are filled in dynamically.
url = 'https://goodinfo.tw/StockInfo/StockList.asp?MARKET_CAT=%E8%87%AA%E8%A8%82%E7%AF%A9%E9%81%B8&INDUSTRY_CAT=%E6%88%91%E7%9A%84%E6%A2%9D%E4%BB%B6&FILTER_ITEM0=%E8%82%A1%E6%9C%AC+%28%E5%84%84%E5%85%83%29&FILTER_VAL_S0=' + str(VAL_S) + '&FILTER_VAL_E0=' + str(VAL_E)

print(url)

# requests waits indefinitely unless a timeout is given; bound the wait so a
# stalled connection raises instead of hanging the script.
rt = requests.get(url, headers=header, timeout=30)

# Stock IDs scraped from the screening page. Defined up front so the name
# exists (as an empty list) even when the download did not succeed.
STOCK_ID = []

# Check whether the download succeeded
if rt.status_code == requests.codes.ok:
  # Parse the HTML with BeautifulSoup
  soup = BeautifulSoup(rt.text, 'html.parser')

  # The cells holding the stock links look like:
  # <td style="mso-number-format:\@;"><nobr><a class="link_black" href="StockDetail.asp?STOCK_ID=1101"...
  # (raw string: the backslash is part of the attribute value, not an escape)
  data = soup.find_all('td', style=r'mso-number-format:\@;')

  # Use a regular expression to collect every run of four digits in those cells
  # and treat each as a stock ID.
  m = re.findall(r'\d{4}', str(data))

  # set: drop duplicates, sorted: order the IDs
  STOCK_ID = sorted(set(m))

  #print(len(STOCK_ID))
else:
  print('stock list download failed, HTTP status ' + str(rt.status_code))

# Split the IDs into batches of 100. The original note says that crawling more
# than ~120 pages in one go is treated as an attack by the site, hence batches.
# Slice ends are exclusive, so start+BATCH_SIZE keeps all 100 entries of a batch,
# and the number of batches follows len(STOCK_ID) instead of a fixed 9x9 grid.
BATCH_SIZE = 100
sIDx = [
  STOCK_ID[start : start + BATCH_SIZE]
  for start in range(0, len(STOCK_ID), BATCH_SIZE)
]

# for sID in STOCK_ID:
  # r = requests.get('https://goodinfo.tw/StockInfo/ShowK_Chart.asp?STOCK_ID=' + str(sID) + '&CHT_CAT2=WEEK', headers = header)
  # soup = BeautifulSoup(r.text, 'html.parser')
  # if soup.find_all('tr', id='row14') == []:
    # print(sID)



# Volume screen: for every pre-selected stock, read the weekly K-chart table and
# keep the stock when the first row's volume exceeds `grow` times the average
# volume of the rows read.
chosen = []   # stock IDs that pass the screen
week_s = 1    # first table row to read (id="row1")
week_e = 14   # exclusive end: rows week_s .. week_e-1 are read; row week_e must exist
grow = 5      # volume multiple

REQUEST_TIMEOUT = 30   # seconds; without it requests.get may wait forever
VOLUME_COLUMN = 6      # the 7th numeric <nobr> cell of a row is the traded volume

# Numeric <nobr> cells; the capture group is the number itself. Thousands
# separators are accepted so a comma-formatted cell cannot shift the column index.
# NOTE(review): whether the site ever emits commas in these cells is unconfirmed.
NUMERIC_CELL = re.compile(r'<nobr>([+-]?\d[\d,]*[.]?\d*)</nobr>')

for y in range( math.ceil(len(STOCK_ID)/100) ):
  for sID in sIDx[y]:
    try:
      r = requests.get('https://goodinfo.tw/StockInfo/ShowK_Chart.asp?STOCK_ID=' + str(sID) + '&CHT_CAT2=WEEK', headers = header, timeout = REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as err:
      # One unreachable page should not abort the whole scan.
      print(sID, 'request failed:', err)
      continue

    soup = BeautifulSoup(r.text, 'html.parser')   # parse the HTML with BeautifulSoup

    # Only stocks whose table reaches row `week_e` have enough history.
    if soup.find('tr', id='row' + str(week_e)) is None:
      continue

    print(sID)

    volume = []
    for i in range(week_s, week_e):
      # A row looks like (abridged):
      # <tr ... id="row1" ...> <td title="2018/7/2~2018/7/6"><nobr>W1827</nobr></td><td><nobr>5</nobr></td>
      #   <td style="color:green"><nobr>33.45</nobr></td> ... <td><nobr>41.9</nobr></td> ...</tr>
      data = soup.find_all('tr', id='row' + str(i))

      # Pull out the numeric cells of the row
      m = NUMERIC_CELL.findall(str(data))

      if len(m) <= VOLUME_COLUMN:
        # Row missing or malformed: drop this stock instead of raising IndexError.
        volume = []
        break

      volume.append(m[VOLUME_COLUMN].replace(',', ''))

    if not volume:
      print(sID, 'skipped: incomplete weekly data')
      continue

    print(volume)

    # Quarterly average volume = mean of the rows read (13 weeks with the default
    # settings; per the original note this includes the current week).
    seasonAVG = sum(float(x) for x in volume) / len(volume)

    if float(volume[0]) > (seasonAVG * grow):   # weekly volume above the quarterly average times the multiple (volume surge)
    #if float(volume[0]) < seasonAVG:       # weekly volume below the quarterly average (volume contraction)
      chosen.append(sID)

print('chosen')
print(chosen)