# Ruten auction (露天拍賣) search crawler: paging through result pages (換頁)
import re
import sys
import requests
from bs4 import BeautifulSoup
# HTTP headers passed to every requests.get() call in this file; the
# User-Agent value is a desktop Chrome-on-macOS browser string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
}

# Cookies passed to every requests.get() call in this file.
cookies = {'_ts_id': '999999999999999999'}
# Python 2 only: force the interpreter-wide default encoding to UTF-8 so that
# implicit str/unicode conversions of non-ASCII text do not raise
# UnicodeError.  Python 3 has neither the `reload` builtin nor
# `sys.setdefaultencoding` and already defaults to UTF-8, so the hack is
# skipped there instead of crashing at import time with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # restores sys.setdefaultencoding, which site.py removes at startup
    sys.setdefaultencoding('utf8')
def PageCount(keyword):
    """Return how many result pages to crawl for *keyword*, capped at 100.

    Fetches the first search-result page and reads the ``total`` and
    ``perPage`` numbers embedded in one of its ``<script>`` tags.

    Args:
        keyword: Search phrase; it is appended to the search URL as the
            ``k`` query parameter.

    Returns:
        int: ``ceil(total / perPage)`` limited to at most 100, so a final,
        partially filled page is counted too.  0 when ``total`` is 0 or
        ``perPage`` is 0.

    Raises:
        ValueError: If the response contains no ``<script>`` holding the
            total/perPage pair.
    """
    max_pages = 100
    response = requests.get(
        'http://search.ruten.com.tw/search/s000.php?enc=u&searchfrom=indexbar&k=' + keyword + '&t=0',
        headers=headers,
        cookies=cookies
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    pattern = re.compile(r'total"\:(\d+)\,\"perPage"\:(\d+)')
    script = soup.find("script", text=pattern)
    # Search the whole script body once instead of assuming the numbers sit
    # on its second line and re-running the regex for every use.
    match = pattern.search(script.text) if script is not None else None
    if match is None:
        raise ValueError(
            'total/perPage not found in the search page for %r' % (keyword,)
        )
    total = int(match.group(1))
    per_page = int(match.group(2))
    if per_page <= 0:
        # Nothing sensible to divide by; treat as "no pages".
        return 0
    # Ceiling division: plain floor division would skip the last partial
    # page (and yield 0 pages whenever total < perPage).  `//` keeps the
    # result an int on both Python 2 and Python 3.
    pages = (total + per_page - 1) // per_page
    return min(pages, max_pages)
def crawler(keyword):
    """Print three fields of every search result found for *keyword*.

    The number of pages comes from ``PageCount``; pages are fetched from the
    last one down to page 1.  For each ``<li class="rt-goods-list-item">``
    the function prints, one value per line:

    * the ``href`` of the first link inside the item's first ``<div>``,
    * the text of the item's first ``<span>``,
    * the text of the ``<strong>`` inside the item's first ``<p>``.

    Args:
        keyword: Search phrase; it is appended to the search URL as the
            ``k`` query parameter.
    """
    ALLpage = PageCount(keyword)
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and as the print function on Python 3.
    print(ALLpage)
    for number in range(ALLpage, 0, -1):
        response = requests.get(
            'http://search.ruten.com.tw/search/s000.php?enc=u&searchfrom=indexbar&k=' + keyword + '&t=0&p=' + str(number),
            headers=headers,
            cookies=cookies
        )
        # Decode the raw bytes as UTF-8 directly.  The previous
        # text -> latin-1 bytes -> UTF-8 round trip only worked when requests
        # had guessed ISO-8859-1; with any other guessed encoding the
        # latin-1 encode step raises UnicodeEncodeError.
        html = response.content.decode('utf-8')
        soup = BeautifulSoup(html, 'html.parser')
        for each_li in soup.findAll('li', {'class': 'rt-goods-list-item'}):
            print(each_li.div.a['href'])
            print(each_li.span.text)
            print(each_li.p.strong.text)
if __name__ == "__main__":
    # Script entry point: the search keyword is the first command-line
    # argument.  Exit with a usage message (status 1) instead of an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python %s KEYWORD" % sys.argv[0])
    keyword = str(sys.argv[1])
    print(keyword)
    crawler(keyword)
# Usage: python test.py 'Lowe Alpine Strike'