色彩时光 | 记录程序员世界的点点滴滴

沪港通数据获取


沪港通数据获取:
http://quote.eastmoney.com/center/hgtstock.html

http://hqres.eastmoney.com/EMQuote_Center2.0/js/function.min.js

http://hqres.eastmoney.com/EMQuote_Center2.0/js/index_hgt.min.js

之前沪港通采集代码备份:


# Backup of an older Shanghai-Hong Kong Stock Connect scraper (Python 2 syntax).
# It downloads the eastmoney fund-flow page, prints a summary of the two
# connect channels, and then builds `content` as either an HTML string
# (tag == "all") or a JSON string of the summary fields (tag == "desc").
# NOTE(review): this is a pasted fragment -- `tag` is not defined in the
# visible code (presumably a parameter of the enclosing function), and the
# indentation of the first line does not match the rest.
url = "http://data.eastmoney.com/zjlx/hgt.html"
            content = str(urllib2.urlopen(url).read())
            # print content.decode('gbk').encode('utf8')
            # The raw bytes are decoded as GBK and re-encoded as UTF-8 before parsing.
            item_soup = BeautifulSoup(content.decode('gbk').encode('utf8'))
            # Every <div class="content"> on the page; the first one is used as
            # the summary section below, the remaining ones are handled in the
            # tag == "all" branch.
            list_content = item_soup.select('div.content')
            # desc_content=list_content[0].select("table.tab1")

            # Adjust the summary section
            desc_item = list_content[0]
            ths = desc_item.select("th")
            tds = desc_item.select("td")
            # Strip the "当日" prefix text from every header cell.
            for th in ths:
                th.string = th.get_text().replace("当日", "");
            # Offset of the second channel's cells inside `tds`.
            # NOTE(review): shadows the builtin `len`; presumably each summary
            # row holds 8 <td> cells -- confirm against the page markup.
            len = 8
            print "_" * 21, "沪港通"
            # Shanghai Connect (沪股通) link
            hgt_href = tds[0].a['href']
            # Capital inflow for the day
            hgt_in = tds[1].get_text()
            # Balance for the day
            hgt_flow = tds[2].get_text()
            # Total balance
            hgt_total_flow = tds[3].get_text()
            # Hong Kong Connect (港股通) link
            ggt_href = tds[len].a['href']
            # Capital inflow for the day
            ggt_in = tds[len + 1].get_text()
            # Balance for the day
            ggt_flow = tds[len + 2].get_text()
            # Total balance
            ggt_total_flow = tds[len + 3].get_text()
            print "沪股通 链接:%s,当日流入:%s,当日余额:%s,总余额:%s" % (hgt_href, hgt_in, hgt_flow, hgt_total_flow)
            print "港股通 链接:%s,当日流入:%s,当日余额:%s,总余额:%s" % (ggt_href, ggt_in, ggt_flow, ggt_total_flow)

            # `content` is reused from here on: it no longer holds the raw page
            # but the result selected by `tag` (stays "" for any other tag).
            content = ""
            if tag == "all":
                # Concatenate the string form of every div.content, after
                # trimming columns from all sections except the first.
                all_content = ""
                index = 0
                for v in list_content:
                    if index > 0:
                        # `len` is rebound to 10 and used both as the loop count
                        # and as the per-row cell stride below.
                        # NOTE(review): assumes 10 rows of 10 <td> cells per
                        # section -- confirm against the page markup.
                        len = 10
                        # Same element as `v` (index tracks the loop position).
                        list1 = list_content[index]
                        ths = list1.select("th")
                        tds = list1.select("td")
                        # Remove header cells 3, 7 and 8 from the document.
                        # NOTE(review): the indexes look like original positions,
                        # assuming `ths` itself is not shortened by extract().
                        ths[3].extract()
                        ths[7].extract()
                        ths[8].extract()
                        for i in range(0, len):
                            # Per row: drop the link target of cell 1, make the
                            # link in cell 2 open in a new window, and remove
                            # cells 3, 7 and 8 (matching the removed headers).
                            del tds[i * len + 1].a["href"]
                            tds[i * len + 2].a['target'] = '_blank'
                            tds[i * len + 3].extract()
                            tds[i * len + 7].extract()
                            tds[i * len + 8].extract()
                    # Applied to every section, including the first: replace the
                    # CSS classes on the div and its table, then append its markup.
                    v['class'] = 'box'
                    v.table['class'] = 'txt14'
                    all_content = "%s%s" % (all_content, v)
                    index += 1
                content = all_content
            elif tag == "desc":
                # Summary values collected above, serialized as a JSON string.
                content = {"hgt_href": hgt_href,
                           "hgt_in": hgt_in,
                           "hgt_flow": hgt_flow,
                           "hgt_total_flow": hgt_total_flow,
                           "ggt_href": ggt_href,
                           "ggt_in": ggt_in,
                           "ggt_flow": ggt_flow,
                           "ggt_total_flow": ggt_total_flow, }
                content = json.dumps(content)


您可能也对下面文章感兴趣:

Write a Comment


* Content (required) 10~500s

分类

热门标签

友情链接