python处理html的table标签
生活随笔
收集整理的這篇文章主要介紹了
python处理html的table标签
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
轉載:http://www.xuebuyuan.com/583071.html
python處理html的table標簽
2012年01月06日?? 綜合?? 共 5279字 ? 字號?小?中?大???評論關閉 import sys import csv import urllib2import BeautifulSoup#page = urllib2.urlopen(sys.argv[1]).read() soup = BeautifulSoup.BeautifulSoup(open(sys.argv[1]).read()) csvout = csv.writer(sys.stdout)for table in soup.findAll('table'):print "<table border='1'>"#print '#'#print '# Table'#print '# Fields: ' + ','.join([tr.text for tr in table.findAll('th')])for row in table.findAll('tr'):print "<tr>"#csvout.writerow([tr.text for tr in row.findAll('td')])for tr in row.findAll('td'):print "<td>"print tr.text.encode("utf-8")print "</td>"print "</tr>"print "</table>"break?
#!/bin/bash
# process.sh (the original header comment read "process.h" -- presumably a typo)
#
# Usage: process.sh PAGE.html
#
# Writes baobei.html into the directory that holds PAGE.html.  The file
# contains a table with the product name and price scraped from PAGE.html,
# the output of printtab.py for the same page, and an <img> tag for the first
# entry of the "imglist" file in that directory.  If imglist yields no entry
# the generated file is removed and the script exits.

page=$1
basedir=$(dirname "$page")
out=$basedir/baobei.html
echo "$basedir"

# Left disabled by the author: emit a <head> with a UTF-8 content type.
#echo \<head\>\<meta http-equiv=\"Content-Type\" content=\"text/html\; charset=UTF-8\" /\>\</head\> >> $basedir/baobei.html

# Product name: text of <title id="id_title">, cut at the first underscore.
# $prodname is deliberately left unquoted in the echo so that runs of
# whitespace collapse, as in the original.
prodname=$(grep -o '<title id="id_title">.*</title>' "$page" | cut -d \> -f 2 | cut -d \< -f 1)
prodname=$(echo $prodname | cut -d _ -f 1)

# Price: the digits inside <span class="s1">.
price=$(grep -o '<span class="s1">[0-9]*</span>' "$page" | cut -d \> -f 2 | cut -d \< -f 1)

# The first write truncates any previous baobei.html; the rest append.
echo \<table\> > "$out"

echo \<tr\> >> "$out"
echo \<td\>Name\</td\> >> "$out"
echo \<td\>$prodname\</td\> >> "$out"
echo \</tr\> >> "$out"

echo \<tr\> >> "$out"
echo \<td\>Price\</td\> >> "$out"
echo \<td\>$price\</td\> >> "$out"
echo \</tr\> >> "$out"

python ./printtab.py "$page" >> "$out"
echo \</table\> >> "$out"

# Without an image the page is discarded.
imgsrc=$(head -n 1 "$basedir/imglist")
if test -z "$imgsrc"; then
    rm -rf "$out" && exit
fi
echo \<img src=\'$imgsrc\'/\> >> "$out"

# Flatten the result to a single line and strip every double quote.
tr -d '\n"' < "$out" > "$out.tmp"
mv "$out.tmp" "$out"
#!/bin/bash
# process2.sh
#
# Usage: process2.sh BAOBEI.html
#
# Reads the single-line HTML file given as $1, extracts the product name,
# price and a category hint from its <td>LABEL</td><td>VALUE</td> pairs, maps
# the hint to a numeric category id, copies the product image found under the
# file's directory to ../../template/<md5>.tbi, and prints one tab-separated
# record to stdout.  The script exits silently when the name, the price or
# the image is missing.

page=$1
basedir=$(dirname "$page")

# Print the text of the cell that follows <td>LABEL</td> in "$page".
field() {
    grep -o "<td>$1</td><td>.*</td>" "$page" | cut -d \> -f 4 | cut -d \< -f 1
}

name=$(field Name)
if test -z "$name"; then
    exit
fi

price=$(field Price)
if test -z "$price"; then
    exit
fi

# Category hint: the first non-empty of these fields.  $class is not reset
# first, so a value already present in the environment takes precedence
# (same as the original).
for label in 產品類型 設備類型 打印針數; do
    if test -z "$class"; then
        class=$(field "$label")
    fi
done
if test -z "$class"; then
    class="條形碼打印機"
fi

# Map the hint to a category id.  Patterns are tried top to bottom and the
# first substring match wins, preserving the order of the original
# if/elif chain.
case "$class" in
    *票據* | *發票* | *票證* | *存折*)
        class="536187477" ;;   # receipt / bill printers
    *針*)
        # NOTE(review): the category table published with this script lists
        # 536187478 for dot-matrix printers; the original maps this keyword
        # to 536187477.  Kept as-is -- confirm which id is intended.
        class="536187477" ;;
    *燈泡* | *UHE* | *UHP* | *HSCR*)
        class="536187479" ;;   # projector lamps
    *條形碼打印機*)
        class="536187480" ;;   # barcode printers
    *證卡打印*)
        class="536187483" ;;   # ID-card printers
    *條碼* | *掃描* | *閱讀* | *采集* | *手持* | *數據終端*)
        class="536187481" ;;   # barcode equipment
    *激光*)
        class="536187484" ;;   # laser printers
    *噴墨*)
        class="536187486" ;;   # inkjet printers
    *復印*)
        class="536187615" ;;   # copiers
    *一體機*)
        class="536187485" ;;   # multifunction all-in-ones
    *硒鼓* | *墨盒*)
        class="536187616" ;;   # toner / ink cartridges
    *)
        class="536187616" ;;   # fallback
esac

# Product image: the first .jpg under $basedir.  The original passed every
# match to md5sum and cp, which breaks as soon as there is more than one.
imagepath=$(find "$basedir" -type f -iname "*.jpg" | head -n 1)
if test -z "$imagepath"; then
    exit
fi
image=$(md5sum "$imagepath" | cut -d ' ' -f 1)
cp -rf "$imagepath" "$basedir/../../template/$image.tbi"

# The whole HTML file becomes the description field.
desc=$(cat "$page")

# Emit the record.  This line is reproduced verbatim from the original:
# $name, $class, $price, $desc and $image are spliced in unquoted, so
# whitespace inside them is collapsed by word splitting.
echo -e \"$name\""\t"110514"\t"\",$class,\""\t"1"\t"\"上海\""\t"\"上海\""\t"\"b\""\t"$price"\t"0.000000"\t"1"\t"7"\t"2"\t"0.000000"\t"0.000000"\t"0.000000"\t""\t""\t"1"\t"1"\t"0"\t"1"\t"1"\t"0"\t"\"2012-10-16 13:09:48\""\t""\t"\"$desc\""\t""\t"\"20000:31140\;20196:3228846\;29969:107401\;30681:32998\;31468:102250\;31479:92188\;3415558:27513\;3415563:21959\;3415571:21959\;3415581:10122\;3415609:22041\;7884463:75957615\;14319244:80897641\;14319250:123483713\;14791484:10285019\;\""\t""\t""\t"0"\t"0"\t"\"2012-10-16 13:37:51\""\t"100"\t""\t"0"\t"\"$image:0:0:\|\;\""\t"\"\""\t"\"\""\t"\",\""\t"\",\""\t"\"\""\t"\"\""\t"0"\t"\"15758222730\""\t"15758222730
# Maps each numeric category id (as a string) to its display name.
# The listing's own line numbers (17-28), which the page extraction had
# embedded between the entries, have been removed so the literal parses.
classtable = {
    "536187477": "票據打印機",
    "536187478": "針式打印機",
    "536187479": "投影燈泡",
    "536187480": "條形碼打印機",
    "536187481": "條碼設備",
    "536187483": "證卡打印機",
    "536187484": "激光打印機",
    "536187485": "多功能一體機",
    "536187486": "噴墨打印機",
    "536187615": "復印復合機",
    "536187616": "硒鼓",
}
# Reposted from: https://www.cnblogs.com/stepit/p/4143039.html
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的python处理html的table标签的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: javascript---》argume
- 下一篇: 微信公众号,商城开发