No Comments 作者:小项-怪物猪 分类:临时文件

[buysz]$ time touch {1..10001}.txt

real 0m0.091s
user 0m0.024s
sys 0m0.064s

real 从命令开始到结束所经过的实际时间(墙上时钟时间,包含等待 I/O 和调度的时间)
user 进程在用户态消耗的 CPU 时间
sys 进程在内核态(系统调用)消耗的 CPU 时间

No Comments 作者:小项-怪物猪 分类:临时文件

[buysz]$ touch {1..100}
[buysz]$ ls
1 14 2 25 30 36 41 47 52 58 63 69 74 8 85 90 96
10 15 20 26 31 37 42 48 53 59 64 7 75 80 86 91 97
100 16 21 27 32 38 43 49 54 6 65 70 76 81 87 92 98
11 17 22 28 33 39 44 5 55 60 66 71 77 82 88 93 99
12 18 23 29 34 4 45 50 56 61 67 72 78 83 89 94
13 19 24 3 35 40 46 51 57 62 68 73 79 84 9 95

[buysz]$ wget http://www.52dpet.com/images/vip_{say,title}.gif
[buysz]$ ls
vip_say.gif vip_title.gif

No Comments 作者:小项-怪物猪 分类:临时文件

<script>  
// Creates the global `google` object with the request identifiers
// (kEI / kEXPI / kCSI) and an empty Toolbelt.
// NOTE(review): the doodle script later in this snippet also calls
// google.nav and google.ml, which are not defined in this object literal.
window.google={kEI:"OvfJTYuEH8SxcZ3t_aUP",kEXPI:"17259,28505,28555,29685,29795,29810,30035,30107,30152",kCSI:{e:"17259,28505,28555,29685,29795,29810,30035,30107,30152",ei:"OvfJTYuEH8SxcZ3t_aUP",expi:"17259,28505,28555,29685,29795,29810,30035,30107,30152"},   
Toolbelt:{}};   
</script>  
<style>  
    #hplogo{background:white;cursor:pointer;height:156px;position:relative;width:403px}   
    #hplogo div{pointer-events:none;position:absolute}   
</style>  
<div id=hplogo>  
<img src="http://www.google.com.hk/logos/2011/graham11-hp-start.png" border=0 />  
</div>  
<script>  
/*
 * Animated logo player for the #hplogo element.
 *
 * d  frame table; each entry is [left, top, width, height, rowEnd].
 *    left/top/width/height position and size the frame's <div> inside
 *    #hplogo (in px). A truthy fifth element marks the last frame of a
 *    row in the sprite sheet.
 * e  number of frames (d.length).
 * f  index of the next frame to place.
 * g  current x offset into the sprite image.
 * h  current y offset into the sprite image.
 * i  tallest frame height seen so far in the current sprite row.
 * j  id of the pending timeout, or -1 when none is scheduled.
 *
 * k  mousedown handler for every frame: calls google.nav.go("") when it
 *    exists, otherwise assigns "" to window.location.href.
 * l  places one frame: creates <div id="hplogo"+f>, positions it from the
 *    table entry, and shows the sprite at offset (-g, -h). It then advances
 *    g by the frame width, or, when the entry's fifth element is truthy,
 *    wraps to the next sprite row (g = 0, h += i, i = 0). It re-schedules
 *    itself every 83 ms until all e frames are placed. Nothing is placed
 *    or scheduled if #hplogo is missing or the entry's left value is falsy.
 * m  (re)starts playback: clears google.doodle.a, zeroes f/g/h/i, cancels
 *    any pending timeout, removes frame <div>s left from an earlier run,
 *    then schedules l after 83 ms.
 *
 * Start-up: unless google.doodle.a is already set, it is set to true and
 * the sprite image is loaded through a detached <img>; m runs from its
 * load event. Any exception is passed to
 * google.ml(o, false, {cause: "DOODLE"}).
 */
(function(){try{if(!google.doodle)google.doodle={};var d=[[307,48,88,89],[307,48,89,89],[307,48,91,89],[305,49,93,89],[305,50,93,88],[305,50,93,88],[306,52,92,86],[305,53,93,84],[305,54,94,83],[306,54,93,83],[307,54,92,83],[307,54,92,83],[308,54,90,83],[308,54,90,83],[306,53,91,84],[306,53,91,84],[308,53,90,84],[308,53,90,84],[305,53,92,84],[305,52,92,85],[306,52,91,85],[308,51,88,87,1],[308,50,88,88],[308,49,88,88],[307,49,89,88],[307,50,89,87],[308,51,89,86],[307,54,90,83],[307,57,90,80],[306,58,92,79],[306,58,92,79],[305,60,92,77],[302,61,95,76],[302,63,95,74],[302,51,96,86],[302,66,98,71],[304,67,96,69],[301,63,96,74],[301,58,93,79],[291,52,94,85],[288,50,71,88],[285,43,76,95],[285,37,70,101],[281,29,55,109],[278,20,58,119],[278,20,55,119,1],[277,12,121,127],[271,2,122,138],[267,1,126,139],[264,0,136,140],[260,0,141,140],[255,0,148,140],[252,0,151,140],[249,2,121,138],[247,3,123,137],[246,3,123,137],[246,2,124,137],[258,2,112,137],[263,2,106,137],[263,2,106,137],[262,2,103,137],[260,2,104,136],[260,2,104,137,1],[268,2,98,137],[267,2,99,137],[266,2,97,137],[266,3,96,136],[264,3,99,136],[263,3,100,136],[261,3,100,136],[259,2,138,137],[254,2,126,137],[247,2,101,136],[240,2,108,136],[238,1,110,137],[230,1,118,138],[220,15,128,124],[211,18,137,121],[205,43,102,96],[202,45,104,93],[200,38,97,101],[198,38,104,101,1],[197,39,107,100],[197,39,112,100],[213,39,94,110],[212,40,95,111],[211,41,97,111],[209,42,99,112],[209,43,98,112],[213,43,87,112],[213,42,83,113],[211,40,86,109],[211,38,86,103],[211,37,88,112],[211,20,186,131],[213,27,167,122],[212,44,87,105],[210,44,88,98],[195,44,106,98],[189,44,110,98],[182,46,117,99],[173,44,118,96,1],[161,43,130,99],[154,42,137,97],[153,42,137,97],[153,42,137,97],[152,41,137,98],[151,41,137,97],[149,41,145,97],[148,25,144,114],[148,13,144,126],[141,12,153,127],[115,11,173,128],[108,7,180,133],[108,4,180,136],[108,3,176,137,1],[108,1,161,139],[105,1,235,138],[103,1,295,148],[103,0,277,149],[108,0,234,137],[101,0,232,137],[99,0,135,139]
,[95,0,244,139],[81,0,152,139],[69,0,164,139,1],[66,0,169,139],[65,0,170,139],[63,0,168,138],[61,0,159,138],[35,0,304,139],[19,0,189,140],[18,11,138,129],[18,11,137,129],[18,11,137,128],[18,6,135,133],[7,4,146,136],[6,4,147,136],[3,4,150,136,1],[3,5,150,135],[3,8,150,132],[4,6,394,145],[12,6,388,145],[11,8,389,144],[11,8,387,144],[11,8,387,143,1],[10,8,113,131],[11,8,111,131],[10,9,112,130],[12,9,116,130],[12,9,111,130],[12,9,111,130],[12,9,110,131],[12,34,113,106],[13,35,110,104]],e=d.length,f,g,h,i,j=-1,k=function(){google.nav&&google.nav.go?google.nav.go(""):window.location.href=""},l=function(){var a=d[f],c=document.getElementById("hplogo");if(c&&a[0]){var b=document.createElement("div");b.id="hplogo"+f;b.style.left=a[0]+"px";b.style.top=a[1]+"px";b.style.width=a[2]+"px";b.style.height=a[3]+"px";b.style.background="url(http://www.google.com.hk/logos/2011/graham11-hp-sprite.png) no-repeat "+-g+"px "+-h+"px";b.onmousedown=k;a[3]>i&&(i=a[3]);a[4]?(g=0,h+=i,i=0):g+=a[2];c.appendChild(b);++f;f< e&&(j=window.setTimeout(l,83))}},m=function(){google.doodle.a=!1;i=h=g=f=0;j!=-1&&(window.clearTimeout(j),j=-1);for(var a=0;a< e;++a){var c=document.getElementById("hplogo"+a);c&&c.parentNode&&c.parentNode.removeChild(c)}j=window.setTimeout(l,83)};if(!google.doodle.a){google.doodle.a=!0;var n=document.createElement("img");n.addEventListener?n.addEventListener("load",m,!1):n.attachEvent("onload",m);n.src="http://www.google.com.hk/logos/2011/graham11-hp-sprite.png"}}catch(o){google.ml(o,!1,{cause:"DOODLE"})};})();</script>

1 Comment 作者:小项-怪物猪 分类:Ubuntu

#!/usr/bin/envpython
#-*-coding:utf-8-*-
#-*-encoding=utf-8
   
#--作者:小项--
#--预览:http://www.20hotel.com/news--
   
importsys;
importos;
importre;
importrandom;
importurllib2;
importtime;
importdatetime;
#importsocket;
importMySQLdbasmysql;
   
reload(sys)
   
sys.setdefaultencoding('utf-8')
   
#--转到目录--
os.chdir('img')
   
#urllib2.socket.setdefaulttimeout(15)
   
User='username'
Passwd='password'
Host='localhost'
Db='dbname'
   
home="http://www.8264.com/"
   
#--链接数据库--
contents=mysql.connect(user=User,passwd=Passwd,host=Host,db=Db,charset='utf8').cursor()
   
lsid=[]
   
pnext=[]
   
forsidinxrange(1,100,10):
   lsid.append(str(sid))
   
print"进行列表分段",lsid,"完成."
fortidinreversed(xrange(2,len(lsid)+1)):
   foriinreversed(xrange(int(lsid[(int(tid)-2):(int(tid)-1)][0]),int(lsid[(int(tid)-1):int(tid)][0]))):
   #printi
   #==进行列表获取==#
   request=urllib2.Request("http://www.8264.com/portal-list-catid-251-page-"+str(i)+".html")
   request.add_header('User-Agent','Mozilla/5.0(compatible;Googlebot/2.1;+http://www.google.com/bot.html)')
   foruinreversed(re.findall('<h2><ahref=\"(.*?)\"title=\'',re.findall('<divclass=\"title_8264\">(.*?)<divclass=\"pg\">',urllib2.urlopen(request).read(),re.DOTALL)[0],re.DOTALL)):
   #printu
   #--获取内容页面--
   newsurl=urllib2.Request(u)
   newsurl.add_header('User-Agent','Mozilla/5.0(compatible;Googlebot/2.1;+http://www.google.com/bot.html)')
   news=urllib2.urlopen(newsurl).read()
   time.sleep(int(random.uniform(1,5)))
   #--获取标题--
   title=re.findall('<divclass=\"newstitle\">(.*?)<\/div>',news,re.DOTALL)
   #--获取时间--
   dates=list(eval(re.sub('\,0',',',re.sub(':||-',',',re.findall('<tdalign=\"center\"valign=\"middle\">.*?<divstyle=\"line-height:1.8;text-align:center;\">\xcc\xed\xbc\xd3\xca\xb1\xbc\xe4\xa3\xba(.*?)',news,re.DOTALL)[0]))))
   #--进行时间格式化--
   #--2011-05-1008:19to1305010787.029--
   ttime=datetime.datetime(dates[0],dates[1],dates[2],dates[3],dates[4])
   ptime=time.mktime(ttime.timetuple())
   
   #--获取作者--
   athour=re.sub('<.*?>','',re.findall('\xd7\xf7\xd5\xdf\xa3\xba(.*?)<br\/><a',news,re.DOTALL)[0])
   
   #--获取分页链接--
   page=re.findall('<divclass=\"pg\">(.*?)<\/div>',news,re.DOTALL)
   ifpage!=[]:
   pnext=re.findall('<ahref=\"(.*?)\">[0-9]*<\/a>',page[0],re.DOTALL)
   one_img=[]
   one_txt=re.sub('<[a|A].*?>|<\/[a|A]>','',re.findall('<divclass=\"newstext\">(.*?)<\/div>',news,re.DOTALL)[0])
   newstxt=re.sub('[http:\/\/image.8264.com\/portal\/[0-9]*\/[0-9]*\/|http:\/\/image.8264.com\/portal\/photo\/[0-9]*\/[0-9]*\/]','',one_txt)
   one_img.extend(re.findall('<IMGsrc=\"(.*?)\">',one_txt,re.DOTALL))
   forone_dimginone_img:
   #--下载文章内图片--
   one_yscurl='wget-q'+one_dimg
   os.system(one_yscurl)
   forpinpnext:
   #printp,"\n"
   more_img=[]
   morepage=urllib2.Request(p)
   morepage.add_header('User-Agent','Mozilla/5.0(compatible;Googlebot/2.1;+http://www.google.com/bot.html)')
   pnewtxt=urllib2.urlopen(morepage).read()
   txt=re.sub('<[a|A].*?>|<\/[a|A]>','',re.findall('<divclass=\"newstext\">(.*?)<\/div>',pnewtxt,re.DOTALL)[0])
   #--得到入库的内容--
   ntxt=re.sub('[http:\/\/image.8264.com\/portal\/[0-9]*\/[0-9]*\/|http:\/\/image.8264.com\/portal\/photo\/[0-9]*\/[0-9]*\/]','',txt)
   #--处理内容中的图片--
   more_img.extend(re.findall('<IMGsrc=\"(.*?)\">',txt,re.DOTALL))
   formore_dimginmore_img:
   more_syscurl='wget-q'+more_dimg
   os.system(more_syscurl)
   
   newstxt+=ntxt
   texts=title[0].decode('gbk','ignore').encode('utf-8'),newstxt.decode('gbk','ignore').encode('utf-8'),athour.decode('gbk','ignore').encode('utf-8'),ptime
   #--进行数据插入--
   contents.execute("INSERTINTO`dbname`.`table_name`(`aid`,`class_id`,`title`,`content`,`author`,`order`,`state_radio`,`time`,`view_num`,`img`,`CityID`)VALUES(NULL,'2',%s,%s,%s,'0','2',%s,'0','','53');",texts);
   printathour.decode('gbk','ignore').encode('utf-8'),"在",tuple(dates),"发表的",title[0].decode('gbk','ignore').encode('utf-8'),"发布成功!"
   time.sleep(int(random.uniform(30,90)))
   else:
   #pass
   only_img=[]
   only_txt=re.sub('<[a|A].*?>|<\/[a|A]>','',re.findall('<divclass=\"newstext\">(.*?)<\/div>',news,re.DOTALL)[0])
   newstxt=re.sub('[http:\/\/image.8264.com\/portal\/[0-9]*\/[0-9]*\/|http:\/\/image.8264.com\/portal\/photo\/[0-9]*\/[0-9]*\/]','',only_txt)
   only_img.extend(re.findall('<IMGsrc=\"(.*?)\">',only_txt,re.DOTALL))
   foronly_imginonly_img:
   only_syscurl='wget-q'+only_img
   os.system(only_syscurl)
   texts=title[0].decode('gbk','ignore').encode('utf-8'),newstxt.decode('gbk','ignore').encode('utf-8'),athour.decode('gbk','ignore').encode('utf-8'),ptime
   contents.execute("INSERTINTO`dbname`.`table_name`(`aid`,`class_id`,`title`,`content`,`author`,`order`,`state_radio`,`time`,`view_num`,`img`,`CityID`)VALUES(NULL,'2',%s,%s,%s,'0','2',%s,'0','','53');",texts);
   printathour.decode('gbk','ignore').encode('utf-8'),"在",tuple(dates),"发表的",title[0].decode('gbk','ignore').encode('utf-8'),"发布成功!"
   time.sleep(int(random.uniform(30,90)))
   
   print"第",i,"页采集完成.休息一下,进入下一页采集."
   #--停顿一会--
   time.sleep(int(random.uniform(1200,3200)))
#--关闭数据库连接--
contents.close();

No Comments 作者:小项-怪物猪 分类:FreeBSD

#!/usr/bin/env bash
#
# Summarise search-engine crawler hits and the ten busiest clients found in
# a site's HTTP access log.
#
# Usage: ./spider_log <site> [<suffix>]
#   <site>    directory name under ~/logs/, e.g. 20hotel.com
#   <suffix>  optional; reads access.log.<suffix> instead of access.log
#
# Lines containing jpg, gif, png, js or css are ignored in every count.

LANG=en_US.UTF-8

if [ -z "$1" ]; then
    echo "Using #./spider_log hosting(20hotel.com) [$(date +%F)]."
else
    if [ -n "$2" ]; then
        logpath="$HOME/logs/$1/http/access.log.$2"
    else
        logpath="$HOME/logs/$1/http/access.log"
    fi

    if [ ! -r "$logpath" ]; then
        echo "Cannot read log file: $logpath" >&2
        exit 1
    fi

    # One line per crawler: number of log lines mentioning its name.
    for i in baidu Sogou Googlebot yahoo bingbot YandexBot YoudaoBot; do
        # grep -c prints a bare number; "wc -l" pads it with blanks on BSD.
        spider=$(grep -E -v 'jpg|gif|png|js|css' "$logpath" | grep -E -c "$i")
        echo "$i Spider:$spider"
    done

    # Group lines by client address (field 1) plus fields 12-19, then list
    # the ten most frequent combinations, one per line.
    topip=$(grep -E -v 'jpg|gif|png|js|css' "$logpath" \
        | awk '$1 {print $1,$12,$13,$14,$15,$16,$17,$18,$19}' \
        | sort | uniq -c | sort -rn | head -n 10 \
        | awk '{printf "\n%-8s %-15s %s %s %s %s %s %s %s",$1,$2,$3,$4,$5,$6,$7,$8,$9}')

    echo "TOP10 IP:$topip"
fi