37,722
社区成员
发帖
与我相关
我的任务
分享
# -*- coding: utf-8 -*-
#统计csdn技术专家男女比例
import urllib,re,thread,sets
# Shared tallies incremented by the tongji() worker threads.
male=0
female=0
# Lock that tongji() takes while it updates the two tallies above.
mylock=thread.allocate_lock()
def tongji(m,n):
    """Tally blog-owner genders for the experts on ranking pages m..n-1.

    For each page index in ``range(m, n)`` the ranking page is downloaded,
    the distinct profile links on it are collected, and every profile page
    is downloaded in turn.  A profile containing the "his blog" marker adds
    one to the global ``male`` counter; otherwise a profile containing the
    "her blog" marker adds one to the global ``female`` counter.  Profiles
    with neither marker are not counted.

    Args:
        m: First ranking page index (inclusive).
        n: Last ranking page index (exclusive).
    """
    global male
    global female
    # Compiled once per call instead of once per page.  The raw string keeps
    # ``\w`` intact, and the group captures the site-relative profile path so
    # no magic slice offset is needed.
    profile_link = re.compile(r"a href='/(\w*/profile)")
    for i in range(m, n):
        listing = _fetch_page(
            "http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % i)
        # set() drops links that occur more than once on the same page.
        for path in set(profile_link.findall(listing)):
            profile = _fetch_page("http://hi.csdn.net/" + path)
            # The lock is held only around the increment and is always
            # released; previously a profile with neither marker left the
            # lock acquired and blocked every other worker forever.
            if profile.find("他的博客") != -1:
                with mylock:
                    male += 1
            elif profile.find("她的博客") != -1:
                with mylock:
                    female += 1


def _fetch_page(url):
    """Download ``url`` and return its body, closing the connection afterwards."""
    sock = urllib.urlopen(url)
    try:
        return sock.read()
    finally:
        sock.close()
def test():
    """Run tongji() over ranking pages 0-3, one page per thread, and wait.

    The function returns only after all four workers have finished, so the
    global tallies are complete when the caller reads them.
    """
    # Function-scope import: the file header only brings in the low-level
    # ``thread`` module, whose threads cannot be joined.  With
    # thread.start_new_thread() this function returned immediately and the
    # caller printed the tallies before any worker had finished.
    import threading

    workers = [
        threading.Thread(target=tongji, args=(page, page + 1))
        for page in range(4)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
# Script entry point: run the crawl, then print the male and female tallies.
if __name__ == "__main__":
    test()
    print("%s %s" % (male, female))
# -*- coding: utf-8 -*-
#统计csdn技术专家男女比例
import time
import urllib,re,threading,sets
class thr_tongji(threading.Thread):
    """Worker thread that tallies blog-owner genders for ranking pages m..n-1.

    The ``male`` and ``female`` counters are per-instance and are modified
    only by run(); read them, or call output(), after joining the thread.
    """

    # Matches a profile link on a ranking page.  Compiled once for the class
    # rather than once per page; the group captures the site-relative path.
    _PROFILE_LINK = re.compile(r"a href='/(\w*/profile)")

    def __init__(self, threadname, m, n):
        """Create a worker named ``threadname`` for page indexes ``range(m, n)``."""
        threading.Thread.__init__(self, name=threadname)
        self.m = m
        self.n = n
        self.male = 0
        self.female = 0

    def _fetch(self, url):
        """Download ``url`` and return its body, closing the connection afterwards."""
        sock = urllib.urlopen(url)
        try:
            return sock.read()
        finally:
            sock.close()

    def run(self):
        """Print the thread name and page range, then count the profiles.

        Each ranking page is downloaded and every distinct profile linked
        from it is fetched.  A profile containing the "his blog" marker
        counts as male; otherwise one containing the "her blog" marker
        counts as female.  Profiles with neither marker are not counted.
        """
        print("%s %s %s" % (self.getName(), self.m, self.n))
        for i in range(self.m, self.n):
            listing = self._fetch(
                "http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % i)
            # set() drops links that occur more than once on the same page.
            for path in set(self._PROFILE_LINK.findall(listing)):
                profile = self._fetch("http://hi.csdn.net/" + path)
                if profile.find("他的博客") != -1:
                    self.male += 1
                elif profile.find("她的博客") != -1:
                    self.female += 1

    def output(self):
        """Print the thread name followed by its male and female tallies."""
        print("%s %s %s" % (self.getName(), self.male, self.female))
def test():
    """Crawl ranking pages 0-3 with one thr_tongji worker per page.

    Prints the start time, starts every worker, waits for all of them to
    finish, prints the stop time and finally prints each worker's tallies.
    """
    first, last, width = 0, 4, 1
    workers = [
        thr_tongji('thr_' + str(page // width), page, page + width)
        for page in range(first, last, width)
    ]
    print("start time:%s" % time.ctime())
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print("stop time:%s" % time.ctime())
    for worker in workers:
        worker.output()
# Run the threaded crawl only when this file is executed as a script.
if __name__ == "__main__":
    test()
start time:Sun Aug 30 21:25:41 2009
thr_0 0 1
thr_1 1 2
thr_2 2 3
thr_3 3 4
stop time:Sun Aug 30 21:26:20 2009
thr_0 14 1
thr_1 15 0
thr_2 15 0
thr_3 14 1
# -*- coding: utf-8 -*-
#统计csdn技术专家男女比例
import time
import urllib,re,threading,sets
# Module-level tallies.  Nothing in the code visible after this point reads or
# updates them (thr_tongji keeps its own per-instance counters).
# NOTE(review): looks like a leftover from the lock-based version; confirm unused.
male=0
female=0
class thr_tongji(threading.Thread):
    """Worker thread that tallies blog-owner genders for ranking pages m..n-1.

    The ``male`` and ``female`` counters are per-instance and are modified
    only by run(); read them, or call output(), after joining the thread.
    """

    # Matches a profile link on a ranking page.  Compiled once for the class
    # rather than once per page; the group captures the site-relative path.
    _PROFILE_LINK = re.compile(r"a href='/(\w*/profile)")

    def __init__(self, threadname, m, n):
        """Create a worker named ``threadname`` for page indexes ``range(m, n)``."""
        threading.Thread.__init__(self, name=threadname)
        self.m = m
        self.n = n
        self.male = 0
        self.female = 0

    def _fetch(self, url):
        """Download ``url`` and return its body, closing the connection afterwards."""
        sock = urllib.urlopen(url)
        try:
            return sock.read()
        finally:
            sock.close()

    def run(self):
        """Print the thread name and page range, then count the profiles.

        Each ranking page is downloaded and every distinct profile linked
        from it is fetched.  A profile containing the "his blog" marker
        counts as male; otherwise one containing the "her blog" marker
        counts as female.  Profiles with neither marker are not counted.
        """
        print("%s %s %s" % (self.getName(), self.m, self.n))
        for i in range(self.m, self.n):
            listing = self._fetch(
                "http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % i)
            # set() drops links that occur more than once on the same page.
            for path in set(self._PROFILE_LINK.findall(listing)):
                profile = self._fetch("http://hi.csdn.net/" + path)
                if profile.find("他的博客") != -1:
                    self.male += 1
                elif profile.find("她的博客") != -1:
                    self.female += 1

    def output(self):
        """Print the thread name followed by its male and female tallies."""
        print("%s %s %s" % (self.getName(), self.male, self.female))
def test():
    """Crawl ranking pages 0 and 1 on two threads and print both tallies.

    Both workers are started before either is joined, and the tallies are
    printed only after both have finished.
    """
    workers = (thr_tongji('t1', 0, 1), thr_tongji('t2', 1, 2))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    for worker in workers:
        worker.output()
# Run the two-thread crawl only when this file is executed as a script.
if __name__ == "__main__":
    test()