37,722
社区成员
发帖
与我相关
我的任务
分享
# -*- coding: utf-8 -*-
#统计csdn技术专家男女人数
import urllib,re,thread
# Module-level counters: test() increments them through `global`, and the
# __main__ block prints both once the scan is finished.
male=0
female=0
def _fetch(url):
    """Return the body of *url*, closing the connection even if the read fails."""
    sock = urllib.urlopen(url)
    try:
        return sock.read()
    finally:
        sock.close()


def test(m):
    """Tally the gender of every expert linked from ranking page *m*.

    Downloads ranking page number *m*, follows each distinct profile link
    found on it, and increments the module-level ``female`` counter when the
    profile page contains "她的博客" ("her blog") or the ``male`` counter when
    it contains "他的博客" ("his blog").  Profiles containing neither marker
    are not counted.

    Args:
        m: page index substituted into the ranking-page URL.

    Raises:
        IOError: propagated from ``urllib.urlopen`` when a request fails.
    """
    global male
    global female
    listing = _fetch("http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % m)
    namepattern = re.compile(r"a href=\D/\w*/profile")
    # The same profile link can be matched more than once on a page; a set
    # keeps each expert from being counted repeatedly.
    for link in set(namepattern.findall(listing)):
        # link[9:] drops the 9 characters matched before the user name
        # ("a href=", the quote character and the leading slash).
        profile = _fetch("http://hi.csdn.net/" + link[9:])
        # str.find() returns -1 (which is truthy) on a miss, so the markers
        # are tested with `in` instead of relying on find()'s truthiness.
        if "她的博客" in profile:
            female += 1
        elif "他的博客" in profile:
            male += 1
if __name__ == "__main__":
    # Scan ranking page 0 only, then print the two counters, male first,
    # separated by a single space.
    test(0)
    tallies = (male, female)
    print("%d %d" % tallies)
# -*- coding: utf-8 -*-
#统计csdn技术专家男女人数
import urllib,re,thread
# Module-level counters: test() increments them through `global`, and the
# __main__ block prints both once the scan is finished.
male=0
female=0
def test(m):
    """Count expert genders on ranking page *m*, echoing each profile path.

    Fetches ranking page number *m* and visits every profile link matched on
    it.  Matches are not de-duplicated, so a link that appears several times
    is visited (and counted) several times.  For each visit the profile path
    is printed, then the module-level ``female`` counter is incremented if
    the page contains "她的博客" ("her blog"), otherwise ``male`` is
    incremented if it contains "他的博客" ("his blog").

    Args:
        m: page index substituted into the ranking-page URL.
    """
    global male, female
    ranking_url = "http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % m
    ranking_html = urllib.urlopen(ranking_url).read()
    for match in re.findall("a href=\D/\w*/profile", ranking_html):
        # Strip the 9 characters matched before the user name.
        profile_path = match[9:]
        print(profile_path)
        profile_html = urllib.urlopen("http://hi.csdn.net/" + profile_path).read()
        if "她的博客" in profile_html:
            female += 1
        elif "他的博客" in profile_html:
            male += 1
if __name__ == "__main__":
    # Scan ranking page 0 only, then print the two counters, male first,
    # separated by a single space.
    test(0)
    tallies = (male, female)
    print("%d %d" % tallies)
handi/profile
handi/profile
baolqun/profile
baolqun/profile
xiaoqhuang/profile
xiaoqhuang/profile
txhack/profile
txhack/profile
bmwboy/profile
bmwboy/profile
cindytsai/profile
cindytsai/profile
tengulre/profile
tengulre/profile
sworddx/profile
sworddx/profile
abandonship/profile
abandonship/profile
sswater/profile
sswater/profile
snake09003232/profile
snake09003232/profile
knight_qmh/profile
knight_qmh/profile
chijingde/profile
chijingde/profile
linuxyuxi/profile
linuxyuxi/profile
chenzhuo/profile
chenzhuo/profile
28 2
# -*- coding: utf-8 -*-
#统计csdn技术专家男女人数
import urllib,re,sets
# Module-level counters: test() increments them through `global`, and the
# __main__ block prints both once the scan is finished.
male=0
female=0
def _fetch(url):
    """Return the body of *url*, closing the connection even if the read fails."""
    sock = urllib.urlopen(url)
    try:
        return sock.read()
    finally:
        sock.close()


def test(m):
    """Tally the gender of every distinct expert linked from ranking page *m*.

    Downloads ranking page number *m*, follows each distinct profile link
    found on it, and increments the module-level ``female`` counter when the
    profile page contains "她的博客" ("her blog") or the ``male`` counter when
    it contains "他的博客" ("his blog").  Profiles containing neither marker
    are not counted.

    Args:
        m: page index substituted into the ranking-page URL.

    Raises:
        IOError: propagated from ``urllib.urlopen`` when a request fails.
    """
    global male
    global female
    listing = _fetch("http://hi.csdn.net/RankingStaticPage/True/3/5001/%d.htm" % m)
    namepattern = re.compile(r"a href='/\w*/profile")
    # Iterating the set directly de-duplicates repeated links without
    # building an intermediate list; visiting order does not affect the tally.
    for link in set(namepattern.findall(listing)):
        # link[9:] drops the 9-character "a href='/" prefix, leaving
        # "<user>/profile".
        profile = _fetch("http://hi.csdn.net/" + link[9:])
        if "她的博客" in profile:
            female += 1
        elif "他的博客" in profile:
            male += 1
if __name__ == "__main__":
    # Pages 0 .. 2889/15 inclusive are scanned.
    # NOTE(review): presumably 2889 listed experts at 15 per page - confirm.
    page_count = 2889 // 15 + 1
    for page in range(page_count):
        test(page)
    print("男专家%d人,女专家%d人" % (male, female))