1 import requests  2 from bs4 import BeautifulSoup  3 import re  4 import traceback  5 
 6 def GetHtmlText(url):  7     for i in range(0,1):        #尝试两次
 8         try:  9             r=requests.get(url) 10             r.encoding = 'utf-8'
11  r.raise_for_status(); 12             return r.text; 13         except: 14  traceback.print_exc() 15             continue
16     return 
17 
def _parse_movie_item(single):
    """Build the record dict for one movie from its ``<li>`` node.

    Returns a new dict with the keys ``num``, ``movieTitle``, ``actor``,
    ``rating`` and ``quote``. Raises whatever the lookups raise (for example
    ``AttributeError`` or ``IndexError``) when the node lacks an expected
    element; the caller decides how to report that.
    """
    num = single.find(name='em').string    # ranking number
    titles = single.find_all(name='span', class_='title')
    other = single.find(name='span', class_='other').string
    if len(titles) == 2:
        movieTitle = titles[0].string + titles[1].string + other
    else:
        movieTitle = titles[0].string + other

    # The child tags are read from the odd indexes of ``contents`` --
    # presumably because the whitespace between tags is kept as text nodes
    # at the even indexes (the original author observed 7 nodes here).
    classBD = single.find(name='div', class_='bd').contents
    movieActor = classBD[1].text
    movieRating = re.findall(r'\d?\.\d?', str(classBD[3]))[0]
    # An entry without a quote node has fewer children; use an empty quote
    # rather than failing on the missing index.
    movieQuote = classBD[5].text if len(classBD) > 5 else ''

    return {
        'num': num,
        'movieTitle': movieTitle,
        'actor': movieActor,
        'rating': movieRating,
        'quote': movieQuote,
    }


def GetMovieInfo(url):
    """Walk the 10 list pages under ``url`` and save every movie found.

    Each page is requested as ``url + '?start=<offset>'`` with offsets
    0, 25, ..., 225. Every ``<li>`` inside the ``<ol class="grid_view">``
    element is parsed and handed to ``printMovieInfo``.

    Errors are reported with a printed traceback and then skipped: a page
    that cannot be downloaded or parsed is skipped as a whole, while a
    single malformed entry only loses that entry, not the rest of its page.

    Args:
        url: Base address of the list, without the ``?start=`` query.
    """
    for page in range(10):
        try:
            html = GetHtmlText(url + '?start=' + str(page * 25))
            if html is None:
                # The download failed; there is nothing to parse for this page.
                continue
            soup = BeautifulSoup(html, 'html.parser')
            movie = soup.find(name="ol", class_='grid_view')    # container of all entries
            movieList = movie.find_all(name='li')               # one <li> per movie
        except Exception:
            traceback.print_exc()
            continue

        for single in movieList:
            try:
                printMovieInfo(_parse_movie_item(single))
            except Exception:
                traceback.print_exc()
48 
def printMovieInfo(Info, path='/home/why/py/movieInfo.txt'):
    """Append one movie record to a UTF-8 text file.

    The record is written as three lines: rank immediately followed by the
    title, then the cast line, then the rating and the quote, each of the
    latter two prefixed by its label.

    Args:
        Info: Mapping with the string values ``num``, ``movieTitle``,
            ``actor``, ``rating`` and ``quote``.
        path: File to append to. Defaults to the location the script has
            always used, so existing callers are unaffected.

    Failures (missing key, non-string value, unwritable file) are reported
    with a printed traceback and otherwise ignored, so one bad record does
    not stop the caller.
    """
    try:
        # Build the text first so a bad record never touches the file.
        record = (Info['num'] + Info['movieTitle'] + '\n'
                  + Info['actor']
                  + '\n评分:' + Info['rating']
                  + '\n评价:' + Info['quote'] + '\n')
        with open(path, 'a', encoding='utf-8') as f:
            f.write(record)
    except Exception:
        traceback.print_exc()
56 
def main():
    """Scrape the movie list that starts at the Douban Top 250 base URL."""
    base_url = 'https://movie.douban.com/top250'
    GetMovieInfo(base_url)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

更多相关文章

  1. 运用Python语言编写获取Linux基本系统信息(三):Python与数据库编
  2. 使用python实现一个简单的学生信息管理系统
  3. Python脚本如何获取当前环节和用户等信息
  4. 使用/proc/meminfo文件查看内存状态信息
  5. 通过指令“ps -l”查看进程信息
  6. 为什么函数在ELF中的其他共享库的长度信息?
  7. 如何从PHP的mail()失败中获取额外的错误信息?
  8. Linux下CPU显示信息解释
  9. linux查看硬件信息及驱动设备相关整理

随机推荐

  1. Android一些经常涉及到的权限【转】
  2. This text field does not specify an in
  3. android wrapper C调用java api
  4. Android跨进程通信IPC之11——Binder驱动
  5. android上传图片至服务器
  6. Android导出Kml
  7. minSdkVersion,targetSdkVersion,maxSdkVer
  8. Android开发学习---使用IntelliJ IDEA 13.
  9. android画图——Path()的使用
  10. 坑爹的android碎片化