def sub_article(self, response):
    """Print an article's title followed by its table-of-contents entries.

    Parses ``response.text`` with BeautifulSoup's ``html.parser``, prints the
    text of the first ``.title`` element (preceded by a newline), then prints
    the text of every ``.toc-text`` element, each prefixed with a tab.

    Args:
        response: The downloaded page; only its ``text`` attribute is read.

    Returns:
        None. Output goes to stdout.
    """
    soup = BeautifulSoup(response.text, 'html.parser')

    titles = soup.select('.title')
    if not titles:
        # No title element on this page: nothing meaningful to print, and
        # indexing [0] would raise IndexError.
        return

    print('\n', titles[0].text)
    for entry in soup.select('.toc-text'):
        print('\t', entry.text)
class BlogSpider(scrapy.Spider):
    """Spider that follows post links on the blog index and prints outlines.

    ``parse`` handles the start page and schedules one request per
    ``.post-title`` link; ``sub_article`` handles each article page.
    """

    name = 'blogurl'
    start_urls = ['https://www.teamssix.com']

    def parse(self, response):
        """Yield a request for every ``.post-title`` link on the index page.

        Args:
            response: The index page response; its ``text`` is parsed with
                BeautifulSoup's ``html.parser``.

        Yields:
            scrapy.Request: One request per link, with ``sub_article`` as the
            callback.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        for link in soup.select('.post-title'):
            href = link.get('href')
            if not href:
                # An element without an href cannot be followed.
                continue
            # Resolve against the page URL instead of concatenating a
            # hard-coded host, so absolute and relative hrefs both work.
            url = response.urljoin(href)
            yield scrapy.Request(url, callback=self.sub_article)

    def sub_article(self, response):
        """Print an article's title and then its entries, one per line.

        The title and the entries come from ``self.article_title(soup)`` and
        ``self.article_list(soup)``.
        NOTE(review): those two helpers are not defined in the visible part
        of this file; presumably they are defined elsewhere -- confirm.

        Args:
            response: The article page response; only ``text`` is read.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        title = self.article_title(soup)
        entries = self.article_list(soup)  # renamed: `list` shadowed the builtin

        print(title)
        for entry in entries:
            print('\t', entry)