Web scraping code for extracting text from a website.

Web scraping code for extracting text from a website.
A penetration-testing and cybersecurity example, written in Python.

from scrapy.selector import Selector

from scrapy import Spider

from wikiSpider.items import Article

class ArticleSpider(Spider):
    """Spider that reads the first ``<h1>`` text of each page it visits.

    Every response handled by :meth:`parse` produces an ``Article`` item
    whose ``title`` field holds that heading text.
    """

    # Name by which Scrapy identifies this spider.
    name = "article"

    # NOTE(review): placeholder value; it does not match the host used in
    # start_urls below -- confirm the real domain before crawling.
    allowed_domains = ["mydomain"]

    # Pages requested when the crawl starts.
    start_urls = [
        "http://mysite.xxx",
        "http://mysite.xxx/mypage",
    ]

    def parse(self, response):
        """Build an ``Article`` from the first ``<h1>`` of *response*.

        Args:
            response: The downloaded page, queried with XPath.

        Returns:
            An ``Article`` with its ``title`` field set, or ``None`` when
            the page contains no ``<h1>`` text node.
        """
        # extract_first() returns None instead of raising IndexError when
        # the XPath matches nothing (indexing with [0] would crash here).
        title = response.xpath('//h1/text()').extract_first()
        if title is None:
            self.logger.warning("No <h1> title found on %s", response.url)
            return None

        print("Title is: " + title)

        item = Article()
        item['title'] = title
        return item

#python #linux #programming #cybersecurity #ethicalhacker #hacking #penetration-testing #penetrationtester #webscraping

Commenti

Post popolari in questo blog