class AljazeeraSpider(XMLFeedSpider):
name = "aljazeera"
allowed_domains = ["aljazeera.com"]
start_urls = [
'http://www.aljazeera.com/',
]
def parse(self, response):
hxs = HtmlXPathSelector(response) # The xPath selector
titles = hxs.select('//div[contains(@class,"SkyScrapperBoxes")]/div[contains(@class,"skyscLines")]')
if not titles:
MailNotify().send_mail("Aljazeera", "Scraper Report")
items = []
for titles in titles:
item = NewsItem()
item['title'] = escape(''.join(titles.select('a/text()').extract()))
item['link'] = "http://www.aljazeera.com" + escape(''.join(titles.select('a/@href').extract()))
item['description'] = ''
item = Request(item['link'], meta={'item': item}, callback=self.parse_detail)
items.append(item)
return items
def parse_detail(self, response):
item = response.meta['item']
sel = HtmlXPathSelector(response)
detail = sel.select('//td[@class = "DetailedSummary"]')
item['details'] = remove_html_tags(escape(''.join(detail.select('p').extract())))
item['location'] …
Run Code Online (Sandbox Code Playgroud) 我编写的PHP
代码使用Regular Expression
检查字符串包含UPPER CASE
和DIGITS
仅的组合.我无法测试$ str.
$str = "ABCD1234";
if(preg_match('[A-Z0-9]', $str)){
echo "yes";
}else{
echo "No";
}
Run Code Online (Sandbox Code Playgroud)