Web Scraping with Nokogiri (4)

This version puts some space between the titles in the output.

require 'open-uri'
require 'nokogiri'

# Starting page; reassigned to the "older posts" link after each page is processed.
url = "http://nuttycellist-unknown.blogspot.jp/"

# Walk the blog one page at a time, printing an HTML fragment per date group
# (date header followed by the entry titles) to standard output. The loop ends
# when a page has no usable "older posts" pager link.
loop do
  charset = nil
  # URI.open is required here: since Ruby 3.0, Kernel#open no longer fetches
  # URLs even with open-uri loaded (it would look for a local file instead).
  html = URI.open(url) do |f|
    charset = f.charset # remember the response charset so Nokogiri decodes correctly
    f.read
  end

  doc = Nokogiri::HTML.parse(html, nil, charset)

  doc.css('.date-outer').each do |node|
    print node.css('.date-header').inner_html
    # Empty spacer span emitted right after the date header.
    print '<span style="margin-left: -60px;"></span>'
    node.css('.entry-title').each do |title|
      # Spacer span in front of every title to separate titles from each other.
      print '<span style="margin-left: 80px;"></span>'
      print title.inner_html
    end
    puts '<br>'
  end

  # Follow the first "older posts" link; stop when there is none (or it has no href).
  older_link = doc.at_css('.blog-pager-older-link')
  next_url = older_link && older_link['href']
  break unless next_url

  url = next_url
end

The output file is here.

Leave a Reply

Your email address will not be published. Required fields are marked *