#!/usr/bin/env ruby
#
# twitlogger.rb -- append your Twitter friends timeline to daily log files.
#
# Author: IKARASHI Yoshinori
# License: GPLv2
# $Id: twitlogger.rb 13 2008-04-21 20:49:11Z yoosee $
#

username     = "" # write your twitter id
password     = "" # write your twitter password
logdir       = "" # directory the log files are written to
interval     = 60 # seconds between fetches; the API allows 70 requests per 60 minutes
since_margin = 0  # set to ~30 (seconds) if statuses get dropped around interval boundaries

require 'open-uri'
require 'uri'
require 'time'     # Time.parse / Time#httpdate
require 'timeout'
require 'stringio'
require 'zlib'
require 'rexml/document'

TIMELINE_URI = "http://twitter.com/statuses/friends_timeline.xml"

# Fetch the timeline XML with HTTP basic authentication.
# Returns the (gunzipped) response body, or an empty string on error.
def fetch uri, user, password
  opts = { "Accept-Encoding" => "gzip, deflate",
           "User-Agent" => 'Mozilla/6.0 TwitLogger',
           :http_basic_authentication => [user, password] }
  page = nil
  begin
    page = Timeout.timeout(15) {
      URI.parse(uri).open(opts) {|f| f.read }
    }
  rescue OpenURI::HTTPError
    STDERR.puts "[#{Time.now}] HTTP Error #{$!}"
    return ''
  rescue StandardError, TimeoutError
    STDERR.puts "[#{Time.now}] Error #{$!}"
    return ''
  end
  # The body is gzip-compressed when it starts with the gzip magic bytes.
  if /\A\x1f\x8b/ =~ page
    begin
      page = Zlib::GzipReader.new(StringIO.new(page)).read || ''
    rescue Zlib::Error
      page = '(zlib decode error)'
    end
  end
  return page
end

# Parse the statuses XML into formatted log lines.
# Returns [lines, newest_created_at_time].
def parse_xml page
  rexml = REXML::Document.new page
  a = Array.new
  lastmodified = nil
  rexml.elements.each("//statuses/status") do |status|
    begin
      name = status.elements["user/screen_name"].text
      time = Time.parse(status.elements["created_at"].text)
      lastmodified = time if !lastmodified or lastmodified < time
      text = status.elements["text"].text.chomp.gsub(/\n/, " / ")
      a.push "#{time.strftime("%H:%M:%S")} #{name}: #{text}"
    rescue
      STDERR.puts "#{$!}:#{$@}"
    end
  end
  return a, lastmodified
end

if username.empty? or password.empty?
  STDERR.puts "Write your twitter id and password in twitlogger.rb"
  exit
end

logfile = Time.now.strftime("%Y%m%d.log")
logger  = File.open(File.join(logdir, logfile), "a")
last_fetched = nil
last = Array.new

while true do
  # Rotate to a new log file when the date changes.
  if logfile != Time.now.strftime("%Y%m%d.log")
    logger.close
    logfile = Time.now.strftime("%Y%m%d.log")
    logger  = File.open(File.join(logdir, logfile), "a")
  end

  uri = TIMELINE_URI
  uri += "?since=#{URI.encode((last_fetched - since_margin).httpdate)}" if last_fetched
  page = fetch(uri, username, password)
  if !page or page.empty?
    sleep interval
    next
  end

  a, last_fetched_temp = parse_xml(page)
  last_fetched = last_fetched_temp if last_fetched_temp

  # Statuses arrive newest first; reverse so the log reads oldest to newest.
  logger.puts((a - last).reverse.join("\n")) unless (a - last).empty?
  logger.flush

  # 'last' pools the latest 20 items to avoid duplicate log lines
  # (the twitter api returns at most 20 items per request).
  last += a - last
  last = last.last(20) if last.size > 20

  sleep interval
end
logger.close
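
# ----------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original script): fill in
# username, password and logdir above, then leave the script running in
# the background, e.g.
#
#   $ nohup ruby twitlogger.rb >> twitlogger.err 2>&1 &
#
# With the strftime/gsub formatting in parse_xml, an appended log line
# looks roughly like this (values are made up):
#
#   20:49:11 yoosee: first line of a status / second line folded in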