#!/usr/bin/env ruby
#
# Author: IKARASHI Yoshinori
# Created: 2005-09-17 21:45:01 JST
# Modified: 2007-10-01 23:4426:
# Licence: GPLv2
# $Id$
#
=begin
= mixi2gmail

== description

mixi2gmail feeds mixi diary entries to an e-mail account, such as gmail.

== configuration

This program requires a configuration file at $HOME/.mixi/mixirc .
It should contain the following parameters ("#" starts a comment):

  username    = foo@example.com       # mixi account e-mail address
  password    = xxxxxxxx              # mixi login password
  mailaddress = foo@gmail.com         # e-mail address to send mail to
  mailfrom    = mixi@init.org         # dummy e-mail From address
  smtpserver  = localhost             # it doesn't work yet because of
                                      # the mechanize override problem
  excludelist = 1111, 34567           # skip list
  watchlist   = 98765, 43210, 2222    # watch list of people who are not your friends

== website

http://trac.yoosee.net/mixi2gmail/
=end

$LOAD_PATH.unshift("/home/yoshi/work/web/mixi")
require 'nkf'
require 'logger'
require 'mechanize'
require 'net/smtp'
require 'time'
require 'optparse'

module Mixi
  # Loads "key = value" settings from the rc file into a Hash (see #config).
  class Config
    # Hash of setting name => String, or Array of Strings for comma-separated values.
    attr_reader :config

    # rcfile:: path of the configuration file; it is parsed immediately.
    def initialize(rcfile)
      @rcfile = rcfile
      @config = {}
      config_file_parser
    end

    # Parses +rcfile+ and merges its settings into #config.
    #
    # Blank lines and "#" comments (a whole line, or trailing text introduced
    # by whitespace followed by "#") are ignored.  Only the first "=" separates
    # key and value, so values may themselves contain "=".  A value containing
    # "," becomes an Array of Strings.
    #
    # Returns nil when the file does not exist, the settings Hash otherwise.
    def config_file_parser(rcfile = @rcfile)
      return nil unless File.exist?(rcfile)

      File.foreach(rcfile) do |line|
        entry = line.sub(/(\A|\s)#.*/, '').strip
        next if entry.empty?

        key, value = entry.split(/\s*=\s*/, 2)
        next if key.nil? || key.empty?

        value = value.split(/\s*,\s*/) if value && value.include?(',')
        @config[key] = value
      end
      @config
    end

    # Placeholder for command line handling: only builds an OptionParser.
    def command_line_parser
      OptionParser.new
    end
  end

  # Page-scraping patterns, interpolated into regexps by Diary#parse.
  #
  # NOTE(review): these literals look like they lost their HTML markup before
  # reaching this file (e.g. COMMENT_REGEXP has four groups although
  # Diary#parse reads five).  They are kept byte-for-byte; restore them from
  # the upstream source before relying on the parser.
  IMAGEBLOCK_REGEXP = ']*?>'
  IMAGEBLOCK_CONTAINER_REGEXP = '
\s*
'
  USERNAME_REGEXP = '
\s*

(.+)さんの日記

\s*
'
  DIARY_REGEXP = '
\s*
(.+) .+?
(\d{4}年\d{2}月\d{2}日)(\d{2}:\d{2})
\s*
\s*
(.+?)
'
  COMMENT_REGEXP = '(.+?) \s* (\d{4}年\d{2}月\d{2}日) (\d{2}:\d{2}) \s* \s*
(.+?)
'

  # One diary entry together with its comments and referenced image pages.
  class Diary
    attr_accessor :author, :datetime, :title, :imagepages, :images, :body, :url, :comments

    def initialize
      @comments = []
      @imagepages = []
    end

    # Builds a local Time from a date string (year, month and day as 4/2/2
    # digits separated by non-digits) and a "HH:MM" time string.
    # Time.local raises when either string does not match.
    def mkdatetime(date, time)
      year = month = day = hour = minute = nil
      if /(\d{4})\D+(\d{2})\D+(\d{2})/ =~ date
        year, month, day = Regexp.last_match.captures
      end
      if /(\d{2}):(\d{2})/ =~ time
        hour, minute = Regexp.last_match.captures
      end
      Time.local(year, month, day, hour, minute)
    end

    # Extracts author, title, body, timestamp, image pages and comments from
    # the HTML of a diary page.
    #
    # Returns true on success; prints a message and returns false when the
    # entry itself cannot be located.
    def parse(html)
      # NOTE(review): the page is converted with nkf '-ed' before matching, so
      # the patterns above must be in the same encoding as that output -
      # confirm against the encoding this file is saved in.
      html = NKF.nkf('-ed', html)
      @author = nil

      # Remember every image page and drop the image markup from the text.
      html.gsub!(/#{IMAGEBLOCK_REGEXP}/) do
        imagepage_uri = Regexp.last_match(1)
        next '' unless imagepage_uri

        @imagepages.push(imagepage_uri)
        puts " [i] image #{imagepage_uri}"
        ''
      end
      html.gsub!(/#{IMAGEBLOCK_CONTAINER_REGEXP}/m, '')

      author = Regexp.last_match(1) if /#{USERNAME_REGEXP}/m =~ html

      unless /#{DIARY_REGEXP}/m =~ html
        puts " + failed to parse diary of #{author}"
        return false
      end
      title, date, time, body = Regexp.last_match.captures
      puts " + #{author}: #{title} (#{date} #{time})"

      @author = author
      @title = title
      @body = body
      @datetime = mkdatetime(date, time)

      html.scan(/#{COMMENT_REGEXP}/m) do
        found = Regexp.last_match
        @comments.push(
          DiaryComment.new(found[2], mkdatetime(found[3], found[4]), found[1], found[5])
        )
      end
      true
    end

    # Renders the entry and its comments as the mail body.
    #
    # NOTE(review): the template strings contain no HTML tags although the
    # mail is sent as text/html; the markup was presumably lost together with
    # the markup of the patterns above.  The strings are kept as found.
    def html
      output = ''
      output << "\n\n#{@title}\n\nby #{@author} at #{@datetime.strftime('%Y-%m-%d %H:%M')}\n\n"
      # @images.each do |image|
      #   output << "\"[IMAGE]\" "
      # end
      output << "\n\n#{@body}\n\n\n"
      @comments.each do |comment|
        output << "\n\n#{comment.author} at #{comment.datetime.strftime('%Y-%m-%d %H:%M')}\n\n#{comment.body}\n"
      end
      output << ''
      output
    end
  end

  # A single comment attached to a diary entry.
  class DiaryComment
    attr_accessor :author, :datetime, :url, :body

    def initialize(author, datetime, url, body)
      @author, @datetime, @url, @body = author, datetime, url, body
    end
  end

  # Logs in to mixi and downloads diary entries newer than the id recorded
  # in +lastidfile+.
  class DiaryFetcher
    attr_accessor :username, :password, :lastidfile, :logfile, :debug, :wait
    attr_reader :lastid, :excludelist

    # Logs in immediately with the given credentials and loads the id of the
    # newest entry handled by the previous run (0 when no state file exists).
    def initialize(username, password)
      @username, @password = username, password
      @logfile = ENV['HOME'] + '/.mixi/' + 'mixi-access.log'
      @lastidfile = ENV['HOME'] + '/.mixi/' + 'lastid.log'
      @excludelist = []
      @wait = 3
      @debug = false
      login
      # A missing state file means "first run": start from 0 instead of
      # failing after the login has already happened.
      @lastid = File.exist?(@lastidfile) ? File.read(@lastidfile).to_i : 0
      @maxid = @lastid
    end

    # Accepts nil, a single id or a list of ids and stores them as an Array
    # of Strings, so the membership test in #fetch_friend_diary never
    # degrades to a substring match on a lone String.
    def excludelist=(list)
      @excludelist = Array(list).map { |id| id.to_s.strip }
    end

    # Submits the login form on the top page and follows the "url=..."
    # redirect found in the response, if any.
    def login
      # Use WWW::Mechanize when it is defined, ::Mechanize otherwise.
      mechanize = defined?(WWW::Mechanize) ? WWW::Mechanize : ::Mechanize
      @agent = mechanize.new { |a| a.log = Logger.new(@logfile) }
      @agent.user_agent_alias = 'Windows Mozilla'
      puts "access to http://mixi.jp as #{@agent.user_agent}"

      page = @agent.get('http://mixi.jp/')
      # page = @agent.get('mixi.html')
      form = page.forms[0]
      # form.fields.find {|f| p f}
      { 'email' => @username, 'password' => @password, 'next_url' => '/home.pl' }.each do |name, value|
        form.fields.find { |f| f.name == name }.value = value
      end
      page = @agent.submit(form, form.buttons.first)
      puts "login to mixi as #{@username}." if @debug

      # Capture the whole redirect target; the previous pattern only accepted
      # a one-character URL and therefore never matched.
      if /url=([^"]+)"/ =~ page.body
        @agent.get('http://mixi.jp' + Regexp.last_match(1))
      end
      sleep @wait if @wait > 0
    end

    # Returns an Array of Diary objects for the entries of +ownerid+ whose id
    # is greater than the last id handled by the previous run.
    def fetch_user_diary(ownerid)
      page = @agent.get("http://mixi.jp/list_diary.pl?id=#{ownerid}")
      entries = []
      page.links.each do |link|
        next unless /続きを読む/ =~ link.node.text &&
                    /view_diary.pl\?id=(\d+)&owner_id=(\d+)/ =~ link.href

        diaryid = Regexp.last_match(1).to_i
        @maxid = diaryid if @maxid < diaryid
        # puts "pre-fetched diary: #{link.href}" if @debug
        next if diaryid <= @lastid

        sleep @wait if @wait > 0
        entry = Diary.new
        entry.url = link.href
        entry.parse(@agent.get(link.href).body)
        entries.push(entry)
      end
      entries
    end

    # Returns the new entries of every friend listed on the "new friend
    # diary" page, skipping owners on the exclude list.
    def fetch_friend_diary
      page = @agent.get('http://mixi.jp/new_friend_diary.pl')
      owners = []
      page.body.scan(/view_diary.pl\?id=(\d+)&owner_id=(\d+)/) do |diary_ref, owner_ref|
        diaryid = diary_ref.to_i
        ownerid = owner_ref.to_i
        @maxid = diaryid if @maxid < diaryid
        # puts "pre-fetched diary: #{href}" if @debug
        next if @lastid.to_i >= diaryid

        if @excludelist.include?(ownerid.to_s)
          puts "skip #{diary_ref} because of exclude list" if @debug
          next
        end
        owners.push(ownerid)
      end

      # One fetch per owner: fetch_user_diary already returns all of an
      # owner's new entries, so repeating it would duplicate them.
      entries = []
      owners.reverse.uniq.each do |ownerid|
        sleep @wait if @wait > 0
        puts "fetching diary: id=#{ownerid}" if @debug
        entries.concat(fetch_user_diary(ownerid))
      end
      entries
    end

    # Downloads the image referenced by an image page and returns its body,
    # or nil when the URI is empty or no image reference is found.
    def fetch_image(imagepage_uri)
      return if imagepage_uri.empty?

      # puts " + try to get #{imagepage_uri}"
      image = nil
      page = @agent.get("http://mixi.jp/" + imagepage_uri)
      # NOTE(review): this pattern has no capture group left (its markup
      # appears to have been stripped), so no image URL can be extracted
      # until it is restored from the upstream source.
      page.body.scan(/]*>/i) do
        src = Regexp.last_match(1)
        next unless src

        puts " -> fetch image: #{src}"
        image = @agent.get(src).body
      end
      image
    end

    # Records the highest diary id seen so the next run skips older entries.
    def close
      File.open(@lastidfile, 'w') { |file| file.write(@maxid) }
    end
  end
end

# Mails one diary entry through the local +mailx+ command.
#
# text::       body of the mail (converted to ISO-2022-JP with nkf -j)
# subject::    subject line (MIME-encoded with nkf -M)
# datetime::   Time used for the Date header
# mail_from::  address placed in the From header
# recipients:: one address, or an Array of addresses
# images::     Array of raw image data; nil entries are ignored.  When any
#              image is present the mail becomes multipart/mixed with the
#              images attached as base64 JPEG parts.
#
# mailx is started without a shell, so none of the arguments (the subject is
# derived from scraped page content) can be interpreted as shell syntax.
# Returns nil.
def send_mail(text, subject, datetime, mail_from, recipients, images)
  images = Array(images).compact
  html_type = 'Content-Type: text/html; charset="iso-2022-jp"'

  if images.empty?
    content_type = html_type
    body = text
  else
    boundary = '__mixi2gmail_123456789ABCDEFGHIJ__'
    content_type = "Content-Type: multipart/mixed; boundary=\"#{boundary}\""
    body = "--#{boundary}\n"
    body << "#{html_type}\n\n"
    body << text
    body << "\n\n"
    images.each do |image|
      filename = "#{Time.now.to_i}_#{rand(1000)}.jpg"
      body << "--#{boundary}\n"
      # body << "Content-Type: application/octed-stream;\n"
      body << "Content-Type: image/jpeg;\n"
      body << " name=\"#{filename}\"\n"
      body << "Content-Transfer-Encoding: base64\n"
      body << "Content-Disposition: attachment;\n"
      body << " filename=\"#{filename}\"\n\n"
      body << [image].pack('m')
      body << "\n\n"
    end
    # The final delimiter of a multipart body carries a trailing "--".
    body << "--#{boundary}--\n"
  end

  command = [
    'mailx', '-s', NKF.nkf('-M', subject),
    '-a', 'X-ML-Name: mixi',
    '-a', "From: mixi diary <#{mail_from}>",
    '-a', content_type,
    '-a', "Date: #{datetime.rfc2822}"
  ]
  command.concat(Array(recipients))

  IO.popen(command, 'w') { |mailx| mailx.write(NKF.nkf('-j', body)) }
  warn "mailx exited with status #{$?.exitstatus}" unless $?.success?
  return

  # The following (net/smtp) doesn't work because of mechanize's override:
  #
  #   smtpserver = 'localhost'
  #   message = "From: #{mail_from}
  #   To: #{recipients}
  #   Subject: #{NKF::nkf('-M', subject)}
  #   Date: #{Time.now.rfc2822}
  #   MIME-Version: 1.0
  #   X-ML-Name: mixi
  #   Content-Type: text/plain; charset=\"iso-2022-jp\"
  #   Content-Transfer-Encoding: 7bit
  #
  #   #{NKF.nkf('-j',text)}
  #   "
  #   Net::SMTP.start(smtpserver, 25) do |smtp|
  #     smtp.send_message(message, mail_from, recipients)
  #   end
end

# Downloads the images of +diary+ (pausing 2 seconds after each download)
# and mails the entry using the mailfrom / mailaddress settings.
def deliver_diary(agent, diary, settings)
  images = diary.imagepages.map do |imagepage|
    image = agent.fetch_image(imagepage)
    sleep 2
    image
  end
  send_mail(diary.html, "#{diary.author}さんの日記", diary.datetime,
            settings['mailfrom'], settings['mailaddress'], images)
end

rcfile = "#{ENV['HOME']}/.mixi/mixirc"
# Config.new never returns nil, so check for the file itself.
unless File.exist?(rcfile)
  warn "configuration file #{rcfile} not found."
  exit 1
end
settings = Mixi::Config.new(rcfile).config

agent = Mixi::DiaryFetcher.new(settings['username'], settings['password'])
agent.debug = true
agent.excludelist = settings['excludelist']

agent.fetch_friend_diary.reverse.each do |diary|
  begin
    deliver_diary(agent, diary, settings)
  rescue => e
    puts "Error: #{diary.url} => #{e}:#{e.backtrace}"
    p diary
  end
end

# watchlist may be absent, a single id (String) or a list of ids (Array).
Array(settings['watchlist']).each do |ownerid|
  agent.fetch_user_diary(ownerid).each do |diary|
    begin
      deliver_diary(agent, diary, settings)
    rescue => e
      puts "Error: #{diary.url} => #{e}:#{e.backtrace}"
    end
  end
end

agent.close