Diff for /mail2lj/mail2lj.pl between versions 1.1 and 1.7

version 1.1, 2007/08/12 19:54:44 version 1.7, 2007/08/14 20:59:15
Line 42 Line 42
 # Original script seems to be distributed as freeware, so I stick to that  # Original script seems to be distributed as freeware, so I stick to that
 # decision.  No warranty whatsoever, of course - use at your own risk ;-).  # decision.  No warranty whatsoever, of course - use at your own risk ;-).
 #  #
   # Changes by Boris Veytsman - added --cut option
   #
 # ------------------------------------------------------------------------  # ------------------------------------------------------------------------
   
 use     strict ;  use     strict ;
Line 75  my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo Line 77  my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo
 # my    $home = $ENV{HOME} || "/home/mail2lj" ;                 # Changed by LG  # my    $home = $ENV{HOME} || "/home/mail2lj" ;                 # Changed by LG
 my      $home = $ENV{HOME} || "/tmp/mail2lj" ;  my      $home = $ENV{HOME} || "/tmp/mail2lj" ;
   
   # Changed by LG - added because sometimes procmail doesn't set $USER.
   my      $SysUser = $ENV{USER} || $ENV{LOGNAME} || getpwuid($>) || $> ;
   
 # Changed by LG.  Specifies the default incoming and outgoing charset for  # Changed by LG.  Specifies the default incoming and outgoing charset for
 # all e-mails (i.e, the posts CONTENT and the script replies).    # all e-mails (i.e, the posts CONTENT and the script replies).  
 # For incoming mails, the MIME header is analyzed and actual MIME charset  # For incoming mails, the MIME header is analyzed and actual MIME charset
Line 115  my %tr = ( Line 120  my %tr = (
   
 # ------------------------------------------------------------------------ #  # ------------------------------------------------------------------------ #
 # Changed by LG - added parsing of command line.  # Changed by LG - added parsing of command line.
   # Changed by BV - added options cut
 # ------------------------------------------------------------------------ #  # ------------------------------------------------------------------------ #
 my      %Opt = ();                      # Main options go here  my      %Opt = ();                      # Main options go here
 my      $opt_h ;                        # Help flag  my      $opt_h ;                        # Help flag
Line 123  my  $opt_addfrom ;    # Add the From fie Line 129  my  $opt_addfrom ;    # Add the From fie
 my      $opt_addfromh ;                 # Add the htmlized From to the post  my      $opt_addfromh ;                 # Add the htmlized From to the post
 my      $opt_keepspaces ;               # HTML-encode multiple spaces in e-mail  my      $opt_keepspaces ;               # HTML-encode multiple spaces in e-mail
 my      @opt_taglist ;                  # command-line taglist first goes here  my      @opt_taglist ;                  # command-line taglist first goes here
   my      $opt_ljcut ;                    # Add lj-cut after line number N
   my      $ljcut_delta = 5 ;              # No lj-cut if less lines left after it
   my      $opt_ljcut_text ;               # A text for lj-cut.
 my      $Parse = GetOptions( \%Opt,  my      $Parse = GetOptions( \%Opt,
                         'user|u=s',                          'user|u=s',
                         'password|passwd|p=s',                          'password|passwd|p=s',
Line 133  my  $Parse = GetOptions( \%Opt, Line 142  my  $Parse = GetOptions( \%Opt,
                         'prop_opt_backdated|backdated|back-dated|backdate|back-date|back!',                          'prop_opt_backdated|backdated|back-dated|backdate|back-date|back!',
                         'subject|subj|s=s',                          'subject|subj|s=s',
                         'taglist|tags|tag|t=s' => \@opt_taglist,  # Will tweak                          'taglist|tags|tag|t=s' => \@opt_taglist,  # Will tweak
                           'notaglist|notags|notag|not|no-taglist|no-tags|no-tag|no-t' => sub {undef @opt_taglist},
                         'usejournal|use-journal|use|journal|j=s',                          'usejournal|use-journal|use|journal|j=s',
                         'prop_current_mood|current_mood|mood=s',                          'prop_current_mood|current_mood|mood=s',
                         'prop_current_music|current_music|music=s',                          'prop_current_music|current_music|music=s',
Line 142  my  $Parse = GetOptions( \%Opt, Line 152  my  $Parse = GetOptions( \%Opt,
                         'bounces|bounce|b=s' => \$opt_bounces,                          'bounces|bounce|b=s' => \$opt_bounces,
                         'addfrom|add-from|from!' => \$opt_addfrom,                          'addfrom|add-from|from!' => \$opt_addfrom,
                         'addfromh|add-fromh|fromh!' => \$opt_addfromh,                          'addfromh|add-fromh|fromh!' => \$opt_addfromh,
                           'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut,
                           'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text,
                         'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,                          'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,
                         'help|h' => \$opt_h,                          'help|h' => \$opt_h,
                            );                             );
Line 198  if ( exists $Opt{'comments'} ) { Line 210  if ( exists $Opt{'comments'} ) {
 # with other parameters.  # with other parameters.
 $Opt{'prop_taglist'} = join( ", ", @opt_taglist )  if ( @opt_taglist ) ;  $Opt{'prop_taglist'} = join( ", ", @opt_taglist )  if ( @opt_taglist ) ;
   
 # Convert all command line options to unicode.  # Convert $opt_ljcut_text to UTF8.
   if ( defined $opt_ljcut_text ) {
      $opt_ljcut_text = 
           to_utf8({ -string => $opt_ljcut_text, -charset => $SystemCharset }) ;
   }
   
   # Convert all %Opt command line options to unicode.
 # Function href2utf8() uses a reference to input hash, so %Opt is  # Function href2utf8() uses a reference to input hash, so %Opt is
 # being modified "in-place".  # being modified "in-place".
 href2utf8( \%Opt, $SystemCharset) ;  href2utf8( \%Opt, $SystemCharset) ;
Line 213  my $alias = shift @ARGV || "none" ; Line 231  my $alias = shift @ARGV || "none" ;
 my      $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;  my      $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;
   
   
 # Changed by LG - changed directory.  # Changed by LG - changed directory to be user and process-specific.
 # $mp->output_dir("$home/mimetmp") ;  # $mp->output_dir("$home/mimetmp") ;
 $mp->output_dir("/tmp/mimetmp-$ENV{user}") ;  $mp->output_dir("/tmp/mimetmp-" . $SysUser . "-$$") ;
 mkdir $mp->output_dir if not -d $mp->output_dir ;       # Create it if missing  mkdir $mp->output_dir if not -d $mp->output_dir ;       # Create it if missing
   
 # Get the whole mail.  # Get the whole mail.
   # Changed by LG - added removal of output directory.
 my      $me = $mp->parse(\*STDIN) ;  my      $me = $mp->parse(\*STDIN) ;
 END     { $me && $me->purge() } ;  END     { $me and $me->purge() ;
             rmdir $mp->output_dir if -d $mp->output_dir 
                   or print STDERR "Error removing $mp->output_dir: $!\n" ;
           } ;
   
   
 # Changed by LG -  different log file name.  # Changed by LG -  different log file name.
 # open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;  # open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;
Line 514  sub post_body2href { Line 537  sub post_body2href {
   
                 # Changed by LG - added 'tags' option.                    # Changed by LG - added 'tags' option.  
                 } elsif ($var =~ /^tags?$/ || $var eq "taglist") {                  } elsif ($var =~ /^tags?$/ || $var eq "taglist") {
                         $req_data->{prop_taglist} = $val;                      $req_data->{prop_taglist} = $val;
   
                   # Changed by LG - added 'notags' option.  Empty the preceding
                   # taglist if set to true, otherwise do nothing 
                   } elsif ($var =~ /^no-?tags?$/ || $var eq "no-?taglist") {
                       $req_data->{prop_taglist} = "" if $val =~ /^\s*((on)|(yes))\s*$/i ;
   
                 # Anything else - just assign.                  # Anything else - just assign.
                 } else {                  } else {
Line 558  sub hdr2utf8 { Line 586  sub hdr2utf8 {
         return $r ;          return $r ;
 }  }
   
   
   # Changed by LG - added this subroutine for a shortcut call to to_utf8().
   # All it does is conversion of a string to utf8.
   sub     str2utf8 {
           my      ($s, $e) = @_;
           my      $r = "" ;
   
           $r .= to_utf8({ -string => $s, -charset => $e }) ;
           return $r ;
   }
   
 sub     post_me2req {  sub     post_me2req {
         my      ($me, $e, $hints) = @_ ;          my      ($me, $e, $hints) = @_ ;
         my      $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;          my      $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;
Line 565  sub post_me2req { Line 604  sub post_me2req {
         my      $charset = $mehh->mime_attr("content-type.charset") || $e ;          my      $charset = $mehh->mime_attr("content-type.charset") || $e ;
         my      $subject = hdr2utf8($me->get('Subject') || "", $charset) ;          my      $subject = hdr2utf8($me->get('Subject') || "", $charset) ;
         chomp $subject ;                                        # Changed by LG          chomp $subject ;                                        # Changed by LG
           # Changed by LG.
         # Changed by LG          my $from = hdr2utf8($me->get('From') || "", $charset) ;
         my      $from = hdr2utf8($me->get('From') || "", $charset) ;  
         chomp $from ;          chomp $from ;
   
         my      $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;          my      $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;
Line 588  sub post_me2req { Line 626  sub post_me2req {
         # Changed by LG - removed prefixing.          # Changed by LG - removed prefixing.
         # $hr->{subject} = "[mail2lj] " . $hr->{subject} ;          # $hr->{subject} = "[mail2lj] " . $hr->{subject} ;
   
         # Changed by LG - added options to add the 'From' field to the          # Changed by LG - added options to add the plain or HTML-ized 'From'
         # posted message.          # field to the posted message.
         if ( $opt_addfrom ) {          # 
            $hr->{event} = "From: $from" . "\n\n" . $hr->{event} ;          # NOTE: $from is already in UTF8, but the "From:" and HTML tags are
         } elsif ( $opt_addfromh ) {          #       not.  Strictly speaking, everything that goes to $hr->{event}
            my $html_from = "<nobr><i><b>From:</b> $from</i></nobr>" ;          #       MUST ALSO BE IN UTF8.  A cheating shortcut is possible:
            $html_from =~ s/\@/[_\@_]/g ;          #       since all lower ASCII characters are guaranteed to have
            $hr->{event} = $html_from . "\n\n" . $hr->{event} ;          #       the same values in UTF8 as in plain ISO-8859-1, you could
         }          #       possibly stick ASCII strings to $from without risk.  But in
           #       order to add something non-ASCII, you absolutely MUST convert
           #       it to UTF8 first!  To avoid the risk of forgetting this, the
           #       following substitutions are done in a _proper_ (albeit 
           #       somewhat awkward) way.
           if ( $opt_addfrom || $opt_addfromh ) {
   
              # Assemble the added From string in UTF8.
              my $added_from ;
              if ( $opt_addfrom ) {
                 $added_from = str2utf8("From: ", "ISO-8859-1") 
                               . $from . str2utf8("\n\n", "ISO-8859-1") ;
              } elsif ( $opt_addfromh ) {
                 $added_from = str2utf8("<nobr><i><b>From:</b> ", "ISO-8859-1" )
                               . $from
                               . str2utf8("</i></nobr>\n\n", "ISO-8859-1") ;
              }
   
              # Obfuscate the address.
              my $olddog = str2utf8("\@", "ISO-8859-1") ;          # @ in utf8
              my $newdog = str2utf8("[_\@_]", "ISO-8859-1") ;      # [_@_] in utf8
              $added_from =~ s/$olddog/$newdog/g ;                 # Obfuscate
              $hr->{event} = $added_from . $hr->{event} ;          # And append
           } 
   
         # Changed by LG - added an option to preserve (html-ize) multiple          # Changed by LG - added an option to preserve (html-ize) multiple
         # spaces and tabs (convert '\t' to eight '&nbsp;' and convert          # spaces and tabs (convert '\t' to eight '&nbsp;' and convert
         # multiple continuous spaces into sequence of ' &nbsp;').          # multiple continuous spaces into sequence of ' &nbsp;').
         # Lines with tabs are additionally wrapped in <nobr>...</nobr> tags.          # Lines with tabs are additionally wrapped in <nobr>...</nobr> tags.
           #
           # NOTE: These tags should be in UTF8.  But since HTML tags themselves
           #       are *certainly* in lower ASCII, we can safely stick them on
           #       top of the existing UTF8 post.  But if you dare to add 
           #       anything more than ASCII-markup, you'd better str2utf8() it
           #       first!  See note in the $opt_addfrom/$opt_addfromh processing above.
         if ( $opt_keepspaces ) {          if ( $opt_keepspaces ) {
            $hr->{event} =~ s/^(.*\t.*)$/<nobr>$1<\/nobr>/gm ;             $hr->{event} =~ s/^(.*\t.*)$/<nobr>$1<\/nobr>/gm ;
            $hr->{event} =~ s/\t/\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;/g ;             $hr->{event} =~ s/\t/\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;/g ;
            $hr->{event} =~ s/  / \&nbsp;/g ;             $hr->{event} =~ s/  / \&nbsp;/g ;
         }          }
           
           #
           # Change by BV - added the option to put lj-cut after '--cut XX' lines
           #
           # Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines
           # is left in the posting.  Also added $opt_ljcut_text.
           #
           if ($opt_ljcut>0) {
               my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1;
               my $start=0;
               for (my $i=0; $i<$opt_ljcut; $i++) {
                   $start=index($hr->{event},"\n",$start)+1;
                   if ($start == 0) {
                       last;
                   }
               }
               # And insert the lj-cut if not too close to the end of the post.
               if ($start>0 ) {
                   if ( $nlines >= $opt_ljcut+$ljcut_delta ) {
                      my $ljcut = ( $opt_ljcut_text =~ /^\s*$/ ) ?
                                   '<lj-cut>' :
                                   '<lj-cut text="' . $opt_ljcut_text . '">' ;
                      substr($hr->{event}, $start,0) = $ljcut ;
                   } else {
                      print STDERR "'--cut $opt_ljcut' requested, which is " .
                                   "within $ljcut_delta of the total $nlines " .
                                   "lines. Skipping lj-cut.\n" ;
                   }
               }
           }
   
         $req->content_type('application/x-www-form-urlencoded');          $req->content_type('application/x-www-form-urlencoded');
         $req->content(href2string $hr) ;          $req->content(href2string $hr) ;
Line 860  Options: Line 957  Options:
                single or double quotes to protect from the shell.  Multiple                 single or double quotes to protect from the shell.  Multiple
                '-t' options are allowed and taglists will be combined.                 '-t' options are allowed and taglists will be combined.
   
   --notaglist, --notags
                  Unsets all previously defined tags.  Thus, a call to 
                     $shortname ... --tags X --tags Y ... --notags --tags Z
                  will yield a taglist consisting of just "Z".  This option is
                  rarely needed and added only for the sake of completeness.
   
 -d DATE, --date DATE  -d DATE, --date DATE
                Label posting with this date.  Date should be in LiveJournal's                 Label posting with this date.  Date should be in LiveJournal's
                format: DD.MM.YYYY HH:mm.  If absent, current date/time is used.                 format: DD.MM.YYYY HH:mm.  If absent, current date/time is used.
Line 923  Options: Line 1026  Options:
                better preserved in the journal.  The option can be negated                 better preserved in the journal.  The option can be negated
                ('--nospaces').  Default is '--nospaces'.                 ('--nospaces').  Default is '--nospaces'.
   
   --ljcut NUM, --cut NUM, -l NUM
                  Inserts '<lj-cut>' after NUM lines of the post content.
                  If the resulting lj-cut happens to be within $ljcut_delta lines from
                  the end of the post, the cut will not be added.
   
   --ljcut-text TEXT, --cut-text TEXT, --cuttext TEXT
                  Text to use as lj-cut text parameter (in <lj-cut text="TEXT">).
                  If the text contains nothing but whitespace, it is ignored.
                  Remember to quote spaces and special characters from the shell.
   
 --charset CHARSET  --charset CHARSET
                This option tells the script that all COMMAND LINE options are                 This option tells the script that all COMMAND LINE options are
                given in this charset.  Default is "$SystemCharset".                 given in this charset.  Default is "$SystemCharset".
Line 931  Options: Line 1044  Options:
                utf8).  It also has absolutely no effect on the in-the-body                 utf8).  It also has absolutely no effect on the in-the-body
                keywords (they are also governed by email's charset).  This                 keywords (they are also governed by email's charset).  This
                option is meaningful ONLY for the text that you supply VIA                 option is meaningful ONLY for the text that you supply VIA
                COMMAND LINE (e.g. '-s Subject').                 COMMAND LINE (e.g. '-s Subject' or '--cuttext TEXT').
                                 
 -b xxx\@yyy, --bounces xxx\@yyy  -b xxx\@yyy, --bounces xxx\@yyy
                Normally, if errors occur during posting (e.g. wrong password),                 Normally, if errors occur during posting (e.g. wrong password),
Line 959  command line options), they should look Line 1072  command line options), they should look
         Security: private          Security: private
         Subject: Rzhevskij zhiv!          Subject: Rzhevskij zhiv!
         Tags:  Junk, Viva Rzhevskij!          Tags:  Junk, Viva Rzhevskij!
           Notags: yes                     # Clears all preceding tags
         Formatted: on                   # Or equivalent "Autoformat: off"          Formatted: on                   # Or equivalent "Autoformat: off"
         Usejournal: gusary          Usejournal: gusary
         Mood: okay          Mood: okay

Removed from v.1.1  
changed lines
  Added in v.1.7


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>