Diff for /mail2lj/mail2lj.pl between versions 1.3 and 1.10

version 1.3, 2007/08/13 15:06:38 version 1.10, 2014/05/06 00:42:15
Line 30 Line 30
 #     original From: address).  This is convenient when you want to notify   #     original From: address).  This is convenient when you want to notify 
 #     script maintainer instead of the poster (exactly what I need).  #     script maintainer instead of the poster (exactly what I need).
 #  #
   # Changes by Boris Veytsman - added --cut option
   #
   # Changes by LG: added --obfuscate option to protect e-mails in the body.
 #  #
 # NB: to generate MD5 hash of your password, use the following command:  # NB: to generate MD5 hash of your password, use the following command:
 #     perl -MDigest::MD5 -e 'print Digest::MD5::md5_hex("yourpassword")."\n"'  #     perl -MDigest::MD5 -e 'print Digest::MD5::md5_hex("yourpassword")."\n"'
Line 42 Line 45
 # Original script seems to be distributed as freeware, so I stick to that  # Original script seems to be distributed as freeware, so I stick to that
 # decision.  No warranty whatsoever, of course - use at your own risk ;-).  # decision.  No warranty whatsoever, of course - use at your own risk ;-).
 #  #
 # Changes by Boris Veytsman - added --cut option  
 #  
 # ------------------------------------------------------------------------  # ------------------------------------------------------------------------
   
 use     strict ;  use     strict ;
Line 54  use HTTP::Request ; Line 55  use HTTP::Request ;
 use     URI::Escape ;  use     URI::Escape ;
 use     MIME::Parser ;  use     MIME::Parser ;
 use     MIME::Words qw/decode_mimewords encode_mimeword/ ;  use     MIME::Words qw/decode_mimewords encode_mimeword/ ;
 use     Unicode::MapUTF8 qw/to_utf8 from_utf8/ ;  use     Unicode::MapUTF8 qw/to_utf8 from_utf8 utf8_charset_alias/ ;
 use     HTML::TokeParser ;  use     HTML::TokeParser ;
   
 # Changed by LG - commented out configs.  # Changed by LG - commented out configs.
Line 77  my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo Line 78  my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo
 # my    $home = $ENV{HOME} || "/home/mail2lj" ;                 # Changed by LG  # my    $home = $ENV{HOME} || "/home/mail2lj" ;                 # Changed by LG
 my      $home = $ENV{HOME} || "/tmp/mail2lj" ;  my      $home = $ENV{HOME} || "/tmp/mail2lj" ;
   
   # Changed by LG - added because sometimes procmail doesn't set $USER.
   my      $SysUser = $ENV{USER} || $ENV{LOGNAME} || getpwuid($>) || $> ;
   
 # Changed by LG.  Specifies the default incoming and outgoing charset for  # Changed by LG.  Specifies the default incoming and outgoing charset for
 # all e-mails (i.e, the posts CONTENT and the script replies).    # all e-mails (i.e, the posts CONTENT and the script replies).  
 # For incoming mails, the MIME header is analyzed and actual MIME charset  # For incoming mails, the MIME header is analyzed and actual MIME charset
Line 110  my %tr = ( Line 114  my %tr = (
 'À' => 'yu', 'Ñ' => 'ya'  'À' => 'yu', 'Ñ' => 'ya'
 );  );
   
   # What to convert '@' to when obfuscating e-mail addresses (in '--add-from'
   # and/or '--obfuscate' modes.
   my $newdog = '[_@_]';
   
 # ------------------------------------------------------------------------ #  # ------------------------------------------------------------------------ #
 # End configuration settings.  # End configuration settings.
 # ------------------------------------------------------------------------ #  # ------------------------------------------------------------------------ #
Line 129  my  @opt_taglist ;    # command-line tag Line 137  my  @opt_taglist ;    # command-line tag
 my      $opt_ljcut ;                    # Add lj-cut after line number N  my      $opt_ljcut ;                    # Add lj-cut after line number N
 my      $ljcut_delta = 5 ;              # No lj-cut if less lines left after it  my      $ljcut_delta = 5 ;              # No lj-cut if less lines left after it
 my      $opt_ljcut_text ;               # A text for lj-cut.  my      $opt_ljcut_text ;               # A text for lj-cut.
   my      $opt_obfuscate ;                # Obfuscate e-mail addresses in body
 my      $Parse = GetOptions( \%Opt,  my      $Parse = GetOptions( \%Opt,
                         'user|u=s',                          'user|u=s',
                         'password|passwd|p=s',                          'password|passwd|p=s',
Line 139  my  $Parse = GetOptions( \%Opt, Line 148  my  $Parse = GetOptions( \%Opt,
                         'prop_opt_backdated|backdated|back-dated|backdate|back-date|back!',                          'prop_opt_backdated|backdated|back-dated|backdate|back-date|back!',
                         'subject|subj|s=s',                          'subject|subj|s=s',
                         'taglist|tags|tag|t=s' => \@opt_taglist,  # Will tweak                          'taglist|tags|tag|t=s' => \@opt_taglist,  # Will tweak
                           'notaglist|notags|notag|not|no-taglist|no-tags|no-tag|no-t' => sub {undef @opt_taglist},
                         'usejournal|use-journal|use|journal|j=s',                          'usejournal|use-journal|use|journal|j=s',
                         'prop_current_mood|current_mood|mood=s',                          'prop_current_mood|current_mood|mood=s',
                         'prop_current_music|current_music|music=s',                          'prop_current_music|current_music|music=s',
Line 151  my  $Parse = GetOptions( \%Opt, Line 161  my  $Parse = GetOptions( \%Opt,
                         'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut,                          'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut,
                         'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text,                          'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text,
                         'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,                          'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,
                           'obfuscate|obfu|o!' => \$opt_obfuscate,
                         'help|h' => \$opt_h,                          'help|h' => \$opt_h,
                            );                             );
   
Line 194  if ( exists $Opt{'comments'} ) { Line 205  if ( exists $Opt{'comments'} ) {
       $Opt{'prop_opt_nocomments'} = "" ;        $Opt{'prop_opt_nocomments'} = "" ;
       $Opt{'prop_opt_noemail'} = 1 ;        $Opt{'prop_opt_noemail'} = 1 ;
    } elsif ( $Opt{'comments'} =~ /^\s*((off)|(no))\s*$/i ) {     } elsif ( $Opt{'comments'} =~ /^\s*((off)|(no))\s*$/i ) {
       $Opt{'prop_opt_nocomments'} = 1         $Opt{'prop_opt_nocomments'} = 1 ;
    } else {     } else {
       $Opt{'prop_opt_nocomments'} = $Opt{'comments'} ;        $Opt{'prop_opt_nocomments'} = $Opt{'comments'} ;
    }     }
Line 206  if ( exists $Opt{'comments'} ) { Line 217  if ( exists $Opt{'comments'} ) {
 # with other parameters.  # with other parameters.
 $Opt{'prop_taglist'} = join( ", ", @opt_taglist )  if ( @opt_taglist ) ;  $Opt{'prop_taglist'} = join( ", ", @opt_taglist )  if ( @opt_taglist ) ;
   
 # Convert all command line options to unicode.  # Convert $opt_ljcut_text to UTF8.
   $opt_ljcut = 0  unless defined $opt_ljcut ;             # Safety
   if ( defined $opt_ljcut_text ) {
      $opt_ljcut_text = 
           to_utf8({ -string => $opt_ljcut_text, -charset => $SystemCharset }) ;
   }
   
   # Convert all %Opt command line options to unicode.
 # Function href2utf8() uses a reference to input hash, so %Opt is  # Function href2utf8() uses a reference to input hash, so %Opt is
 # being modified "in-place".  # being modified "in-place".
 href2utf8( \%Opt, $SystemCharset) ;  href2utf8( \%Opt, $SystemCharset) ;
Line 216  href2utf8( \%Opt, $SystemCharset) ; Line 234  href2utf8( \%Opt, $SystemCharset) ;
 umask 077 ;  umask 077 ;
   
   
   # Changed by LG: make sure that 'UTF-8' is recognized as a valid charset
   # along with "UTF8" ;-)
   utf8_charset_alias({ 'UTF-8' => 'UTF8' });
   
   
 # Changed by LG - moved from above.  # Changed by LG - moved from above.
 my      $alias = shift @ARGV || "none" ;  my      $alias = shift @ARGV || "none" ;
 my      $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;  my      $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;
   
   
 # Changed by LG - changed directory.  # Changed by LG - changed directory to be user and process-specific.
 # $mp->output_dir("$home/mimetmp") ;  # $mp->output_dir("$home/mimetmp") ;
 $mp->output_dir("/tmp/mimetmp-".$ENV{USER}) ;  $mp->output_dir("/tmp/mimetmp-" . $SysUser . "-$$") ;
 mkdir $mp->output_dir if not -d $mp->output_dir ;       # Create it if missing  mkdir $mp->output_dir if not -d $mp->output_dir ;       # Create it if missing
   
 # Get the whole mail.  # Get the whole mail.
   # Changed by LG - added removal of output directory.
 my      $me = $mp->parse(\*STDIN) ;  my      $me = $mp->parse(\*STDIN) ;
 END     { $me && $me->purge() } ;  END     { $me and $me->purge() ;
             rmdir $mp->output_dir if -d $mp->output_dir 
                   or print STDERR "Error removing $mp->output_dir: $!\n" ;
           } ;
   
   
 # Changed by LG -  different log file name.  # Changed by LG -  different log file name.
 # open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;  # open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;
Line 522  sub post_body2href { Line 550  sub post_body2href {
   
                 # Changed by LG - added 'tags' option.                    # Changed by LG - added 'tags' option.  
                 } elsif ($var =~ /^tags?$/ || $var eq "taglist") {                  } elsif ($var =~ /^tags?$/ || $var eq "taglist") {
                         $req_data->{prop_taglist} = $val;                      $req_data->{prop_taglist} = $val;
   
                   # Changed by LG - added 'notags' option.  Empty the preceding
                   # taglist if set to true, otherwise do nothing 
                   } elsif ($var =~ /^no-?tags?$/ || $var eq "no-?taglist") {
                       $req_data->{prop_taglist} = "" if $val =~ /^\s*((on)|(yes))\s*$/i ;
   
                   # Changed by LG - added 'Obfuscate' option to protect e-mail
                   # addresses in the body of the message.
                   } elsif ($var =~ /^obfuscate$/ ) {
                       $val = 1 if $val =~ /^\s*((on)|(yes))\s*$/i ;
                       $val = 0 if $val =~ /^\s*((off)|(no))\s*$/i ;
                       $opt_obfuscate = $val ;
   
                 # Anything else - just assign.                  # Anything else - just assign.
                 } else {                  } else {
Line 566  sub hdr2utf8 { Line 606  sub hdr2utf8 {
         return $r ;          return $r ;
 }  }
   
   
   # Changed by LG - added this subroutine for a shortcut call to to_utf8().
   # All it does is conversion of a string to utf8.
   sub     str2utf8 {
           my      ($s, $e) = @_;
           my      $r = "" ;
   
           $r .= to_utf8({ -string => $s, -charset => $e }) ;
           return $r ;
   }
   
 sub     post_me2req {  sub     post_me2req {
         my      ($me, $e, $hints) = @_ ;          my      ($me, $e, $hints) = @_ ;
         my      $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;          # Changed by LG - if no body found (may happen sometimes in multipart
           # messages) then attempt to grab the very first MIME part.  This is
           # somewhat hack-ish, but generally works ;-)
           # my    $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;
           my      $mebh = $me->bodyhandle() ;
         my      $mehh = $me->head() ;          my      $mehh = $me->head() ;
           if ( ! defined $mebh ) {
               # Hack!  And get the corresponding header instead of overall one.
               $mebh = $me->parts(0)->bodyhandle() or die "post_message(): no body?\n" ;
               $mehh = $me->parts(0)->head() ;
           }
         my      $charset = $mehh->mime_attr("content-type.charset") || $e ;          my      $charset = $mehh->mime_attr("content-type.charset") || $e ;
         my      $subject = hdr2utf8($me->get('Subject') || "", $charset) ;          my      $subject = hdr2utf8($me->get('Subject') || "", $charset) ;
         chomp $subject ;                                        # Changed by LG          chomp $subject ;                                        # Changed by LG
           # Changed by LG.
         # Changed by LG          my $from = hdr2utf8($me->get('From') || "", $charset) ;
         my      $from = hdr2utf8($me->get('From') || "", $charset) ;  
         chomp $from ;          chomp $from ;
           my $olddog_utf8 = str2utf8("\@", "ISO-8859-1") ;    # @ in utf
           my $newdog_utf8 = str2utf8($newdog, "ISO-8859-1") ; # obfuscated in utf
   
         my      $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;          my      $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;
         my      $req = new HTTP::Request('POST', $post_uri) or          my      $req = new HTTP::Request('POST', $post_uri) or
Line 596  sub post_me2req { Line 657  sub post_me2req {
         # Changed by LG - removed prefixing.          # Changed by LG - removed prefixing.
         # $hr->{subject} = "[mail2lj] " . $hr->{subject} ;          # $hr->{subject} = "[mail2lj] " . $hr->{subject} ;
   
         # Changed by LG - added options to add the 'From' field to the  
         # posted message.          # Changed by LG - added option to obfuscate all e-mail addresses in
         if ( $opt_addfrom ) {          # the body of mail messages.
            $hr->{event} = "From: $from" . "\n\n" . $hr->{event} ;          if ( $opt_obfuscate ) {
         } elsif ( $opt_addfromh ) {             $hr->{event} =~ 
            my $html_from = "<nobr><i><b>From:</b> $from</i></nobr>" ;                 s/\b([-+_.\w]+)($olddog_utf8)([-_.\w]+)\b/$1${newdog_utf8}$3/g ;
            $html_from =~ s/\@/[_\@_]/g ;  
            $hr->{event} = $html_from . "\n\n" . $hr->{event} ;  
         }          }
   
   
           # Changed by LG - added options to add the plain or HTML-ized 'From'
           # field to the posted message.
           # 
           # NOTE: $from is already in UTF8, but the "From:" and HTML tags are
           #       not.  Strictly speaking, everything that goes to $hr->{event}
           #       MUST ALSO BE IN UTF8.  A cheating shortcut is possible:
           #       since all lower ASCII characters are guaranteed to have
           #       the same values in UTF8 as in plain ISO-8859-1, you could
           #       possibly stick ASCII strings to $from without risk.  But in
           #       order to add something non-ASCII, you absolutely MUST convert
           #       it to UTF8 first!  To avoid the risk of forgetting this, the
           #       following substitutions are done in a _proper_ (albeit 
           #       somewhat awkward) way.
           if ( $opt_addfrom || $opt_addfromh ) {
   
              # Assemble the added From string in UTF8.
              my $added_from ;
              if ( $opt_addfrom ) {
                 $added_from = str2utf8("From: ", "ISO-8859-1") 
                               . $from . str2utf8("\n\n", "ISO-8859-1") ;
              } elsif ( $opt_addfromh ) {
                 $added_from = str2utf8("<nobr><i><b>From:</b> ", "ISO-8859-1" )
                               . $from
                               . str2utf8("</i></nobr>\n\n", "ISO-8859-1") ;
              }
   
              # This address is alway obfuscated (independently of the 
              # '--obfuscate' option which only governs addresses _already_
              # in the body.
              $added_from =~ s/$olddog_utf8/$newdog_utf8/g ;       # Obfuscate
              $hr->{event} = $added_from . $hr->{event} ;          # And append
           } 
   
         # Changed by LG - added an option to preserve (html-ize) multiple          # Changed by LG - added an option to preserve (html-ize) multiple
         # spaces and tabs (convert '\t' to eight '&nbsp;' and convert          # spaces and tabs (convert '\t' to eight '&nbsp;' and convert
         # multiple continuous spaces into sequence of ' &nbsp;').          # multiple continuous spaces into sequence of ' &nbsp;').
         # Lines with tabs are additionally wrapped in <nobr>...</nobr> tags.          # Lines with tabs are additionally wrapped in <nobr>...</nobr> tags.
           #
           # NOTE: These tags should be in UTF8.  But since HTML tags themselves
           #       are *certainly* in lower ASCII, we can safely stick them on
           #       top of the existing UTF8 post.  But if you dare to add 
           #       anything more than ASCII-markup, you'd better str2utf8() it
           #       first!  See note in the $opt_addfrom/$opt_addfromh processing above.
         if ( $opt_keepspaces ) {          if ( $opt_keepspaces ) {
            $hr->{event} =~ s/^(.*\t.*)$/<nobr>$1<\/nobr>/gm ;             $hr->{event} =~ s/^(.*\t.*)$/<nobr>$1<\/nobr>/gm ;
            $hr->{event} =~ s/\t/\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;/g ;             $hr->{event} =~ s/\t/\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;\&nbsp;/g ;
Line 620  sub post_me2req { Line 719  sub post_me2req {
         # Change by BV - added the option to put lj-cut after '--cut XX' lines          # Change by BV - added the option to put lj-cut after '--cut XX' lines
         #          #
         # Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines          # Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines
         # is left in the posting.          # is left in the posting.  Also added $opt_ljcut_text.
         #          #
         if ($opt_ljcut>0) {          if ($opt_ljcut>0) {
             my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1;              my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1;
Line 898  Options: Line 997  Options:
                single or double quotes to protect from the shell.  Multiple                 single or double quotes to protect from the shell.  Multiple
                '-t' options are allowed and taglists will be combined.                 '-t' options are allowed and taglists will be combined.
   
   --notaglist, --notags
                  Unsets all previously defined tags.  Thus, a call to 
                     $shortname ... --tags X --tags Y ... --notags --tags Z
                  will yield a taglist consisting of just "Z".  This option is
                  rarely needed and added only for the sake of completeness.
   
 -d DATE, --date DATE  -d DATE, --date DATE
                Label posting with this date.  Date should be in LiveJournal's                 Label posting with this date.  Date should be in LiveJournal's
                format: DD.MM.YYYY HH:mm.  If absent, current date/time is used.                 format: DD.MM.YYYY HH:mm.  If absent, current date/time is used.
Line 938  Options: Line 1043  Options:
                Insert the From: field from the e-mail as the first line of                  Insert the From: field from the e-mail as the first line of 
                the posted message.  The field is added in plain text (without                 the posted message.  The field is added in plain text (without
                any HTML-formatting - see '--fromh' for that).  For slight                  any HTML-formatting - see '--fromh' for that).  For slight 
                antispam protection, '\@' is replaced by '[_\@_]'.  The option                  antispam protection, '\@' is replaced by '$newdog'.  The option 
                can be negated ('--nofrom').  Default is '--nofrom'.                 can be negated ('--nofrom').  Default is '--nofrom'.
                  Note: this option is independent from '--obfuscate' (i.e. the
                  prepended From is always obfuscated, even if the rest of the 
                  message is not).
   
 --fromh, --addfromh  --fromh, --addfromh
                Same as '--from', but uses HTML-markup to highlight inserted                  Same as '--from', but uses HTML-markup to highlight inserted 
Line 947  Options: Line 1055  Options:
                nice for mailing list -> Livejournal crossposting.  The option                 nice for mailing list -> Livejournal crossposting.  The option
                can be negated ('--nofromh').  Default is '--nofromh'.                 can be negated ('--nofromh').  Default is '--nofromh'.
   
   -o, --obfuscate
                  Obfuscate all e-mail addresses that are present in the body
                  of the message.  For slight antispam protection, '\@' in these
                  addresses is replaced by '$newdog'.  The option can be negated
                  ('--noobfuscate').  Default is '--noobfuscate'.
   
 --spaces, --keepspaces  --spaces, --keepspaces
                Normally the script does not change original message text,                 Normally the script does not change original message text,
                and all of it is preserved in the body of resulting LJ post.                 and all of it is preserved in the body of resulting LJ post.
Line 1007  command line options), they should look Line 1121  command line options), they should look
         Security: private          Security: private
         Subject: Rzhevskij zhiv!          Subject: Rzhevskij zhiv!
         Tags:  Junk, Viva Rzhevskij!          Tags:  Junk, Viva Rzhevskij!
           Notags: yes                     # Clears all preceding tags
         Formatted: on                   # Or equivalent "Autoformat: off"          Formatted: on                   # Or equivalent "Autoformat: off"
         Usejournal: gusary          Usejournal: gusary
         Mood: okay          Mood: okay

Removed from v.1.3  
changed lines
  Added in v.1.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>