--- mail2lj/mail2lj.pl 2007/08/14 06:13:58 1.6 +++ mail2lj/mail2lj.pl 2007/08/31 17:06:24 1.9 @@ -30,6 +30,9 @@ # original From: address). This is convenient when you want to notify # script maintainer instead of the poster (exactly what I need). # +# Changes by Boris Veytsman - added --cut option +# +# Changes by LG: added --obfuscate option to protect e-mails in the body. # # NB: to generate MD5 hash of your password, use the following command: # perl -MDigest::MD5 -e 'print Digest::MD5::md5_hex("yourpassword")."\n"' @@ -42,8 +45,6 @@ # Original script seems to be distributed as freeware, so I stick to that # decision. No warranty whatsoever, of course - use at your own risk ;-). # -# Changes by Boris Veytsman - added --cut option -# # ------------------------------------------------------------------------ use strict ; @@ -77,6 +78,9 @@ my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo # my $home = $ENV{HOME} || "/home/mail2lj" ; # Changed by LG my $home = $ENV{HOME} || "/tmp/mail2lj" ; +# Changed by LG - added because sometimes procmail doesn't set $USER. +my $SysUser = $ENV{USER} || $ENV{LOGNAME} || getpwuid($>) || $> ; + # Changed by LG. Specifies the default incoming and outgoing charset for # all e-mails (i.e, the posts CONTENT and the script replies). # For incoming mails, the MIME header is analyzed and actual MIME charset @@ -110,6 +114,10 @@ my %tr = ( 'À' => 'yu', 'Ñ' => 'ya' ); +# What to convert '@' to when obfuscating e-mail addresses (in '--add-from' +# and/or '--obfuscate' modes. +my $newdog = '[_@_]'; + # ------------------------------------------------------------------------ # # End configuration settings. # ------------------------------------------------------------------------ # @@ -129,6 +137,7 @@ my @opt_taglist ; # command-line tag my $opt_ljcut ; # Add lj-cut after line number N my $ljcut_delta = 5 ; # No lj-cut if less lines left after it my $opt_ljcut_text ; # A text for lj-cut. +my $opt_obfuscate ; # Obfuscate e-mail addresses in body my $Parse = GetOptions( \%Opt, 'user|u=s', 'password|passwd|p=s', @@ -152,6 +161,7 @@ my $Parse = GetOptions( \%Opt, 'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut, 'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text, 'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces, + 'obfuscate|obfu|o!' => \$opt_obfuscate, 'help|h' => \$opt_h, ); @@ -228,14 +238,19 @@ my $alias = shift @ARGV || "none" ; my $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ; -# Changed by LG - changed directory. +# Changed by LG - changed directory to be user and process-specific. # $mp->output_dir("$home/mimetmp") ; -$mp->output_dir("/tmp/mimetmp-".$ENV{USER}) ; +$mp->output_dir("/tmp/mimetmp-" . $SysUser . "-$$") ; mkdir $mp->output_dir if not -d $mp->output_dir ; # Create it if missing # Get the whole mail. +# Changed by LG - added removal of output directory. my $me = $mp->parse(\*STDIN) ; -END { $me && $me->purge() } ; +END { $me and $me->purge() ; + rmdir $mp->output_dir if -d $mp->output_dir + or print STDERR "Error removing $mp->output_dir: $!\n" ; + } ; + # Changed by LG - different log file name. # open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ; @@ -536,6 +551,13 @@ sub post_body2href { } elsif ($var =~ /^no-?tags?$/ || $var eq "no-?taglist") { $req_data->{prop_taglist} = "" if $val =~ /^\s*((on)|(yes))\s*$/i ; + # Changed by LG - added 'Obfuscate' option to protect e-mail + # addresses in the body of the message. + } elsif ($var =~ /^obfuscate$/ ) { + $val = 1 if $val =~ /^\s*((on)|(yes))\s*$/i ; + $val = 0 if $val =~ /^\s*((off)|(no))\s*$/i ; + $opt_obfuscate = $val ; + # Anything else - just assign. } else { $req_data->{$var} = $val ; @@ -578,6 +600,17 @@ sub hdr2utf8 { return $r ; } + +# Changed by LG - added this subroutine for a shortcut call to to_utf8(). +# All it does is conversion of a string to utf8. +sub str2utf8 { + my ($s, $e) = @_; + my $r = "" ; + + $r .= to_utf8({ -string => $s, -charset => $e }) ; + return $r ; +} + sub post_me2req { my ($me, $e, $hints) = @_ ; my $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ; @@ -585,10 +618,11 @@ sub post_me2req { my $charset = $mehh->mime_attr("content-type.charset") || $e ; my $subject = hdr2utf8($me->get('Subject') || "", $charset) ; chomp $subject ; # Changed by LG - - # Changed by LG - my $from = hdr2utf8($me->get('From') || "", $charset) ; + # Changed by LG. + my $from = hdr2utf8($me->get('From') || "", $charset) ; chomp $from ; + my $olddog_utf8 = str2utf8("\@", "ISO-8859-1") ; # @ in utf + my $newdog_utf8 = str2utf8($newdog, "ISO-8859-1") ; # obfuscated in utf my $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ; my $req = new HTTP::Request('POST', $post_uri) or @@ -608,29 +642,58 @@ sub post_me2req { # Changed by LG - removed prefixing. # $hr->{subject} = "[mail2lj] " . $hr->{subject} ; - # Changed by LG - added options to add the 'From' field to the - # posted message. - # - # NOTE: $from is already in UTF8. Strictly speaking, everything - # that we add to it MUST ALSO BE IN UTF8 (i.e. you need to run - # a to_utf8() function on it). But since all I'm adding is in - # ISO-8859-1 lower ASCII characters (which are guaranteed to - # have the same values in UTF8 as in plain ISO-8859-1), I'm - # cheating here and taking a shortcut. If you want to add - # something non-ASCII, you MUST convert it to UTF8 first! - # Be forewarned! - if ( $opt_addfrom ) { - $hr->{event} = "From: $from" . "\n\n" . $hr->{event} ; - } elsif ( $opt_addfromh ) { - my $html_from = "From: $from" ; - $html_from =~ s/\@/[_\@_]/g ; - $hr->{event} = $html_from . "\n\n" . $hr->{event} ; + + # Changed by LG - added option to obfuscate all e-mail addresses in + # the body of mail messages. + if ( $opt_obfuscate ) { + $hr->{event} =~ + s/\b([-+_.\w]+)($olddog_utf8)([-_.\w]+)\b/$1${newdog_utf8}$3/g ; } + + # Changed by LG - added options to add the plain or HTML-ized 'From' + # field to the posted message. + # + # NOTE: $from is already in UTF8, but the "From:" and HTML tags are + # not. Strictly speaking, everything that goes to $hr->{event} + # MUST ALSO BE IN UTF8. A cheating shortcut is possible: + # since all lower ASCII characters are guaranteed to have + # the same values in UTF8 as in plain ISO-8859-1, you could + # possibly stick ASCII strings to $from without risk. But in + # order to add something non-ASCII, you absolutely MUST convert + # it to UTF8 first! To avoid the risk of forgetting this, the + # following substitutions are done in a _proper_ (albeit + # somewhat awkward) way. + if ( $opt_addfrom || $opt_addfromh ) { + + # Assemble the added From string in UTF8. + my $added_from ; + if ( $opt_addfrom ) { + $added_from = str2utf8("From: ", "ISO-8859-1") + . $from . str2utf8("\n\n", "ISO-8859-1") ; + } elsif ( $opt_addfromh ) { + $added_from = str2utf8("From: ", "ISO-8859-1" ) + . $from + . str2utf8("\n\n", "ISO-8859-1") ; + } + + # This address is alway obfuscated (independently of the + # '--obfuscate' option which only governs addresses _already_ + # in the body. + $added_from =~ s/$olddog_utf8/$newdog_utf8/g ; # Obfuscate + $hr->{event} = $added_from . $hr->{event} ; # And append + } + # Changed by LG - added an option to preserve (html-ize) multiple # spaces and tabs (convert '\t' to eight ' ' and convert # multiple continuous spaces into sequence of '  '). # Lines with tabs are additionally wrapped in ... tags. + # + # NOTE: These tags should be in UTF8. But since HTML tags themselves + # are *certainly* in lower ASCII, we can safely stick them on + # top of the existing UTF8 post. But if you dare to add + # anything more than ASCII-markup, you'd better str2utf8() it + # first! See note in the $opt_addfrom/$opt_addfromh processing above. if ( $opt_keepspaces ) { $hr->{event} =~ s/^(.*\t.*)$/$1<\/nobr>/gm ; $hr->{event} =~ s/\t/\ \ \ \ \ \ \ \ /g ; @@ -641,7 +704,7 @@ sub post_me2req { # Change by BV - added the option to put lj-cut after '--cut XX' lines # # Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines - # is left in the posting. + # is left in the posting. Also added $opt_ljcut_text. # if ($opt_ljcut>0) { my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1; @@ -965,8 +1028,11 @@ Options: Insert the From: field from the e-mail as the first line of the posted message. The field is added in plain text (without any HTML-formatting - see '--fromh' for that). For slight - antispam protection, '\@' is replaced by '[_\@_]'. The option + antispam protection, '\@' is replaced by '$newdog'. The option can be negated ('--nofrom'). Default is '--nofrom'. + Note: this option is independent from '--obfuscate' (i.e. the + prepended From is always obfuscated, even if the rest of the + message is not). --fromh, --addfromh Same as '--from', but uses HTML-markup to highlight inserted @@ -974,6 +1040,12 @@ Options: nice for mailing list -> Livejournal crossposting. The option can be negated ('--nofromh'). Default is '--nofromh'. +-o, --obfuscate + Obfuscate all e-mail addresses that are present in the body + of the message. For slight antispam protection, '\@' in these + addresses is replaced by '$newdog'. The option can be negated + ('--noobfuscate'). Default is '--noobfuscate'. + --spaces, --keepspaces Normally the script does not change original message text, and all of it is preserved in the body of resulting LJ post.