--- mail2lj/mail2lj.pl 2007/08/14 06:13:58 1.6
+++ mail2lj/mail2lj.pl 2007/08/31 17:06:24 1.9
@@ -30,6 +30,9 @@
# original From: address). This is convenient when you want to notify
# script maintainer instead of the poster (exactly what I need).
#
+# Changes by Boris Veytsman - added --cut option
+#
+# Changes by LG: added --obfuscate option to protect e-mails in the body.
#
# NB: to generate MD5 hash of your password, use the following command:
# perl -MDigest::MD5 -e 'print Digest::MD5::md5_hex("yourpassword")."\n"'
@@ -42,8 +45,6 @@
# Original script seems to be distributed as freeware, so I stick to that
# decision. No warranty whatsoever, of course - use at your own risk ;-).
#
-# Changes by Boris Veytsman - added --cut option
-#
# ------------------------------------------------------------------------
use strict ;
@@ -77,6 +78,9 @@ my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo
# my $home = $ENV{HOME} || "/home/mail2lj" ; # Changed by LG
my $home = $ENV{HOME} || "/tmp/mail2lj" ;
+# Changed by LG - added because sometimes procmail doesn't set $USER.
+my $SysUser = $ENV{USER} || $ENV{LOGNAME} || getpwuid($>) || $> ;
+
# Changed by LG. Specifies the default incoming and outgoing charset for
# all e-mails (i.e, the posts CONTENT and the script replies).
# For incoming mails, the MIME header is analyzed and actual MIME charset
@@ -110,6 +114,10 @@ my %tr = (
'À' => 'yu', 'Ñ' => 'ya'
);
+# What to convert '@' to when obfuscating e-mail addresses (in '--add-from'
+# and/or '--obfuscate' modes.
+my $newdog = '[_@_]';
+
# ------------------------------------------------------------------------ #
# End configuration settings.
# ------------------------------------------------------------------------ #
@@ -129,6 +137,7 @@ my @opt_taglist ; # command-line tag
my $opt_ljcut ; # Add lj-cut after line number N
my $ljcut_delta = 5 ; # No lj-cut if less lines left after it
my $opt_ljcut_text ; # A text for lj-cut.
+my $opt_obfuscate ; # Obfuscate e-mail addresses in body
my $Parse = GetOptions( \%Opt,
'user|u=s',
'password|passwd|p=s',
@@ -152,6 +161,7 @@ my $Parse = GetOptions( \%Opt,
'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut,
'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text,
'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,
+ 'obfuscate|obfu|o!' => \$opt_obfuscate,
'help|h' => \$opt_h,
);
@@ -228,14 +238,19 @@ my $alias = shift @ARGV || "none" ;
my $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;
-# Changed by LG - changed directory.
+# Changed by LG - changed directory to be user and process-specific.
# $mp->output_dir("$home/mimetmp") ;
-$mp->output_dir("/tmp/mimetmp-".$ENV{USER}) ;
+$mp->output_dir("/tmp/mimetmp-" . $SysUser . "-$$") ;
mkdir $mp->output_dir if not -d $mp->output_dir ; # Create it if missing
# Get the whole mail.
+# Changed by LG - added removal of output directory.
my $me = $mp->parse(\*STDIN) ;
-END { $me && $me->purge() } ;
+END { $me and $me->purge() ;
+ rmdir $mp->output_dir if -d $mp->output_dir
+ or print STDERR "Error removing $mp->output_dir: $!\n" ;
+ } ;
+
# Changed by LG - different log file name.
# open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;
@@ -536,6 +551,13 @@ sub post_body2href {
} elsif ($var =~ /^no-?tags?$/ || $var eq "no-?taglist") {
$req_data->{prop_taglist} = "" if $val =~ /^\s*((on)|(yes))\s*$/i ;
+ # Changed by LG - added 'Obfuscate' option to protect e-mail
+ # addresses in the body of the message.
+ } elsif ($var =~ /^obfuscate$/ ) {
+ $val = 1 if $val =~ /^\s*((on)|(yes))\s*$/i ;
+ $val = 0 if $val =~ /^\s*((off)|(no))\s*$/i ;
+ $opt_obfuscate = $val ;
+
# Anything else - just assign.
} else {
$req_data->{$var} = $val ;
@@ -578,6 +600,17 @@ sub hdr2utf8 {
return $r ;
}
+
+# Changed by LG - added this subroutine for a shortcut call to to_utf8().
+# All it does is conversion of a string to utf8.
+sub str2utf8 {
+ my ($s, $e) = @_;
+ my $r = "" ;
+
+ $r .= to_utf8({ -string => $s, -charset => $e }) ;
+ return $r ;
+}
+
sub post_me2req {
my ($me, $e, $hints) = @_ ;
my $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;
@@ -585,10 +618,11 @@ sub post_me2req {
my $charset = $mehh->mime_attr("content-type.charset") || $e ;
my $subject = hdr2utf8($me->get('Subject') || "", $charset) ;
chomp $subject ; # Changed by LG
-
- # Changed by LG
- my $from = hdr2utf8($me->get('From') || "", $charset) ;
+ # Changed by LG.
+ my $from = hdr2utf8($me->get('From') || "", $charset) ;
chomp $from ;
+ my $olddog_utf8 = str2utf8("\@", "ISO-8859-1") ; # @ in utf
+ my $newdog_utf8 = str2utf8($newdog, "ISO-8859-1") ; # obfuscated in utf
my $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;
my $req = new HTTP::Request('POST', $post_uri) or
@@ -608,29 +642,58 @@ sub post_me2req {
# Changed by LG - removed prefixing.
# $hr->{subject} = "[mail2lj] " . $hr->{subject} ;
- # Changed by LG - added options to add the 'From' field to the
- # posted message.
- #
- # NOTE: $from is already in UTF8. Strictly speaking, everything
- # that we add to it MUST ALSO BE IN UTF8 (i.e. you need to run
- # a to_utf8() function on it). But since all I'm adding is in
- # ISO-8859-1 lower ASCII characters (which are guaranteed to
- # have the same values in UTF8 as in plain ISO-8859-1), I'm
- # cheating here and taking a shortcut. If you want to add
- # something non-ASCII, you MUST convert it to UTF8 first!
- # Be forewarned!
- if ( $opt_addfrom ) {
- $hr->{event} = "From: $from" . "\n\n" . $hr->{event} ;
- } elsif ( $opt_addfromh ) {
- my $html_from = "From: $from" ;
- $html_from =~ s/\@/[_\@_]/g ;
- $hr->{event} = $html_from . "\n\n" . $hr->{event} ;
+
+ # Changed by LG - added option to obfuscate all e-mail addresses in
+ # the body of mail messages.
+ if ( $opt_obfuscate ) {
+ $hr->{event} =~
+ s/\b([-+_.\w]+)($olddog_utf8)([-_.\w]+)\b/$1${newdog_utf8}$3/g ;
}
+
+ # Changed by LG - added options to add the plain or HTML-ized 'From'
+ # field to the posted message.
+ #
+ # NOTE: $from is already in UTF8, but the "From:" and HTML tags are
+ # not. Strictly speaking, everything that goes to $hr->{event}
+ # MUST ALSO BE IN UTF8. A cheating shortcut is possible:
+ # since all lower ASCII characters are guaranteed to have
+ # the same values in UTF8 as in plain ISO-8859-1, you could
+ # possibly stick ASCII strings to $from without risk. But in
+ # order to add something non-ASCII, you absolutely MUST convert
+ # it to UTF8 first! To avoid the risk of forgetting this, the
+ # following substitutions are done in a _proper_ (albeit
+ # somewhat awkward) way.
+ if ( $opt_addfrom || $opt_addfromh ) {
+
+ # Assemble the added From string in UTF8.
+ my $added_from ;
+ if ( $opt_addfrom ) {
+ $added_from = str2utf8("From: ", "ISO-8859-1")
+ . $from . str2utf8("\n\n", "ISO-8859-1") ;
+ } elsif ( $opt_addfromh ) {
+ $added_from = str2utf8("From: ", "ISO-8859-1" )
+ . $from
+ . str2utf8("\n\n", "ISO-8859-1") ;
+ }
+
+ # This address is alway obfuscated (independently of the
+ # '--obfuscate' option which only governs addresses _already_
+ # in the body.
+ $added_from =~ s/$olddog_utf8/$newdog_utf8/g ; # Obfuscate
+ $hr->{event} = $added_from . $hr->{event} ; # And append
+ }
+
# Changed by LG - added an option to preserve (html-ize) multiple
# spaces and tabs (convert '\t' to eight ' ' and convert
# multiple continuous spaces into sequence of ' ').
# Lines with tabs are additionally wrapped in ... tags.
+ #
+ # NOTE: These tags should be in UTF8. But since HTML tags themselves
+ # are *certainly* in lower ASCII, we can safely stick them on
+ # top of the existing UTF8 post. But if you dare to add
+ # anything more than ASCII-markup, you'd better str2utf8() it
+ # first! See note in the $opt_addfrom/$opt_addfromh processing above.
if ( $opt_keepspaces ) {
$hr->{event} =~ s/^(.*\t.*)$/$1<\/nobr>/gm ;
$hr->{event} =~ s/\t/\ \ \ \ \ \ \ \ /g ;
@@ -641,7 +704,7 @@ sub post_me2req {
# Change by BV - added the option to put lj-cut after '--cut XX' lines
#
# Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines
- # is left in the posting.
+ # is left in the posting. Also added $opt_ljcut_text.
#
if ($opt_ljcut>0) {
my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1;
@@ -965,8 +1028,11 @@ Options:
Insert the From: field from the e-mail as the first line of
the posted message. The field is added in plain text (without
any HTML-formatting - see '--fromh' for that). For slight
- antispam protection, '\@' is replaced by '[_\@_]'. The option
+ antispam protection, '\@' is replaced by '$newdog'. The option
can be negated ('--nofrom'). Default is '--nofrom'.
+ Note: this option is independent from '--obfuscate' (i.e. the
+ prepended From is always obfuscated, even if the rest of the
+ message is not).
--fromh, --addfromh
Same as '--from', but uses HTML-markup to highlight inserted
@@ -974,6 +1040,12 @@ Options:
nice for mailing list -> Livejournal crossposting. The option
can be negated ('--nofromh'). Default is '--nofromh'.
+-o, --obfuscate
+ Obfuscate all e-mail addresses that are present in the body
+ of the message. For slight antispam protection, '\@' in these
+ addresses is replaced by '$newdog'. The option can be negated
+ ('--noobfuscate'). Default is '--noobfuscate'.
+
--spaces, --keepspaces
Normally the script does not change original message text,
and all of it is preserved in the body of resulting LJ post.