--- mail2lj/mail2lj.pl 2007/08/12 19:54:44 1.1
+++ mail2lj/mail2lj.pl 2007/08/31 17:02:46 1.8
@@ -30,6 +30,9 @@
# original From: address). This is convenient when you want to notify
# script maintainer instead of the poster (exactly what I need).
#
+# Changes by Boris Veytsman - added --cut option
+#
+# Changes by LG: added --obfuscate option to protect e-mails in the body.
#
# NB: to generate MD5 hash of your password, use the following command:
# perl -MDigest::MD5 -e 'print Digest::MD5::md5_hex("yourpassword")."\n"'
@@ -75,6 +78,9 @@ my $host = $ENV{MAIL2LJ_DOMAIN} || "ledo
# my $home = $ENV{HOME} || "/home/mail2lj" ; # Changed by LG
my $home = $ENV{HOME} || "/tmp/mail2lj" ;
+# Changed by LG - added because sometimes procmail doesn't set $USER.
+my $SysUser = $ENV{USER} || $ENV{LOGNAME} || getpwuid($>) || $> ;
+
# Changed by LG. Specifies the default incoming and outgoing charset for
# all e-mails (i.e, the posts CONTENT and the script replies).
# For incoming mails, the MIME header is analyzed and actual MIME charset
@@ -108,6 +114,10 @@ my %tr = (
'À' => 'yu', 'Ñ' => 'ya'
);
+# What to convert '@' to when obfuscating e-mail addresses (in '--add-from'
+# and/or '--obfuscate' modes.
+my $newdog = '[_@_]';
+
# ------------------------------------------------------------------------ #
# End configuration settings.
# ------------------------------------------------------------------------ #
@@ -115,6 +125,7 @@ my %tr = (
# ------------------------------------------------------------------------ #
# Changed by LG - added parsing of command line.
+# Changed by BV - added options cut
# ------------------------------------------------------------------------ #
my %Opt = (); # Main options go here
my $opt_h ; # Help flag
@@ -123,6 +134,10 @@ my $opt_addfrom ; # Add the From fie
my $opt_addfromh ; # Add the htmlized From to the post
my $opt_keepspaces ; # HTML-encode multiple spaces in e-mail
my @opt_taglist ; # command-line taglist first goes here
+my $opt_ljcut ; # Add lj-cut after line number N
+my $ljcut_delta = 5 ; # No lj-cut if less lines left after it
+my $opt_ljcut_text ; # A text for lj-cut.
+my $opt_obfuscate ; # Obfuscate e-mail addresses in body
my $Parse = GetOptions( \%Opt,
'user|u=s',
'password|passwd|p=s',
@@ -133,6 +148,7 @@ my $Parse = GetOptions( \%Opt,
'prop_opt_backdated|backdated|back-dated|backdate|back-date|back!',
'subject|subj|s=s',
'taglist|tags|tag|t=s' => \@opt_taglist, # Will tweak
+ 'notaglist|notags|notag|not|no-taglist|no-tags|no-tag|no-t' => sub {undef @opt_taglist},
'usejournal|use-journal|use|journal|j=s',
'prop_current_mood|current_mood|mood=s',
'prop_current_music|current_music|music=s',
@@ -142,7 +158,10 @@ my $Parse = GetOptions( \%Opt,
'bounces|bounce|b=s' => \$opt_bounces,
'addfrom|add-from|from!' => \$opt_addfrom,
'addfromh|add-fromh|fromh!' => \$opt_addfromh,
+ 'ljcut|lj-cut|cut|l=i'=>\$opt_ljcut,
+ 'ljcut-text|lj-cut-text|cut-text|ljcuttext|cuttext=s'=>\$opt_ljcut_text,
'keep-spaces|keep-space|keepspaces|keepspace|spaces|space!' => \$opt_keepspaces,
+ 'obfuscate|obfu|o!' => \$opt_obfuscate,
'help|h' => \$opt_h,
);
@@ -198,7 +217,13 @@ if ( exists $Opt{'comments'} ) {
# with other parameters.
$Opt{'prop_taglist'} = join( ", ", @opt_taglist ) if ( @opt_taglist ) ;
-# Convert all command line options to unicode.
+# Convert $opt_ljcut_text to UTF8.
+if ( defined $opt_ljcut_text ) {
+ $opt_ljcut_text =
+ to_utf8({ -string => $opt_ljcut_text, -charset => $SystemCharset }) ;
+}
+
+# Convert all %Opt command line options to unicode.
# Function href2utf8() uses a reference to input hash, so %Opt is
# being modified "in-place".
href2utf8( \%Opt, $SystemCharset) ;
@@ -213,14 +238,19 @@ my $alias = shift @ARGV || "none" ;
my $mp = new MIME::Parser() or die "new MIME::Parser(): $!\n" ;
-# Changed by LG - changed directory.
+# Changed by LG - changed directory to be user and process-specific.
# $mp->output_dir("$home/mimetmp") ;
-$mp->output_dir("/tmp/mimetmp-$ENV{user}") ;
+$mp->output_dir("/tmp/mimetmp-" . $SysUser . "-$$") ;
mkdir $mp->output_dir if not -d $mp->output_dir ; # Create it if missing
# Get the whole mail.
+# Changed by LG - added removal of output directory.
my $me = $mp->parse(\*STDIN) ;
-END { $me && $me->purge() } ;
+END { $me and $me->purge() ;
+ rmdir $mp->output_dir if -d $mp->output_dir
+ or print STDERR "Error removing $mp->output_dir: $!\n" ;
+ } ;
+
# Changed by LG - different log file name.
# open(STDERR, ">>$home/generic.log") or die "open(`log'): $!\n" ;
@@ -514,7 +544,19 @@ sub post_body2href {
# Changed by LG - added 'tags' option.
} elsif ($var =~ /^tags?$/ || $var eq "taglist") {
- $req_data->{prop_taglist} = $val;
+ $req_data->{prop_taglist} = $val;
+
+ # Changed by LG - added 'notags' option. Empty the preceding
+ # taglist if set to true, otherwise do nothing
+ } elsif ($var =~ /^no-?tags?$/ || $var eq "no-?taglist") {
+ $req_data->{prop_taglist} = "" if $val =~ /^\s*((on)|(yes))\s*$/i ;
+
+ # Changed by LG - added 'Obfuscate' option to protect e-mail
+ # addresses in the body of the message.
+ } elsif ($var =~ /^obfuscate$/ ) {
+ $val = 1 if $val =~ /^\s*((on)|(yes))\s*$/i ;
+ $val = 0 if $val =~ /^\s*((off)|(no))\s*$/i ;
+ $opt_obfuscate = $val ;
# Anything else - just assign.
} else {
@@ -558,6 +600,17 @@ sub hdr2utf8 {
return $r ;
}
+
+# Changed by LG - added this subroutine for a shortcut call to to_utf8().
+# All it does is conversion of a string to utf8.
+sub str2utf8 {
+ my ($s, $e) = @_;
+ my $r = "" ;
+
+ $r .= to_utf8({ -string => $s, -charset => $e }) ;
+ return $r ;
+}
+
sub post_me2req {
my ($me, $e, $hints) = @_ ;
my $mebh = $me->bodyhandle() or die "post_message(): no body?\n" ;
@@ -565,10 +618,11 @@ sub post_me2req {
my $charset = $mehh->mime_attr("content-type.charset") || $e ;
my $subject = hdr2utf8($me->get('Subject') || "", $charset) ;
chomp $subject ; # Changed by LG
-
- # Changed by LG
- my $from = hdr2utf8($me->get('From') || "", $charset) ;
+ # Changed by LG.
+ my $from = hdr2utf8($me->get('From') || "", $charset) ;
chomp $from ;
+ my $olddog_utf8 = str2utf8("\@", "ISO-8859-1") ; # @ in utf
+ my $newdog_utf8 = str2utf8($newdog, "ISO-8859-1") ; # obfuscated in utf
my $hr = href2utf8(post_body2href($mebh->open("r")), $charset) ;
my $req = new HTTP::Request('POST', $post_uri) or
@@ -588,25 +642,93 @@ sub post_me2req {
# Changed by LG - removed prefixing.
# $hr->{subject} = "[mail2lj] " . $hr->{subject} ;
- # Changed by LG - added options to add the 'From' field to the
- # posted message.
- if ( $opt_addfrom ) {
- $hr->{event} = "From: $from" . "\n\n" . $hr->{event} ;
- } elsif ( $opt_addfromh ) {
- my $html_from = "From: $from" ;
- $html_from =~ s/\@/[_\@_]/g ;
- $hr->{event} = $html_from . "\n\n" . $hr->{event} ;
+
+ # Changed by LG - added option to obfuscate all e-mail addresses in
+ # the body of mail messages.
+ if ( $opt_obfuscate ) {
+ $hr->{event} =~
+ s/\b([-+_.\w]+)($olddog_utf8)([-_.\w]+)\b/$1${newdog_utf8}$3/g ;
}
+
+ # Changed by LG - added options to add the plain or HTML-ized 'From'
+ # field to the posted message.
+ #
+ # NOTE: $from is already in UTF8, but the "From:" and HTML tags are
+ # not. Strictly speaking, everything that goes to $hr->{event}
+ # MUST ALSO BE IN UTF8. A cheating shortcut is possible:
+ # since all lower ASCII characters are guaranteed to have
+ # the same values in UTF8 as in plain ISO-8859-1, you could
+ # possibly stick ASCII strings to $from without risk. But in
+ # order to add something non-ASCII, you absolutely MUST convert
+ # it to UTF8 first! To avoid the risk of forgetting this, the
+ # following substitutions are done in a _proper_ (albeit
+ # somewhat awkward) way.
+ if ( $opt_addfrom || $opt_addfromh ) {
+
+ # Assemble the added From string in UTF8.
+ my $added_from ;
+ if ( $opt_addfrom ) {
+ $added_from = str2utf8("From: ", "ISO-8859-1")
+ . $from . str2utf8("\n\n", "ISO-8859-1") ;
+ } elsif ( $opt_addfromh ) {
+ $added_from = str2utf8("From: ", "ISO-8859-1" )
+ . $from
+ . str2utf8("\n\n", "ISO-8859-1") ;
+ }
+
+ # This address is alway obfuscated (independently of the
+ # '--obfuscate' option which only governs addresses _already_
+ # in the body.
+ $added_from =~ s/$olddog_utf8/$newdog_utf8/g ; # Obfuscate
+ $hr->{event} = $added_from . $hr->{event} ; # And append
+ }
+
# Changed by LG - added an option to preserve (html-ize) multiple
# spaces and tabs (convert '\t' to eight ' ' and convert
# multiple continuous spaces into sequence of ' ').
# Lines with tabs are additionally wrapped in ... tags.
+ #
+ # NOTE: These tags should be in UTF8. But since HTML tags themselves
+ # are *certainly* in lower ASCII, we can safely stick them on
+ # top of the existing UTF8 post. But if you dare to add
+ # anything more than ASCII-markup, you'd better str2utf8() it
+ # first! See note in the $opt_addfrom/$opt_addfromh processing above.
if ( $opt_keepspaces ) {
$hr->{event} =~ s/^(.*\t.*)$/$1<\/nobr>/gm ;
$hr->{event} =~ s/\t/\ \ \ \ \ \ \ \ /g ;
$hr->{event} =~ s/ / \ /g ;
}
+
+ #
+ # Change by BV - added the option to put lj-cut after '--cut XX' lines
+ #
+ # Tweaked by LG - only adding lj-cut if more than $ljcut_delta lines
+ # is left in the posting. Also added $opt_ljcut_text.
+ #
+ if ($opt_ljcut>0) {
+ my $nlines = scalar( my @junk=split( /\n/, $hr->{event}, -1) ) - 1;
+ my $start=0;
+ for (my $i=0; $i<$opt_ljcut; $i++) {
+ $start=index($hr->{event},"\n",$start)+1;
+ if ($start == 0) {
+ last;
+ }
+ }
+ # And insert the lj-cut if not too close to the end of the post.
+ if ($start>0 ) {
+ if ( $nlines >= $opt_ljcut+$ljcut_delta ) {
+ my $ljcut = ( $opt_ljcut_text =~ /^\s*$/ ) ?
+ '' :
+ '' ;
+ substr($hr->{event}, $start,0) = $ljcut ;
+ } else {
+ print STDERR "'--cut $opt_ljcut' requested, which is " .
+ "within $ljcut_delta of the total $nlines " .
+ "lines. Skipping lj-cut.\n" ;
+ }
+ }
+ }
$req->content_type('application/x-www-form-urlencoded');
$req->content(href2string $hr) ;
@@ -860,6 +982,12 @@ Options:
single or double quotes to protect from the shell. Multiple
'-t' options are allowed and taglists will be combined.
+--notaglist, --notags
+ Unsets all previously defined tags. Thus, a call to
+ $shortname ... --tags X --tags Y ... --notags --tags Z
+ will yield a taglist consisting of just "Z". This option is
+ rarely needed and added only for the sake of completeness.
+
-d DATE, --date DATE
Label posting with this date. Date should be in LiveJournal's
format: DD.MM.YYYY HH:mm. If absent, current date/time is used.
@@ -902,6 +1030,9 @@ Options:
any HTML-formatting - see '--fromh' for that). For slight
antispam protection, '\@' is replaced by '[_\@_]'. The option
can be negated ('--nofrom'). Default is '--nofrom'.
+ Note: this option is independent from '--obfuscate' (i.e. the
+ prepended From is always obfuscated, even if the rest of the
+ message is not).
--fromh, --addfromh
Same as '--from', but uses HTML-markup to highlight inserted
@@ -909,6 +1040,12 @@ Options:
nice for mailing list -> Livejournal crossposting. The option
can be negated ('--nofromh'). Default is '--nofromh'.
+-o, --obfuscate
+ Obfuscate all e-mail addresses that are present in the body
+ of the message. For slight antispam protection, '\@' in these
+ addresses is replaced by '[_\@_]'. The option can be negated
+ ('--noobfuscate'). Default is '--noobfuscate'.
+
--spaces, --keepspaces
Normally the script does not change original message text,
and all of it is preserved in the body of resulting LJ post.
@@ -923,6 +1060,16 @@ Options:
better preserved in the journal. The option can be negated
('--nospaces'). Default is '--nospaces'.
+--ljcut NUM, --cut NUM, -l NUM
+ Inserts '' after NUM lines of the post content.
+ If the resulting lj-cut happens to be within $ljcut_delta lines from
+ the end of the post, the cut will not be added.
+
+--ljcut-text TEXT, --cut-text TEXT, --cuttext TEXT
+ Text to use as lj-cut text parameter (in ).
+ If the text contains nothing but whitespace, it is ignored.
+ Remember to quote spaces and special characters from the shell.
+
--charset CHARSET
This option tells the script that all COMMAND LINE options are
given in this charset. Default is "$SystemCharset".
@@ -931,7 +1078,7 @@ Options:
utf8). It also has absolutely no effect on the in-the-body
keywords (they are also governed by email's charset). This
option is meaningful ONLY for the text that you supply VIA
- COMMAND LINE (e.g. '-s Subject').
+ COMMAND LINE (e.g. '-s Subject' or '--cuttext TEXT').
-b xxx\@yyy, --bounces xxx\@yyy
Normally, if errors occur during posting (e.g. wrong password),
@@ -959,6 +1106,7 @@ command line options), they should look
Security: private
Subject: Rzhevskij zhiv!
Tags: Junk, Viva Rzhevskij!
+ Notags: yes # Clears all preceding tags
Formatted: on # Or equivalent "Autoformat: off"
Usejournal: gusary
Mood: okay