Annotation of db/prgsrc/makeauthors.pl, revision 1.10
1.4 boris 1: #!/usr/bin/perl -w
1.1 roma7 2:
3: =head1 NAME
4:
5: makeauthors.pl - скрипт для создания таблиц авторов
6:
7: =head1 SYNOPSIS
8:
9: makeauthors.pl
10:
11: =head1 DESCRIPTION
12:
13: Скрипт создаёт и заполянет таблицы authors и A2Q, используя
14: информацию из файлов authors,nicks,ssnicks
15:
16: =head1 AUTHOR
17:
18: Роман Семизаров
19:
20:
21: =cut
22:
23:
24: use dbchgk;
25:
1.3 boris 26: my $DUMPDIR = $ENV{DUMPDIR} || "../dump";
27:
1.1 roma7 28: do "chgk.cnf";
29: use locale;
30: use POSIX qw (locale_h);
31: open NICKS, "<$nicksfile" or die "Can not open nicks";
32: open SSNICKS, "<$ssnicksfile" or die "Can not open ssnicks";
1.3 boris 33: open UNKNOWN, ">$DUMPDIR/uauthors";
34: open UNICKS, ">$DUMPDIR/unicks";
35: open STDERR, ">$DUMPDIR/errors";
1.1 roma7 36: my ($thislocale);
37: if ($^O =~ /win/i) {
38: $thislocale = "Russian_Russia.20866";
39: } else {
40: $thislocale = "ru_RU.KOI8-R";
41: }
42: POSIX::setlocale( &POSIX::LC_ALL, $thislocale );
43: if ((uc 'а') ne 'А') {die "!Koi8-r locale not installed!\n"};
44:
45:
46:
47: while (<NICKS>)
48: {
1.10 ! roma7 49: ($number,$nick)=split;
! 50: next unless $number;
1.1 roma7 51: next unless $number=~/^\d+$/;
1.10 ! roma7 52: @parts = split ' ',<NICKS>;
! 53: $_ = ucfirst lc $_ foreach @parts;
! 54: $surname = pop @parts;
! 55: my $name;
! 56: $name = $name{$nick}= join ' ', @parts;
1.1 roma7 57: $surname=~s/\-(.)/"-". uc $1/ge;
1.5 roma7 58: $surname=~s/\'(.)/"'". uc $1/ge;
1.10 ! roma7 59: $surname{$nick}= $surname;
! 60: $sn = "$name $surname";
! 61: print "$name!$surname\n";
! 62: $sn =~ tr/Ёё/Ее/;
! 63: $nickfromname{uc $sn} = $nick;
1.1 roma7 64: }
1.8 roma7 65:
1.1 roma7 66: $surname{'error'}='Глюков';
67: $name{'error'}='Очепят';
68: $surname{'unknown'}='Неизвестный';
69: $name{'unknown'}='Псевдоним';
1.9 roma7 70: $surname{'team'}='Авторов';
71: $name{'team'}='Коллектив';
1.1 roma7 72:
73:
74: while (<SSNICKS>)
75: {
76: $str=$_;
77: ($number,$n)=split ' ',$str;
78: if ($number=~/\d+/) {$nick=$n;next}
79: $str=~s/^\s+//;
80: $str=~s/\s+$//;
81: $str=~s/\s+/ /;
82: $ssnick{$nick}.="|$str";
83: }
84:
85: close (NICKS);
86: close (SSNICKS);
87:
88: open AUTHORS,"<$authorsfile" or die "Can not open authors";
1.8 roma7 89: print "REading authors...\n";
1.1 roma7 90: while (<AUTHORS>)
91: {
1.8 roma7 92:
1.1 roma7 93: ($nick,$number,$descr)=m/^([a-zA-Z][a-zA-Z\s]+)(\d+)\s+(.*)$/g;
94: if (!$nick)
95: {
96: ($number,$descr)=m/^(\d+)\s+(.*)$/g;
97: $nick='unknown';
98: }
1.7 roma7 99: # if ($nick=~s/\s*$//)
1.1 roma7 100: $descr=~s/([\.\,\:\!\?])/$1 /g;
101: $descr=~s/\\n/ /g;
102: $descr=~s/^\s+//g;
103: $descr=~s/\s+$//g;
104: $descr=~s/\s+/ /g;
105: $descr=uc $descr;
106: # die "$descr" unless $descr;
107: # die "Duplicated description \"$descr\"" if ($nick{$descr});
108: $nick{$descr}=$nick;
109: foreach (split ' ', $nick)
110: {
111: $unknick{$_}=1 unless $name{$_}
112: }
113: }
114:
1.8 roma7 115: print "printing unknown...\n";
1.1 roma7 116: foreach $as(keys %unknick)
117: {
1.2 roma7 118: print UNICKS "$as \n ", (join "\n ", (grep {$nick{$_}=~/$as/} keys %nick));
119: print UNICKS "\n";
1.1 roma7 120: }
121:
1.8 roma7 122: %forbidden=tableexists('equalto')? getequalto : ();
1.1 roma7 123:
124: #print scalar keys %forbidden, "forbidden questions\n";
125:
126: getbase('QuestionId','Authors');
127: while (($QuestionId, $author)=getrow,$QuestionId)
128: {
1.8 roma7 129: print "." unless $i++ % 100;
1.1 roma7 130: next unless $author;
131: $author=~s/([\.\,\:\!\?])/$1 /gm;
132: $author=~s/^\s+//mg;
133: $author=~s/\\n/ /g;
134: $author=~s/\s+$//mg;
135: $author=~s/\s+/ /mg;
136: $author=uc $author;
1.6 roma7 137: $author=~s/ё/е/mg;
1.1 roma7 138: if ($nick = $nick{$author})
139: {
140: my @a=split ' ',$nick;
1.8 roma7 141: foreach $tmp(@a) {
142: if ($tmp eq '!!!') {
143: print STDERR "!$author!".$QuestionId."\n";
144: }
145: }
1.1 roma7 146: push @{$questions{$_}},$QuestionId foreach @a;
147: }
148: else
149: {
150: $unknown{$author}=1;
151: }
152: }
153:
154:
155:
156: print scalar keys %nick , " authors found\n";
157:
158:
159: #print STDERR "$_ ".$name{$_}."!\n" foreach keys %name;
1.8 roma7 160: addquestions2author($_,$name{$_},$surname{$_},$questions{$_},$ssnick{$_},\%forbidden) foreach keys %questions;
1.1 roma7 161:
162: print UNKNOWN "$_\n" foreach sort keys %unknown;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>