#!Perl
# If the clipboard contains 8th bit characters, turn 'em into
#  appropriate Latin1/HTML4/Unicode entities.
# Otherwise, resolve whatever entities I know about.
# Note that this means only the entities in the table below --
#  it does not handle arbitrary character references such as "&#68;",
#  "&quot;" or "&#x235;"
#
# Doesn't touch < or > or & -- I only care about 8th bit chars.
#
# Sean M. Burke, sburke@netadventure.net, 1998-03-26

# Load the clipboard XFCN while the current package is Dartmouth; the rest
# of the script reaches it as Dartmouth::Clipboard.  (Presumably
# LoadExternals installs the external into whatever package is current at
# the point of the call -- confirm against the MacPerl documentation.)
#
# The XFCN is not part of MacPerl itself.  If you need it, it is available at
#   http://www.unimelb.edu.au/~ssilcot/macperl-primer/scripts/lib/clipboard.XFCN.sea.hqx
{
    package Dartmouth;
    MacPerl::LoadExternals('clipboard.XFCN');
}
# The package declaration above is scoped to the braces, so everything
# from here on is compiled in package main again.

#---------------------------------------------------------------------------
# The following is not EXACTLY what
#  <ftp://dkuug.dk/i18n/WG15-collection/charmaps/MACINTOSH> says.
# Flat list of (byte value, replacement text) pairs covering every Mac OS
# Roman byte from 128 to 255 exactly once.  Named replacements are HTML 4
# character entities; characters that have no HTML 4 name use hexadecimal
# numeric character references of the form "&#xHHHH;".
@ent_list = (
128, '&Auml;',         129, '&Aring;',
130, '&Ccedil;',       131, '&Eacute;',
132, '&Ntilde;',       133, '&Ouml;',
134, '&Uuml;',         135, '&aacute;',
136, '&agrave;',       137, '&acirc;',
138, '&auml;',         139, '&atilde;',
140, '&aring;',        141, '&ccedil;',
142, '&eacute;',       143, '&egrave;',
144, '&ecirc;',        145, '&euml;',
146, '&iacute;',       147, '&igrave;',
148, '&icirc;',        149, '&iuml;',
150, '&ntilde;',       151, '&oacute;',
152, '&ograve;',       153, '&ocirc;',
154, '&ouml;',         155, '&otilde;',
156, '&uacute;',       157, '&ugrave;',
158, '&ucirc;',        159, '&uuml;',
160, '&dagger;',       161, '&deg;',
162, '&cent;',         163, '&pound;',
164, '&sect;',         165, '&bull;',   # HTML 4 spells the bullet "bull"
166, '&para;',         167, '&szlig;',
168, '&reg;',          169, '&copy;',
170, '&trade;',        171, '&acute;',
172, '&uml;',          173, '&ne;',
174, '&AElig;',        175, '&Oslash;',

176, '&infin;',        177, '&plusmn;',
178, '&le;',           179, '&ge;',
180, '&yen;',
181, '&micro;',  # AKA mu
182, '&part;',   # partial differential (U+2202)
183, '&sum;',    # big capital sigma symbol
184, '&prod;',   # big capital pi symbol
185, '&pi;',     # small pi, as in 3.14159 (U+03C0); &piv; is the variant pi
186, '&int;',    # integral
187, '&ordf;',   # high-a
188, '&ordm;',   # high-o
189, '&Omega;',  # capital omega

190, '&aelig;',        191, '&oslash;',
192, '&iquest;',       193, '&iexcl;',

194, '&not;',    # logical not
195, '&radic;',  # square root
196, '&fnof;',   # florin (swash f)
197, '&asymp;',  # almost equal, double-~ (U+2248); &cong; is U+2245
198, '&Delta;',  # capital delta
199, '&laquo;',
200, '&raquo;',
201, '&hellip;', # ellipsis
202, '&nbsp;',   # nonbreaking space

203, '&Agrave;',       204, '&Atilde;',
205, '&Otilde;',       206, '&OElig;',
207, '&oelig;',        208, '&ndash;',
209, '&mdash;',        210, '&ldquo;',
211, '&rdquo;',        212, '&lsquo;',
213, '&rsquo;',        214, '&divide;',
215, '&loz;',    # lozenge
216, '&yuml;',         217, '&Yuml;',
218, '&frasl;',  # fraction-slash
219, '&curren;', # generic currency sign
220, '&lsaquo;',       221, '&rsaquo;',

# With 222 mapped to 'fi' and 223 to 'fl', the conversion is not
# reversible -- i.e., if you apply the program twice, you don't get back
# exactly what you put in, because plain 'fi'/'fl' are never turned back
# into ligatures.  If for some reason you need that, have 222 go to
# '&#xFB01;' and 223 to '&#xFB02;' instead.
# But unless you NEED that, leave it alone!
222, 'fi',
223, 'fl',
# 222, '&#xFB01;', # LATIN SMALL LIGATURE FI
# 223, '&#xFB02;', # LATIN SMALL LIGATURE FL

224, '&Dagger;',
225, '&middot;',
226, '&sbquo;',
227, '&dbquo;',
228, '&permil;', # per-thousand
229, '&Acirc;',        230, '&Ecirc;',
231, '&Aacute;',       232, '&Euml;',
233, '&Egrave;',       234, '&Iacute;',
235, '&Icirc;',        236, '&Iuml;',
237, '&Igrave;',       238, '&Oacute;',
239, '&Ocirc;',

# Weird and wacky from here on.

240, '&clubs;',   # really the solid Apple, which has no entity of its own
241, '&Ograve;',       242, '&Uacute;',
243, '&Ucirc;',        244, '&Ugrave;',
245, '&#x0131;',  # Latin small letter dotless i
246, '&circ;',
247, '&tilde;',
248, '&macr;',
249, '&#x02D8;',  # breve
250, '&#x02D9;',  # dot above
251, '&#x02DA;',  # ring above
252, '&cedil;',
253, '&#x02DD;',  # double acute
254, '&#x02DB;',  # ogonek
255, '&#x02C7;',  # hacek =? MODIFIER LETTER HACEK ; Mandarin Chinese third tone
);

# Forward lookup: byte value => replacement text.
%code2ent = @ent_list;

# Reverse lookup: entity => byte value.  Walk the pair list back to front
# so that, should an entity ever be listed twice, the pair that appears
# first in @ent_list is the one that wins.  Replacements that are not
# entities (anything not starting with '&', e.g. the plain 'fi'/'fl'
# ligature expansions) are left out, so ordinary text is never turned
# back into a high-bit character.
%ent2code = ();
my @ent_then_code = reverse @ent_list;
while (my ($entity, $code) = splice(@ent_then_code, 0, 2)) {
    $ent2code{$entity} = $code if substr($entity, 0, 1) eq '&';
}

# Alternation matching any one of the known entities literally.
$magic_re = join '|', map { quotemeta } keys %ent2code;

# Fetch the clipboard text, convert it in one direction or the other, and
# store the result back.  Clipboard is used here with no arguments to read
# and with one argument to write.
my $text = Dartmouth::Clipboard();

if ($text !~ /[\x80-\xFF]/) {
    # No high-bit characters present: resolve every entity we know about
    # back into its single byte.
    $text =~ s/($magic_re)/chr($ent2code{$1})/oeg;
}
else {
    # At least one high-bit character: replace each of them with its
    # table entry.  Entities already in the text are left untouched.
    $text =~ s/([\x80-\xFF])/$code2ent{ord($1)}/eg;
}

Dartmouth::Clipboard($text);
exit;

__END__

