#!/usr/bin/perl
#: sound like Chomsky's syntactic analyses
#----------------------------------------------------------------------------
########### sburke@cpan.org / Time-stamp: "2011-02-08 04:07:19 AST"
#(previous time stamp "2002-04-21 11:03:33 MDT")
#
# Chomsky phrases from Anthony Aristar (via John Lawler and Kevin
#  McGowan), who got them from he can't remember who.
# Algorithm adapted from
#  http://www-personal.umich.edu/~jlawler/fogcode.html
#  http://stick.us.itd.umich.edu/cgi-bin/chomsky.pl

use strict;
# Output decorations.  They start out empty (plain-text output);
# maybe_init_cgi() replaces them with HTML markup when the SCRIPT_NAME
# environment variable is set.
#   $doc_out : printed once, after the last paragraph
#   $p_in    : printed before each paragraph
#   $p_out   : printed after each paragraph
# (One initializer per variable -- the list used to carry a surplus ''.)
my($doc_out, $p_in, $p_out) = ('', '', '');

# Phrase pools: filled by learn_chomsky(), consumed by be_chomsky().
my(@intros, @subjects, @verbs, @objects);

maybe_init_cgi();
learn_chomsky();
be_chomsky();
exit;

# -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -
sub be_chomsky {
  # Print paragraph after paragraph for as long as all four phrase
  # pools still hold something, then print the document trailer.

  while(@intros and @subjects and @verbs and @objects) {
    my $quota = 3 + int rand 5;   # aim for 3 to 7 sentences
    my @paragraph;

   SENTENCE:
    for (1 .. $quota) {
      my @parts;
      # One phrase from each pool, always in this order.
      foreach my $pool (\@intros, \@subjects, \@verbs, \@objects) {
        my $phrase = shift @$pool;
        # A pool that yields nothing usable ends the paragraph early;
        # the partly built sentence is dropped.
        last SENTENCE unless $phrase;
        push @parts, $phrase;
      }
      push @paragraph, join ' ', @parts;
    }

    print_as_paragraph(@paragraph);
  }

  print $doc_out;
  return;
}


sub learn_chomsky {
  # Load the four phrase pools.  Each pool is written below as a single
  # block of text in which "/" separates one phrase from the next; the
  # text is handed, together with a reference to the pool it belongs
  # to, to learn_phrases_randomly().

  my @lessons = (

  [ \@intros,
  q{
   To characterize a linguistic level L, / On the other hand, / This
   suggests that / It appears that / Furthermore, / We will bring
   evidence in favor of the following thesis: / To provide a
   constituent structure for T(Z,K), / From C1, it follows that /
   For any transformation which is sufficiently diversified in
   application to be of any interest, / Analogously, / Clearly, /
   Note that / Of course, / Suppose, for instance, that / Thus /
   With this clarification, / Conversely, / We have already seen
   that / By combining adjunctions and certain deformations, / I
   suggested that these results would follow from the assumption
   that / If the position of the trace in (99c) were only relatively
   inaccessible to movement, / However, this assumption is not
   correct, since / Comparing these examples with their parasitic
   gap counterparts in (96) and (97), we see that / In the
   discussion of resumptive pronouns following (81), / So far, /
   Nevertheless, / For one thing, / Summarizing, then, we assume
   that / A consequence of the approach just outlined is that /
   Presumably, / On our assumptions, / It may be, then, that / It
   must be emphasized, once again, that / Let us continue to suppose
   that / Notice, incidentally, that
  } ],

  [ \@subjects,
  q{
   the notion of level of grammaticalness / a case of
   semigrammaticalness of a different sort / most of the
   methodological work in modern linguistics / a subset of English
   sentences interesting on quite independent grounds / the natural
   general principle that will subsume this case / an important
   property of these three types of EC / any associated supporting
   element / the appearance of parasitic gaps in domains relatively
   inaccessible to ordinary extraction / the speaker-hearer's
   linguistic intuition / the descriptive power of the base
   component / the earlier discussion of deviance / this analysis of
   a formative as a pair of sets of features / this selectionally
   introduced contextual feature / a descriptively adequate grammar
   / the fundamental error of regarding functional notions as
   categorial / relational information / the systematic use of
   complex symbols / the theory of syntactic features developed
   earlier
  } ],

  [ \@verbs,
  q{
   can be defined in such a way as to impose / delimits / suffices
   to account for / cannot be arbitrary in / is not subject to /
   does not readily tolerate / raises serious doubts about / is not
   quite equivalent to / does not affect the structure of / may
   remedy and, at the same time, eliminate / is not to be considered
   in determining / is to be regarded as / is unspecified with
   respect to / is, apparently, determined by / is necessary to
   impose an interpretation on / appears to correlate rather closely
   with / is rather different from
  } ],

  [ \@objects,
  q{
   problems of phonemic and morphological analysis. / a corpus of
   utterance tokens upon which conformity has been defined by the
   paired utterance test. / the traditional practice of grammarians.
   / the levels of acceptability from fairly high (eg (99a)) to
   virtual gibberish (eg (98d)). / a stipulation to place the
   constructions into these various categories. / a descriptive
   fact. / a parasitic gap construction. / the extended c-command
   discussed in connection with (34). / the ultimate standard that
   determines the accuracy of any proposed grammar.  / the system of
   base rules exclusive of the lexicon. / irrelevant intervening
   contexts in selectional rules. / nondistinctness in the sense of
   distinctive feature theory.  / a general convention regarding the
   forms of the grammar. / an abstract underlying order. / an
   important distinction in language use. / the requirement that
   branching is not tolerated within the dominance scope of a
   complex symbol. / the strong generative capacity of the
   theory.
  } ],

  );

  # Pools are taught in the order listed: intros, subjects, verbs,
  # objects.
  foreach my $lesson (@lessons) {
    my($pool, $text) = @$lesson;
    learn_phrases_randomly($pool, $text);
  }

  return;
}

#----------------------------------------------------------------------------
use Text::Wrap ('wrap');

sub print_as_paragraph {
  # Print the given sentences as one wrapped paragraph: sentences are
  # joined with two spaces, the first line is indented two spaces, and
  # the result is bracketed by $p_in / $p_out and followed by a blank
  # line.  Prints nothing at all when called without sentences.
  my @sentences = @_;
  return unless @sentences;

  my $text = join '  ', @sentences;
  print $p_in, wrap('  ', '', $text), $p_out, "\n\n";
  return;
}

#----------------------------------------------------------------------------

sub maybe_init_cgi {
  # If SCRIPT_NAME is set in the environment, print an HTTP content-type
  # header plus the opening of an HTML page, and switch the paragraph
  # and document decorations to their HTML forms.  Otherwise do nothing.
  if ($ENV{'SCRIPT_NAME'}) {
    print "Content-type: text/html\n\n",
     '<html lang="en-US"><head><title>Chomskybot Output</title></head><body>';
    ($p_in, $p_out, $doc_out) = ("<p>", "</p>", "</body></html>");
  }
  return;
}

#----------------------------------------------------------------------------

sub learn_phrases_randomly {
  # Replace the contents of the array referenced by $phrase_list with
  # the phrases found in $phrase_string, in random order.
  #
  #   $phrase_list   : reference to the array to fill
  #   $phrase_string : phrases separated by "/" (whitespace around the
  #                    slash is ignored)
  #
  # Within each phrase, runs of CR, LF, tab and space collapse to one
  # space, and one leading and one trailing space are removed.  Empty
  # phrases are dropped *after* that tidying, so a piece consisting
  # only of whitespace never ends up in the list.
  # Returns nothing useful.
  my($phrase_list, $phrase_string) = @_;

  my @phrases;
  foreach my $piece ( split /\s*\/\s*/, $phrase_string ) {
    my $phrase = $piece;
    # trim and regularize whitespace:
    $phrase =~ tr/\cm\cj\t / /s;
    $phrase =~ s/^ //s;
    $phrase =~ s/ $//s;
    push @phrases, $phrase if length $phrase;
  }
  @$phrase_list = @phrases;

  # Nothing to reorder with fewer than two phrases.
  shuffle($phrase_list) if @$phrase_list > 1;

  return;
}

sub shuffle {
  # Randomly reorder, in place, the items of the array whose reference
  # is passed as the only argument.
  # Fisher-Yates shuffle algorithm.  See perlfaq4.
  #
  # The return value is a placeholder string with no meaning.
  my($list) = @_;

  # An array of zero or one items is already "shuffled".  The guard
  # matters for the empty case: counting down from 0 would step to -1
  # and the swap would then be attempted at a negative subscript.
  return "(no useful retval)" if @$list < 2;

  for(my $i = $#$list; $i > 0; --$i) {
    my $j = int rand($i + 1);          # some index in 0 .. $i
    next if $i == $j;                  # item stays where it is
    @$list[$i, $j] = @$list[$j, $i];   # swap
  }
  return "(no useful retval)";
}

#----------------------------------------------------------------------------

__END__

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


SAMPLE OUTPUT:

  For one thing, most of the methodological work in modern linguistics is
not to be considered in determining the requirement that branching is not
tolerated within the dominance scope of a complex symbol.  Let us continue
to suppose that the notion of level of grammaticalness appears to correlate
rather closely with the extended c-command discussed in connection with
(34).  To characterize a linguistic level L, any associated supporting
element does not affect the structure of a stipulation to place the
constructions into these various categories.  Summarizing, then, we assume
that the descriptive power of the base component raises serious doubts
about the levels of acceptability from fairly high (eg (99a)) to virtual
gibberish (eg (98d)).  Note that the earlier discussion of deviance is not
quite equivalent to nondistinctness in the sense of distinctive feature
theory.  Suppose, for instance, that the fundamental error of regarding
functional notions as categorial is rather different from the traditional
practice of grammarians.

  Thus a case of semigrammaticalness of a different sort suffices to
account for the system of base rules exclusive of the lexicon.  By
combining adjunctions and certain deformations, the natural general
principle that will subsume this case is, apparently, determined by a
general convention regarding the forms of the grammar.  If the position of
the trace in (99c) were only relatively inaccessible to movement, the
appearance of parasitic gaps in domains relatively inaccessible to ordinary
extraction is necessary to impose an interpretation on a corpus of
utterance tokens upon which conformity has been defined by the paired
utterance test.  On the other hand, a descriptively adequate grammar cannot
be arbitrary in the strong generative capacity of the theory.  Comparing
these examples with their parasitic gap counterparts in (96) and (97), we
see that the theory of syntactic features developed earlier is unspecified
with respect to a parasitic gap construction.

  Furthermore, a subset of English sentences interesting on quite
independent grounds is to be regarded as problems of phonemic and
morphological analysis.  With this clarification, the speaker-hearer's
linguistic intuition can be defined in such a way as to impose a
descriptive fact.  So far, this analysis of a formative as a pair of sets
of features is not subject to an important distinction in language use.
Presumably, the systematic use of complex symbols may remedy and, at the
same time, eliminate irrelevant intervening contexts in selectional rules.
We have already seen that relational information does not readily tolerate
an abstract underlying order.

  Analogously, this selectionally introduced contextual feature delimits
the ultimate standard that determines the accuracy of any proposed grammar.

