#!/usr/local/bin/perl -w # # nobanner # # A trivial web proxy written in perl. # # This proxy is passed a filename which contains a bunch # of perl regular expressions which allow for substitutions # and deletions of text in html files passing through the # proxy. This is an expensive way to do anything; far more # expensive then just rewriting URLs. However, this allows # more of the offending garbage to be removed. # # Written by: # Dave Regan # regan@peak.org # 24 August 1997 # http://mordred.ao.com/nobanner/ # # No copyright claimed. Do what you want to with this program. # # # Things to do: # ------------- # # The log file potentially has many different writers. This # should really be done through syslog. However, there may be # installations where that really isn't appropriate either. # # There are a number of CGI and web modules available for making # this work easier and more reliable. I did this as a training # excercise, and the standard routines were straightforward, so # I just did the work that way. # # This doesn't support a bidirectional dialog. I don't think # that is important though I really need to read the appropriate RFC. # # This doesn't proxy any of the secure server stuff. That bypasses # this code entirely, which is probably just as well. # # This probably won't handle multipart form-data correctly. # # It would be interesting to use the shared memory routines # for keeping a pool of counters for each of the rules fired. # Use this to replace the file oriented counting. # ### ### Configuration ### use Getopt::Long; use Socket; $Version = 'nobanner v0.09 regan@ao.com'; $Home = "http://mordred.ao.com/nobanner/"; # The source # Option table %Args = ( "port" => 5244, # Port to answer on "log" => "/dev/null", # Where to log data to "logdir" => "", # Where to log rule info "rules" => "./nobanner.rules", # Name of file with rules ); # Network constants $AF_INET = 2; $SOCK_STREAM = 1; $Sockaddr = "S n a4 x8"; ### ### Main program ### # Various initialization # print "$Version\n"; $StartTime = time(); GetOptions(\%Args, "port=i", "rules=s", "log=s", "logdir=s", "proxy=s"); InitializePublishedPort(); mkdir($Args{'logdir'}, 0755) if ($Args{'logdir'} ne ""); ReadRules(); $SIG{"CHLD"} = sub { wait }; $SIG{"HUP"} = "IGNORE"; # Servers started by hand. print "Cannot open $Args{'log'}. We will run, but expect errors.\n" if (!open(LOG, ">>$Args{'log'}")); print LOG "Operation started at ", scalar(localtime($StartTime)), "\n"; select(LOG); $| = 1; select(STDOUT); $Nrequests = 0; $SIG{'INT'} = ReadRules(); # Main loop for ($count = 0; ; $count++) { $addr = accept(NS, S); if (!$addr) { print LOG "Accept failed. This shouldn't happen: $!\n"; sleep(4); next; } select(NS); $| = 1; select(STDOUT); $Nrequests++; if (($child = fork()) == 0) { ProcessRequest($addr); close LOG; exit 0; } elsif ($child == -1) { print LOG "Cannot fork. Looks grim.\n"; Error(undef, "Proxy server cannot fork. Try back later"); } my($af, $port, $inetaddr); ($af, $port, $inetaddr) = unpack($Sockaddr, $addr); $References{$inetaddr} = 0 if (!defined($References{$inetaddr})); $References{$inetaddr}++; # my($af, $port, $inetaddr, $name, $aliases, $addrtype, $length, $addrs); # ($af, $port, $inetaddr) = unpack($Sockaddr, $addr); # print "Inetaddr is $inetaddr\n"; # ($name, $aliases, $addrtype, $length, @addrs) = # gethostbyaddr($inetaddr, $AF_INET); #print "Name is $name\n", $name; close NS; # Every now and then spin around and pick up zombies. # This isn't the right way to do things. if ($count > 100) { $count = 0; while (wait() != -1) { # Reap all of the zombies } } # If the Rules file has changed contents, read in the new table. ReadRules() if ($RulesReadAt != (stat($Args{'rules'}))[9]); } ### ### Error ### ### Print an error message to all concerned, and potentially exit. ### sub Error { my($error, $msg) = @_; $msg .= $error if (defined($error)); print LOG "$msg\n"; print NS << "EOF"; HTTP/1.0 404 File Not Found Content-type: text/html
$msgSometime this means that there is a configuration error, often it means that the next host processing this request had problems. EOF die $msg if (defined($error)); } ### ### GiveStatusMsg ### ### Print a status message indicating whatever there is to tell. ### sub GiveStatusMsg { my($addr, @addr, $count, $key, $name, $start); $start = scalar(localtime($StartTime)); print NS << "EOF"; HTTP/1.0 200 OK Content-type: text/html
The arguments that the program was started with is:
The sites which have referenced this proxy server are:
Site name | count EOF for $addr (sort keys(%References)) { $name = gethostbyaddr($addr, $AF_INET); if (!defined($name)) { @addr = unpack("C4", $addr); $name = "$addr[0].$addr[1].$addr[2].$addr[3]"; } print NS " |
---|---|
$name | $References{$addr}\n"; } $timestamp = scalar(localtime($RulesReadAt)); print NS << "EOF"; |
The rules (last changed at $timestamp) are:
matches | pattern | replacement EOF for ($count = 0; $count <= $#Pattern; $count++) { print NS " |
---|---|---|
"; print NS (stat("$Args{'logdir'}/$count"))[7] if ($Args{'logdir'} ne ""); print NS " | ", HTMLsafe($Pattern[$count]), " | ", HTMLsafe($Replace[$count]), "\n"; } print NS << "EOF"; |
For more information on this proxy server, see:
$Home.
EOF
}
###
### HTMLsafe
###
### Make a string safe to print in HTML land.
###
sub HTMLsafe
{
my($text) = @_;
$text =~ s/\&/&/g;
$text =~ s/</g;
$text =~ s/>/>/g;
return $text;
}
###
### InitializePublishedPort
###
### Make our presence felt.
###
### When we specify our address we risk picking the wrong interface.
### Far better to choose 0 and have one picked for us.
###
sub InitializePublishedPort
{
my($aliases, $len, $name, $server, $this, $type);
# If we are going to talk to a proxy server, find out about it now.
if (defined($Args{'proxy'}))
{
($ProxyHost = $Args{'proxy'}) =~ s/:.*//;
($ProxyPort = $Args{'proxy'}) =~ s/.*://;
}
# We will need our address later
# chomp($server = `hostname`);
# ($name, $aliases, $type, $len, $ServerAddress) = gethostbyname($server);
# $ServerInfo = pack($Sockaddr, $AF_INET, 0, $ServerAddress);
# $ServerInfo = pack($Sockaddr, $AF_INET, 0, "\0\0\0\0");
#@data = unpack('C16', $ServerInfo);
#print "Connection control block is @data\n";
# Make our connection out
($name, $aliases, $TcpProto) = getprotobyname('tcp');
$this = pack($Sockaddr, $AF_INET, $Args{'port'}, "\0\0\0\0");
# Unbuffered I/O
select(NS); $| = 1; select(STDOUT);
# The rest of the magic
socket(S, $AF_INET, $SOCK_STREAM, $TcpProto) || die "socket: $!";
setsockopt(S, SOL_SOCKET, SO_REUSEADDR, pack("l", 1)) || die "setsockopt: $!";
bind(S, $this) || die "bind: $!";
listen(S, 5) || die "connect: $!";
select(S); $| = 1; select(STDOUT);
}
###
### ProcessRequest
###
### Process a request from a client.
###
### Read the header, which is everything up to two blank lines.
### Then take that header and echo it through to the real server.
###
### Remember that each request is handled by a separate process,
### which makes it hard for state information to be passed between
### childred.
###
sub ProcessRequest
{
my($addr) = @_;
my($af, $port, $inetaddr, @inetaddr);
my($count, $data, $get, $header, $hits, $host, $ishtml);
my($name, $aliases, $type, $len, $length, $realaddr, $realinfo);
my($c2);
# print "child got request\n";
($af, $port, $inetaddr) = unpack($Sockaddr, $addr);
@inetaddr = unpack('C4', $inetaddr);
print LOG scalar(localtime), " connect: $af, $port, @inetaddr\n";
$header = "";
$host = "";
$get = "";
$length = -1;
while (