# NOTE: the commented lines in this header are residue of the c99shell web
# file-viewer page this copy of sgmllib.py was captured from; they are not
# part of the module.
#
# !C99Shell v. 1.0 pre-release build #16!
#
# Software: Apache/2.0.54 (Fedora). PHP/5.0.4
#
# uname -a: Linux mina-info.me 2.6.17-1.2142_FC4smp #1 SMP Tue Jul 11 22:57:02 EDT 2006 i686
#
# uid=48(apache) gid=48(apache) groups=48(apache)
# context=system_u:system_r:httpd_sys_script_t
#
#
# Safe-mode: OFF (not secure)
#
# /usr/lib/python2.4/   drwxr-xr-x
# Free 3.85 GB of 27.03 GB (14.23%)
# Home    Back    Forward    UPDIR    Refresh    Search    Buffer    Encoder    Tools    Proc.    FTP brute    Sec.    SQL    PHP-code    Update    Feedback    Self remove    Logout
#
#
# Viewing file:     sgmllib.py (15.91 KB)      -rw-r--r--
# Select action/file-type:
# (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
"""A parser for SGML, using the derived class as a static DTD."""

# XXX This only supports those SGML features used by HTML.

# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).  RCDATA is
# not supported at all.


import markupbase
import re

__all__ = ["SGMLParser", "SGMLParseError"]

# Regular expressions used for parsing

# Finds the next character that can begin markup or a reference.
interesting = re.compile('[&<]')
# Matches a possibly unfinished '&...' reference or '<...' tag prefix.
# goahead() uses it to tell text that may still grow into markup (match
# reaches the end of the buffer) from text that is plain data.
incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
                           '<([a-zA-Z][^<>]*|'
                              '/([a-zA-Z][^<>]*)?|'
                              '![^<>]*)?')

# Named entity reference: group 1 is the name.  The final character class
# consumes one terminator character (';' or whatever follows the name).
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# Decimal character reference: group 1 is the digit string, followed by
# one non-digit terminator character.
charref = re.compile('&#([0-9]+)[^0-9]')

# '<' followed by '>' or a letter: the opening of a start tag ('<>' included).
starttagopen = re.compile('<[>a-zA-Z]')
# '<name/': the opening of the SGML short-tag form.
shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
# Complete short tag '<name/data/': group 1 is the name, group 2 the data.
shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
# Terminator searched for by parse_pi().
piclose = re.compile('>')
# Next '<' or '>'; parse_starttag()/parse_endtag() use it to find a tag's end.
endbracket = re.compile('[<>]')
# A tag name.
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
# One attribute: group 1 is the name, group 2 the optional '= value' part,
# group 3 the value itself (single-quoted, double-quoted, or bare).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')


class SGMLParseError(RuntimeError):
    """Signals any error encountered while parsing SGML input."""


# SGML parser base class -- find tags and call handler functions.
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
# The dtd is defined by deriving a class which defines methods
# with special names to handle tags: start_foo and end_foo to handle
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
# (Tags are converted to lower case for this purpose.)  The data
# between tags is passed to the parser by calling self.handle_data()
# with some data as argument (the data may be split up in arbitrary
# chunks).  Entity references are passed by calling
# self.handle_entityref() with the entity reference as argument.

class SGMLParser(markupbase.ParserBase):
    """Event-driven SGML parser; a derived class serves as the static DTD.

    Text is supplied with feed() and finished with close().  Each tag name
    is lower-cased and dispatched to a start_<tag>, do_<tag> or end_<tag>
    method looked up on the instance; tags without such a method go to the
    unknown_*() hooks.  Text between tags is delivered to handle_data(),
    possibly split into arbitrary chunks.
    """

    def __init__(self, verbose=0):
        """Initialize and reset this instance."""
        self.verbose = verbose
        self.reset()

    def reset(self):
        """Reset this instance. Loses all unprocessed data."""
        self.__starttag_text = None
        self.rawdata = ''
        self.stack = []
        self.lasttag = '???'
        self.nomoretags = 0
        self.literal = 0
        markupbase.ParserBase.reset(self)

    def setnomoretags(self):
        """Enter literal mode (CDATA) till EOF.

        Intended for derived classes only.
        """
        self.nomoretags = self.literal = 1

    def setliteral(self, *args):
        """Enter literal mode (CDATA).

        Intended for derived classes only.
        """
        self.literal = 1

    def feed(self, data):
        """Feed some data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').  (This just saves the text,
        all the processing is done by goahead().)
        """

        self.rawdata = self.rawdata + data
        self.goahead(0)

    def close(self):
        """Handle the remaining data."""
        self.goahead(1)

    def error(self, message):
        """Raise SGMLParseError carrying *message*."""
        raise SGMLParseError(message)

    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
    def goahead(self, end):
        """Process self.rawdata as far as possible.

        Whatever could not be processed yet is stored back in
        self.rawdata for the next call.  When *end* is true, any such
        remainder is passed to handle_data() instead of being kept.
        """
        rawdata = self.rawdata
        i = 0
        n = len(rawdata)
        while i < n:
            if self.nomoretags:
                self.handle_data(rawdata[i:n])
                i = n
                break
            match = interesting.search(rawdata, i)
            if match: j = match.start()
            else: j = n
            if i < j:
                self.handle_data(rawdata[i:j])
            i = j
            if i == n: break
            if rawdata[i] == '<':
                if starttagopen.match(rawdata, i):
                    if self.literal:
                        self.handle_data(rawdata[i])
                        i = i+1
                        continue
                    k = self.parse_starttag(i)
                    if k < 0: break
                    i = k
                    continue
                if rawdata.startswith("</", i):
                    k = self.parse_endtag(i)
                    if k < 0: break
                    i = k
                    self.literal = 0
                    continue
                if self.literal:
                    if n > (i + 1):
                        self.handle_data("<")
                        i = i+1
                    else:
                        # incomplete
                        break
                    continue
                if rawdata.startswith("<!--", i):
                        # Strictly speaking, a comment is --.*--
                        # within a declaration tag <!...>.
                        # This should be removed,
                        # and comments handled only in parse_declaration.
                    k = self.parse_comment(i)
                    if k < 0: break
                    i = k
                    continue
                if rawdata.startswith("<?", i):
                    k = self.parse_pi(i)
                    if k < 0: break
                    # parse_pi() returns a length, not an end index.
                    i = i+k
                    continue
                if rawdata.startswith("<!", i):
                    # This is some sort of declaration; in "HTML as
                    # deployed," this should only be the document type
                    # declaration ("<!DOCTYPE html...>").
                    k = self.parse_declaration(i)
                    if k < 0: break
                    i = k
                    continue
            elif rawdata[i] == '&':
                if self.literal:
                    self.handle_data(rawdata[i])
                    i = i+1
                    continue
                match = charref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_charref(name)
                    i = match.end(0)
                    # The pattern consumed one terminator character; give
                    # it back unless it was the ';' that ends the reference.
                    if rawdata[i-1] != ';': i = i-1
                    continue
                match = entityref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_entityref(name)
                    i = match.end(0)
                    # Same terminator handling as for character references.
                    if rawdata[i-1] != ';': i = i-1
                    continue
            else:
                self.error('neither < nor & ??')
            # We get here only if incomplete matches but
            # nothing else
            match = incomplete.match(rawdata, i)
            if not match:
                self.handle_data(rawdata[i])
                i = i+1
                continue
            j = match.end(0)
            if j == n:
                break # Really incomplete
            self.handle_data(rawdata[i:j])
            i = j
        # end while
        if end and i < n:
            self.handle_data(rawdata[i:n])
            i = n
        self.rawdata = rawdata[i:]
        # XXX if end: check for empty stack

    # Extensions for the DOCTYPE scanner:
    _decl_otherchars = '='

    # Internal -- parse processing instr, return length or -1 if not terminated
    def parse_pi(self, i):
        """Parse the processing instruction starting at index *i*.

        The text between '<?' and the next '>' is passed to handle_pi().
        Returns the number of characters consumed, or -1 when no closing
        '>' is in the buffer yet.
        """
        rawdata = self.rawdata
        if rawdata[i:i+2] != '<?':
            self.error('unexpected call to parse_pi()')
        match = piclose.search(rawdata, i+2)
        if not match:
            return -1
        j = match.start(0)
        self.handle_pi(rawdata[i+2: j])
        j = match.end(0)
        return j-i

    def get_starttag_text(self):
        """Return the source text recorded for the last parsed start tag."""
        return self.__starttag_text

    # Internal -- handle starttag, return end index or -1 if not terminated
    def parse_starttag(self, i):
        """Parse the start tag beginning at index *i* of self.rawdata.

        Handles the '<tag/data/' and '<>' shorthands as well as ordinary
        tags with attributes, then dispatches through finish_shorttag()
        or finish_starttag().  Returns the index just past the tag, or -1
        when the tag is not complete in the buffer yet.
        """
        self.__starttag_text = None
        start_pos = i
        rawdata = self.rawdata
        if shorttagopen.match(rawdata, i):
            # SGML shorthand: <tag/data/ == <tag>data</tag>
            # XXX Can data contain &... (entity or char refs)?
            # XXX Can data contain < or > (tag characters)?
            # XXX Can there be whitespace before the first /?
            match = shorttag.match(rawdata, i)
            if not match:
                return -1
            tag, data = match.group(1, 2)
            self.__starttag_text = '<%s/' % tag
            tag = tag.lower()
            k = match.end(0)
            self.finish_shorttag(tag, data)
            self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
            return k
        # XXX The following should skip matching quotes (' or ")
        match = endbracket.search(rawdata, i+1)
        if not match:
            return -1
        j = match.start(0)
        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        if rawdata[i:i+2] == '<>':
            # SGML shorthand: <> == <last open tag seen>
            k = j
            tag = self.lasttag
        else:
            match = tagfind.match(rawdata, i+1)
            if not match:
                self.error('unexpected call to parse_starttag')
            k = match.end(0)
            tag = rawdata[i+1:k].lower()
            self.lasttag = tag
        while k < j:
            match = attrfind.match(rawdata, k)
            if not match: break
            attrname, rest, attrvalue = match.group(1, 2, 3)
            if not rest:
                # Attribute given without '= value': the name doubles as
                # the value.
                attrvalue = attrname
            elif (attrvalue[:1] == '\'' == attrvalue[-1:] or
                  attrvalue[:1] == '"' == attrvalue[-1:]):
                # Strip the matching pair of quotes.
                attrvalue = attrvalue[1:-1]
            attrs.append((attrname.lower(), attrvalue))
            k = match.end(0)
        # Consume the closing '>'; a terminating '<' is left in place so
        # it is parsed as the start of the next construct.
        if rawdata[j] == '>':
            j = j+1
        self.__starttag_text = rawdata[start_pos:j]
        self.finish_starttag(tag, attrs)
        return j

    # Internal -- parse endtag
    def parse_endtag(self, i):
        """Parse the end tag beginning at index *i* of self.rawdata.

        Returns the index just past the tag, or -1 when the tag is not
        complete in the buffer yet.
        """
        rawdata = self.rawdata
        match = endbracket.search(rawdata, i+1)
        if not match:
            return -1
        j = match.start(0)
        tag = rawdata[i+2:j].strip().lower()
        if rawdata[j] == '>':
            j = j+1
        self.finish_endtag(tag)
        return j

    # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
    def finish_shorttag(self, tag, data):
        """Emit start tag, data and end tag for a '<tag/data/' construct."""
        self.finish_starttag(tag, [])
        self.handle_data(data)
        self.finish_endtag(tag)

    # Internal -- finish processing of start tag
    # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
    def finish_starttag(self, tag, attrs):
        """Dispatch a parsed start tag to its handler.

        A start_<tag> method marks a balanced tag: the tag is pushed on
        self.stack and 1 is returned.  Otherwise a do_<tag> method marks
        an open-only tag (returns 0).  With neither, unknown_starttag()
        is called and -1 is returned.
        """
        try:
            method = getattr(self, 'start_' + tag)
        except AttributeError:
            try:
                method = getattr(self, 'do_' + tag)
            except AttributeError:
                self.unknown_starttag(tag, attrs)
                return -1
            else:
                self.handle_starttag(tag, method, attrs)
                return 0
        else:
            self.stack.append(tag)
            self.handle_starttag(tag, method, attrs)
            return 1

    # Internal -- finish processing of end tag
    def finish_endtag(self, tag):
        """Close *tag* together with every element opened after it.

        An empty *tag* closes the most recently opened element.  A tag
        that is not on self.stack is reported through report_unbalanced()
        when an end_<tag> method exists, and through unknown_endtag()
        otherwise; the stack is left untouched in both cases.
        """
        if not tag:
            found = len(self.stack) - 1
            if found < 0:
                self.unknown_endtag(tag)
                return
        else:
            if tag not in self.stack:
                try:
                    method = getattr(self, 'end_' + tag)
                except AttributeError:
                    self.unknown_endtag(tag)
                else:
                    self.report_unbalanced(tag)
                return
            # Locate the last (innermost) occurrence of the tag.
            found = len(self.stack)
            for i in range(found):
                if self.stack[i] == tag: found = i
        while len(self.stack) > found:
            tag = self.stack[-1]
            try:
                method = getattr(self, 'end_' + tag)
            except AttributeError:
                method = None
            if method:
                self.handle_endtag(tag, method)
            else:
                self.unknown_endtag(tag)
            del self.stack[-1]

    # Overridable -- handle start tag
    def handle_starttag(self, tag, method, attrs):
        """Call the start-tag handler *method* with the attribute list."""
        method(attrs)

    # Overridable -- handle end tag
    def handle_endtag(self, tag, method):
        """Call the end-tag handler *method*."""
        method()

    # Example -- report an unbalanced </...> tag.
    def report_unbalanced(self, tag):
        """Print a diagnostic for an unbalanced end tag when verbose is set."""
        if self.verbose:
            # Each print takes one parenthesised expression, so the line
            # means the same as a print statement and as a print() call.
            print('*** Unbalanced </' + tag + '>')
            print('*** Stack: %s' % (self.stack,))

    def handle_charref(self, name):
        """Handle character reference, no need to override."""
        try:
            n = int(name)
        except ValueError:
            self.unknown_charref(name)
            return
        if not 0 <= n <= 255:
            self.unknown_charref(name)
            return
        self.handle_data(chr(n))

    # Definition of entities -- derived classes may override
    entitydefs = {
        'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

    def handle_entityref(self, name):
        """Handle entity references.

        There should be no need to override this method; it can be
        tailored by setting up the self.entitydefs mapping appropriately.
        """
        table = self.entitydefs
        if name in table:
            self.handle_data(table[name])
        else:
            self.unknown_entityref(name)

    # Example -- handle data, should be overridden
    def handle_data(self, data):
        pass

    # Example -- handle comment, could be overridden
    def handle_comment(self, data):
        pass

    # Example -- handle declaration, could be overridden
    def handle_decl(self, decl):
        pass

    # Example -- handle processing instruction, could be overridden
    def handle_pi(self, data):
        pass

    # To be overridden -- handlers for unknown objects
    def unknown_starttag(self, tag, attrs): pass
    def unknown_endtag(self, tag): pass
    def unknown_charref(self, ref): pass
    def unknown_entityref(self, ref): pass


class TestSGMLParser(SGMLParser):
    """SGMLParser subclass that prints each parse event to standard output.

    Character data is accumulated in self.testdata and printed in one
    piece whenever it grows long enough or another event is reported.
    """

    def __init__(self, verbose=0):
        # testdata must exist before the base initializer runs.
        self.testdata = ""
        SGMLParser.__init__(self, verbose)

    def handle_data(self, data):
        """Buffer *data*; flush once the buffer's repr reaches 70 characters."""
        self.testdata = self.testdata + data
        if len(repr(self.testdata)) >= 70:
            self.flush()

    def flush(self):
        """Print and clear the buffered character data, if there is any."""
        data = self.testdata
        if data:
            self.testdata = ""
            # All prints in this class take one parenthesised expression,
            # so they mean the same as a print statement and a print() call.
            print('data: ' + repr(data))

    def handle_comment(self, data):
        """Print the comment's repr, shortened in the middle if over 68 chars."""
        self.flush()
        r = repr(data)
        if len(r) > 68:
            r = r[:32] + '...' + r[-32:]
        print('comment: ' + r)

    def unknown_starttag(self, tag, attrs):
        """Print the start tag followed by its attributes as name="value"."""
        self.flush()
        if not attrs:
            print('start tag: <' + tag + '>')
        else:
            # Assemble the space-separated line first and print it once.
            pieces = ['start tag: <' + tag]
            for name, value in attrs:
                pieces.append(name + '=' + '"' + value + '"')
            pieces.append('>')
            print(' '.join(pieces))

    def unknown_endtag(self, tag):
        """Print the end tag."""
        self.flush()
        print('end tag: </' + tag + '>')

    def unknown_entityref(self, ref):
        """Print a diagnostic for an entity reference with no definition."""
        self.flush()
        print('*** unknown entity ref: &' + ref + ';')

    def unknown_charref(self, ref):
        """Print a diagnostic for a character reference that was rejected."""
        self.flush()
        print('*** unknown char ref: &#' + ref + ';')

    def unknown_decl(self, data):
        """Print a diagnostic for an unrecognized declaration."""
        self.flush()
        print('*** unknown decl: [' + data + ']')

    def close(self):
        """Finish parsing, then print any character data still buffered."""
        SGMLParser.close(self)
        self.flush()


def test(args=None):
    """Parse a file and report the events, one input character at a time.

    *args* defaults to sys.argv[1:].  A leading '-s' selects the silent
    SGMLParser instead of TestSGMLParser.  The next argument is the file
    to read ('-' means standard input); without it 'test.html' is used.
    Exits with status 1 when the file cannot be opened.
    """
    import sys

    if args is None:
        args = sys.argv[1:]

    if args and args[0] == '-s':
        args = args[1:]
        klass = SGMLParser
    else:
        klass = TestSGMLParser

    if args:
        path = args[0]
    else:
        path = 'test.html'

    if path == '-':
        f = sys.stdin
    else:
        try:
            f = open(path, 'r')
        except IOError:
            # sys.exc_info() avoids version-specific "except ..., name" syntax.
            msg = sys.exc_info()[1]
            print('%s : %s' % (path, msg))
            sys.exit(1)

    # Close the file even when reading fails; never close stdin.
    try:
        data = f.read()
    finally:
        if f is not sys.stdin:
            f.close()

    x = klass()
    # Feed one character per call to exercise incremental parsing.
    for c in data:
        x.feed(c)
    x.close()


# Run the command-line test driver when executed as a script.
if __name__ == '__main__':
    test()

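# NOTE: the commented lines below are residue of the c99shell web
# file-viewer page this copy of sgmllib.py was captured from; they are not
# part of the module.
#
# :: Command execute ::
#
# Enter:
#
# Select:
#
#
# :: Search ::
#   - regexp
#
# :: Upload ::
#
# [ Read-Only ]
#
# :: Make Dir ::
#
# [ Read-Only ]
# :: Make File ::
#
# [ Read-Only ]
#
# :: Go Dir ::
#
# :: Go File ::
#
#
# --[ c99shell v. 1.0 pre-release build #16 powered by Captain Crunch Security Team | http://ccteam.ru | Generation time: 0.0048 ]--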