
f#]c           @   sZ  d  Z  d d l Z d d l Z d d l Z d d l m Z d d l m Z e j	 e
  Z e Z y  e j e j d   e Z Wn n Xd d l Z d e f d     YZ e j j j d e f d	     Y Z d d
 l m Z d d l m Z d e f d     YZ i e d  d 6e d  d 6e d  d 6Z x$ d D] Z e e e   e e <q:Wi  Z  i  Z! xW e j" e  D]F \ Z# Z$ e$ e  e j d  e# f <e$ j%   e  e j d  e# f <qvWi  Z& xW e j" e  D]F \ Z# Z$ e$ e& e j d  e# f <e$ j%   e& e j d  e# f <qWe e d  e d   j%   Z' i  Z( e d e d  e d  e d   e( d <e( d j%   e( d <e e j)  j* e d   j* e d    e( d! <e( d! j%   e( d" <e e j+  e( d# <e( d# j%   e( d$ <e d% e( d& <e( d& j%   e( d' <e e d(  j, e d)  j, e d$  e( d* <e( d* j%   e( d+ <d S(,   s\  This module contains support for Unicode characters as required to
support the regular expression syntax defined in U{annex F
<http://www/Documentation/W3C/www.w3.org/TR/xmlschema-2/index.html#regexs>}
of the XML Schema definition.

In particular, we need to be able to identify character properties and
block escapes, as defined in F.1.1, by name.

 - Block data: U{http://www.unicode.org/Public/3.1-Update/Blocks-4.txt}
 - Property list data: U{http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.txt}
 - Full dataset: U{http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt}

The Unicode database active at the time XML Schema 1.0 was defined is
archived at
U{http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html},
and refers to U{Unicode Standard Annex #27: Unicode 3.1
<http://www.unicode.org/unicode/reports/tr27/>}.
iN(   t   six(   t   xranges   [\U0001d7ce-\U0001d7ff]t   CodePointSetErrorc           B   s   e  Z d  Z RS(   s8   Raised when some abuse of a L{CodePointSet} is detected.(   t   __name__t
   __module__t   __doc__(    (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR   4   s   t   CodePointSetc           B   s   e  Z d  Z d Z d Z e s' e Z n  d Z d   Z d   Z	 d   Z
 d   Z d   Z d   Z d	   Z d
   Z d   Z d e d  e d  e d  e d  e d  f Z d   Z e d  Z d   Z d   Z d   Z RS(   s   Represent a set of Unicode code points.

    Each code point is an integral value between 0 and 0x10FFFF.  This
    class is used to represent a set of code points in a manner
    suitable for use as regular expression character sets.i i  c         C   s   |  j  S(   sU   For testing purrposes only, access to the codepoints
        internal representation.(   t   _CodePointSet__codepoints(   t   self(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   _codepointsS   s    c         C   s   t  |  j  S(   N(   t   hashR   (   R   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __hash__X   s    c         C   s   |  j  | j  k S(   s-   Equality is delegated to the codepoints list.(   R   (   R   t   other(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __eq__[   s    c         C   s   |  j  | j  k  S(   N(   R   (   R   R   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __lt___   s    c         G   s   g  |  _  d t |  k rl t | d t  rI |  j  j | d j   d  St | d t  rl | d } ql n  x | D] } |  j |  qs Wd  S(   Ni   i    (   R   t   lent
   isinstanceR   t   extendt   listt   add(   R   t   argst   a(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __init__b   s    	c         C   s  t  | t  r( | \ } } | d 7} n_ t  | t j  rq d t |  k  rX t    n  t |  } | d } n t |  } | d } | | k r t d   n  | |  j	 k r |  S| |  j	 k r |  j	 d } n  t
 j |  j |  } t
 j |  j |  } | d @d >| d @B} | s'd | } n  d | k rC|  j | | 5np d | k rp|  j | d | 5| |  j | <nC d | k r|  j | d | 5| |  j | <n | | g |  j | | +|  S(   Ni   s   codepoint range value orderi   i   (   R   t   tupleR    t   string_typesR   t	   TypeErrort   ordt   intt
   ValueErrort   MaxCodePointt   bisectt   bisect_leftR   t   bisect_right(   R   t   valuet   do_addt   st   et   lit   rit   case(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __mutatem   s>    
c         C   s   |  j  | t  S(   s   Add the given value to the code point set.

        @param value: An integral value denoting a code point, or a
        tuple C{(s,e)} denoting the start and end (inclusive) code
        points in a range.
        @return: C{self}(   t   _CodePointSet__mutatet   True(   R   R!   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR      s    c         C   sJ   t  | t  r% |  j | j    n! x | D] } |  j | t  q, W|  S(   s   Add multiple values to a code point set.

        @param values: Either a L{CodePointSet} instance, or an iterable
        whose members are valid parameters to L{add}.

        @return: C{self}(   R   R   R   t   asTuplesR)   R*   (   R   t   valuest   v(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR      s
    c         C   sG   t  | t  r7 x! | j   D] } |  j |  q W|  S|  j | t  S(   s  Remove the given value from the code point set.

        @param value: An integral value denoting a code point, or a tuple
        C{(s,e)} denoting the start and end (inclusive) code points in a
        range, or a L{CodePointSet}.

        @return: C{self}(   R   R   R+   t   subtractR)   t   False(   R   R!   R-   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR.      s
    i    t   ^s   \t   [t   ]t   -c         C   sV   t  j |  } d | k r- t  j d  } n  | |  j k rR t  j d  | } n  | S(   Ni    t   x00i\   (   R    t   unichrt   ut-   _CodePointSet__XMLtoPythonREEscapedCodepoints(   R   t
   code_pointt   rv(    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   __unichr   s    c         C   s   g  } | r% | j  t j d   n  xg |  j   D]Y \ } } | | k rc | j  |  j |   q2 | j |  j |  d |  j |  g  q2 W| r | j  t j d   n  t j d  j |  S(   s  Return the code point set as Unicode regular expression
        character group consisting of a sequence of characters or
        character ranges.

        This returns a regular expression fragment using Python's
        regular expression syntax.  Note that different regular expression
        syntaxes are not compatible, often in subtle ways.

        @param with_brackets: If C{True} (default), square brackets
        are added to enclose the returned character group.R1   R3   R2   t    (   t   appendR    R6   R+   t   _CodePointSet__unichrR   t   join(   R   t   with_bracketst   rvaR#   R$   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt	   asPattern   s    ,c         C   s   g  } d } x] t t |  j   D]F } | d k	 r[ | j | |  j | d f  d } q" |  j | } q" W| d k	 r | |  j k r | j | |  j f  n  | S(   s   Return the codepoints as tuples denoting the ranges that are in
        the set.

        Each tuple C{(s, e)} indicates that the code points from C{s}
        (inclusive) to C{e}) (inclusive) are in the set.i   N(   t   NoneR   R   R   R<   R   (   R   R9   t   startR&   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR+      s    	c         C   sx   t  |     } d t |  j  k  rQ d |  j d k rQ | j j |  j d  n# | j j d  | j j |  j  | S(   s;   Return an instance that represents the inverse of this set.i    i   (   t   typeR   R   R   R<   (   R   R9   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   negate  s    (c         C   sK   d t  |  j  k s3 d |  j d |  j d k  r7 d St j |  j d  S(   st   If this set represents a single character, return it as its
        unicode string value.  Otherwise return C{None}.i   i   i    N(   R   R   RB   R    R5   (   R   (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   asSingleCharacter  s    3N(   R   R   R   R   t   MaxShortCodePointt   SupportsWideUnicodeRB   R   R	   R   R   R   R   R)   R   R   R.   R   R7   R=   R*   RA   R+   RE   RF   (    (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyR   8   s4   								0											
(   t   PropertyMap(   t   BlockMapt   XML1p0e2c           B   sO  e  Z d  Z e d d d dCdD Z e rJ e j d e j e j f  n  e dEdFdGdHdIdJdKdLdMdNdOdPdQdRdSd' dTd* dUdVdWd1 d2 d3 d4 dXdYdZd[d\d]d^d_d`dadbdcdM dddedfdgdhdidjdkdld` dmdnde dodpdqdrdsdp dtdudvdwdxdydzd{d|d}d~dd ddd dddddd d ddddddd ddddddd ddddddddddddddddd dddddddd dddd dd d dddd d ddd dd dddddd
dddddddddddddddd d!d"ddd'd(d)d*ddd/dd2d3d4ddddddddCdDdEddddLdddddddd[dd^ddddd Z	 e ddkd Z
 e e	  j e
  Z e dddddddzdd}dddddddddddddddddddddddddddddddddddddddddd dddddddddd	d
dddddddddddddddddddddddddddddd
ddd_ Z e dd d!d"d#d$d%d&d'd(d)d*d+d,d- Z e d-d.d/d0d1d2d3d4d.d/d0 Z e e  Z e j e d;  e j e d<  e e  Z e j e d;  e e  Z e j e  e j e d=  e j e d>  e j e d;  e j e d<  e j e  e j e  e e  Z e j e  e j e d=  e j e d>  e j e d;  e j e  e j e  d?e j   e j   f Z e j d@e f  Z dAe j   f Z e j d@e f  Z d?e j   e j   f Z e j d@e f  Z dBe e f Z e j d@e f  Z RS(1  s-  Regular expression support for XML Schema Data Types.

    This class holds character classes and regular expressions used to
    constrain the lexical space of XML Schema datatypes derived from
    U{string<http://www.w3.org/TR/xmlschema-2/#string>}.  They are
    from U{XML 1.0 (Second
    Edition)<http://www.w3.org/TR/2000/WD-xml-2e-20000814>} and
    U{Namespaces in XML
    <http://www.w3.org/TR/1999/REC-xml-names-19990114/>}.

    Unlike the regular expressions used for pattern constraints in XML
    Schema, which are derived from the Unicode 3.1 specification,
    these are derived from the Unicode 2.0 specification.

    The XML Schema definition refers explicitly to the second edition
    of XML, so we have to use these code point sets and patterns.  Be
    aware that U{subsequent updates to the XML specification
    <http://www.w3.org/XML/xml-V10-4e-errata#E09>} have changed the
    corresponding patterns for other uses of XML.  One significant
    change is that the original specification, used here, does not
    allow wide unicode characters.i	   i
   i   i    i  i   i  i   iA   iZ   ia   iz   i   i   i   i   i   i   i   i1  i4  i>  iA  iH  iJ  i~  i  i  i  i  i  i  i  i  iP  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  iO  iQ  i\  i^  i  i  i  i  i  i  i  i  i  i  i  i  i  i1  iV  iY  ia  i  i  i  i  i  i!  i:  iA  iJ  iq  i  i  i  i  i  i  i  i  i  i  i	  i9	  i=	  iX	  ia	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i
  i

  i
  i
  i
  i(
  i*
  i0
  i2
  i3
  i5
  i6
  i8
  i9
  iY
  i\
  i^
  ir
  it
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i  i  i  i  i  i(  i*  i0  i2  i3  i6  i9  i=  i\  i]  i_  ia  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i(  i*  i3  i5  i9  i`  ia  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i(  i*  i9  i`  ia  i  i.  i0  i2  i3  i@  iE  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i@  iG  iI  ii  i  i  i  i  i   i  i  i  i  i	  i  i  i  i  i<  i>  i@  iL  iN  iP  iT  iU  iY  i_  ia  ic  ie  ig  ii  im  in  ir  is  iu  i  i  i  i  i  i  i  i  i  i  i  i  i  i   i  i  i  i   i  i  i  i   iE  iH  iM  iP  iW  iY  i[  i]  i_  i}  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i  i&!  i*!  i+!  i.!  i!  i!  iA0  i0  i0  i0  i1  i,1  i   i  i N  i  i0  i!0  i)0  i   iE  i`  ia  i  i  i  i  i  i  i  i  i  i  i  i  iK  iR  ip  i  i  i  i  i  i  i  i  i  i  i	  i	  i<	  i>	  iL	  iM	  iQ	  iT	  ib	  ic	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i	  i
  i<
  i>
  i?
  i@
  iB
  iG
  iH
  iK
  iM
  ip
  iq
  i
  i
  i
  i
  i
  i
  i
  i
  i
  i  i  i<  i>  iC  iG  iH  iK  iM  iV  iW  i  i  i  i  i  i  i  i  i  i  i  i>  iD  iF  iH  iJ  iM  iU  iV  i  i  i  i  i  i  i  i  i  i  i  i  i>  iC  iF  iH  iJ  iM  iW  i1  i4  i:  iG  iN  i  i  i  i  i  i  i  i  i  i5  i7  i9  i>  i?  iq  i  i  i  i  i  i  i  i  i  i  i  i   i   i   i*0  i/0  i0  i0  i0   i9   i`  ii  i  i  if	  io	  i	  i	  if
  io
  i
  i
  if  io  i  i  if  io  i  i  if  io  iP  iY  i  i  i   i)  i   i  i  i  i@  iF  i  i0  i10  i50  i0  i0  i0  i0  t   _t   :t   .R3   s   %s%s*s   ^%s$s   %s+s   (%s:)?%s(   i    i  (   i   i  (   iA   iZ   (   ia   iz   (   i   i   (   i   i   (   i   i   (   i   i1  (   i4  i>  (   iA  iH  (   iJ  i~  (   i  i  (   i  i  (   i  i  (   i  i  (   iP  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  iO  (   iQ  i\  (   i^  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i1  iV  (   ia  i  (   i  i  (   i  i  (   i!  i:  (   iA  iJ  (   iq  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i	  i9	  (   iX	  ia	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i
  i

  (   i
  i
  (   i
  i(
  (   i*
  i0
  (   i2
  i3
  (   i5
  i6
  (   i8
  i9
  (   iY
  i\
  (   ir
  it
  (   i
  i
  (   i
  i
  (   i
  i
  (   i
  i
  (   i
  i
  (   i
  i
  (   i  i  (   i  i  (   i  i(  (   i*  i0  (   i2  i3  (   i6  i9  (   i\  i]  (   i_  ia  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i(  (   i*  i3  (   i5  i9  (   i`  ia  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i(  (   i*  i9  (   i`  ia  (   i  i.  (   i2  i3  (   i@  iE  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i@  iG  (   iI  ii  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   iT  iU  (   i_  ia  (   im  in  (   ir  is  (   i  i  (   i  i  (   i  i  (   i   i  (   i  i  (   i   i  (   i  i  (   i   iE  (   iH  iM  (   iP  iW  (   i_  i}  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i*!  i+!  (   i!  i!  (   iA0  i0  (   i0  i0  (   i1  i,1  (   i   i  (   i N  i  (   i!0  i)0  (   i   iE  (   i`  ia  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   iK  iR  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i	  i	  (   i>	  iL	  (   iQ	  iT	  (   ib	  ic	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i	  i	  (   i@
  iB
  (   iG
  iH
  (   iK
  iM
  (   ip
  iq
  (   i
  i
  (   i
  i
  (   i
  i
  (   i
  i
  (   i  i  (   i>  iC  (   iG  iH  (   iK  iM  (   iV  iW  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i>  iD  (   iF  iH  (   iJ  iM  (   iU  iV  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i>  iC  (   iF  iH  (   iJ  iM  (   i4  i:  (   iG  iN  (   i  i  (   i  i  (   i  i  (   i  i  (   iq  i  (   i  i  (   i  i  (   i  i  (   i  i  (   i   i   (   i*0  i/0  (   i0   i9   (   i`  ii  (   i  i  (   if	  io	  (   i	  i	  (   if
  io
  (   i
  i
  (   if  io  (   i  i  (   if  io  (   i  i  (   if  io  (   iP  iY  (   i  i  (   i   i)  (   i10  i50  (   i0  i0  (   i0  i0  (    R   R   R   R   t   CharRH   R   RG   R   t   BaseChart   IdeographicR   t   Lettert   CombiningChart   Digitt   Extendert   NameStartCharR   t   NCNameStartChart   NameChart
   NCNameCharRA   t   Name_patt   ret   compilet   Name_ret   NmToken_patt
   NmToken_ret
   NCName_patt	   NCName_ret	   QName_patt   QName_re(    (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyRK     s  	 					i
   t   ni   t   ri	   t   ts   \|.-^?*+{}()[]s   p{%s}s   P{%s}s   p{Is%s}s   P{Is%s}s   
s   i    s   	R#   t   SRL   RM   t   it   It   ct   Ct   Ndt   dt   Dt   Pt   Zt   Wt   w(-   R   R[   t   loggingt   pyxb.utils.utilityt   pyxbt
   pyxb.utilsR    t   pyxb.utils.six.movesR   t	   getLoggerR   t   _logR/   RH   R\   R6   R*   R   t   LookupErrorR   t   utilst   utilityt   BackfillComparisonst   objectR   t   pyxb.utils.unicode_dataRI   RJ   RK   t   SingleCharEscRj   R   t   catEsct   complEsct	   iteritemst   kR-   RE   t
   IsBlockEsct   WildcardEsct   MultiCharEscRR   R   RX   R   (    (    (    sG   /data/av2000/b2b/venv/lib/python2.7/site-packages/pyxb/utils/unicode.pyt   <module>!   s\   
 $$!+1.