(* lexer for rfc2252 format schemas *)

{
  type lexeme =
      Lparen
    | Rparen
    | Numericoid of string
    | Name of string list
    | Desc of string
    | Obsolete
    | Equality of string
    | Ordering of string
    | Substr of string
    | Syntax of string * int
    | Single_value
    | Collective
    | No_user_modification
    | Usage of string
    | Sup of string list
    | Abstract
    | Structural
    | Auxiliary
    | Must of string list
    | May of string list
    | Xstring of string
 
  let extract buf i chop = String.sub buf i ((String.length buf) - i - chop);;
  let splitoidlst buf regex = Str.split regex buf;;
  let stripquotes lst = List.map (fun item -> String.sub item 1 ((String.length item) - 2)) lst
}

(* conversion definitions, from rfc 2252. I've tried to keep the names
the same, or close. I've changed some names to make them more descriptive *)
let alpha  = [ 'a' - 'z' 'A' - 'Z' ]
let digit  = [ '0' - '9' ]
let hdigit = [ 'a' - 'f' 'A' - 'F' '0' - '9' ]
let k = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '-' ';' ]
let p = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '"' '(' ')' '+' ',' '-' '.' '/' ':' '?' ' ' ]
let utf8 = [ '\t' ' ' '!' - '~' ] (* for now, this works, need to read about this *)
let whsp = ' ' +
let dstring = utf8 *
let qdstring = whsp ''' dstring ''' whsp
let qdstringlist = qdstring +
let qdstrings = qdstring | ( whsp '(' qdstringlist ')' whsp )
let letterstring = alpha +
let numericstring = digit +
let anhstring = k +
let keystring = alpha anhstring *
let printablestring = p +
let space = ' ' +
let descr = keystring
let qdescr = whsp ''' descr ''' whsp
let qdescrlist = qdescr ( ''' descr ''' whsp ) *
let numericoid = numericstring ( '.' numericstring ) *
let oid = descr | numericoid
let woid = whsp oid whsp
let oidlist = woid ( '$' woid ) *
let oids = woid | ( whsp '(' oidlist ')' whsp )
let noidlen = whsp numericoid ( '{' numericstring '}' ) ?
let attributeUsage = "userApplication" | "directoryOperation" | "distributedOperation" | "dSAOperation"

rule lexattr = parse
    '(' whsp {Lparen}
  | numericoid whsp {Numericoid (extract (Lexing.lexeme lexbuf) 0 1)}
  | "NAME" qdescr {Name [(extract (Lexing.lexeme lexbuf) 6 2)]}
  | "NAME" whsp '(' qdescrlist ')' whsp {Name (stripquotes 
						 (splitoidlst 
						    (extract (Lexing.lexeme lexbuf) 7 3)
						    (Str.regexp "  *")))}
  | "DESC" qdstring {Desc (extract (Lexing.lexeme lexbuf) 6 2)}
  | "OBSOLETE" whsp {Obsolete}
  | "SUP" woid {Sup [(extract (Lexing.lexeme lexbuf) 4 1)]}
  | "EQUALITY" woid {Equality (extract (Lexing.lexeme lexbuf) 9 1)}
  | "ORDERING" woid {Ordering (extract (Lexing.lexeme lexbuf) 9 1)}
  | "SUBSTR" woid {Substr (extract (Lexing.lexeme lexbuf) 7 1)}
  | "SYNTAX" noidlen whsp {match (splitoidlst 
			       (extract (Lexing.lexeme lexbuf) 7 1)
			       (Str.regexp "{")) with
			  [syntax]        -> Syntax (syntax,0)
			| [syntax;length] -> Syntax (syntax,(int_of_string (extract length 0 1)))
			| _               -> failwith "syntax error"}
  | "SINGLE-VALUE" whsp {Single_value}
  | "COLLECTIVE" whsp {Collective}
  | "NO-USER-MODIFICATION" whsp {No_user_modification}
  | "USAGE" whsp attributeUsage whsp {Usage (extract (Lexing.lexeme lexbuf) 6 1)}
  | "X-" dstring qdstring {Xstring (Lexing.lexeme lexbuf)}
  | "X-" dstring whsp '(' oidlist ')' whsp {Xstring (Lexing.lexeme lexbuf)}
  | ')' {Rparen}

and lexoc = parse
    '(' whsp {Lparen}
  | numericoid whsp {Numericoid (extract (Lexing.lexeme lexbuf) 0 1)}
  | "NAME" qdescr {Name [(extract (Lexing.lexeme lexbuf) 6 2)]}
  | "NAME" whsp '(' qdescrlist ')' whsp {Name (stripquotes
						 (splitoidlst 
						    (extract (Lexing.lexeme lexbuf) 7 3)
						    (Str.regexp "  *")))}
  | "DESC" qdstring {Desc (extract (Lexing.lexeme lexbuf) 6 2)}
  | "OBSOLETE" whsp {Obsolete}
  | "SUP" woid {Sup [(extract (Lexing.lexeme lexbuf) 4 1)]}
  | "SUP" whsp '(' oidlist ')' whsp {Sup (splitoidlst 
					    (extract (Lexing.lexeme lexbuf) 6 3)
					    (Str.regexp " *\\$ *"))}
  | "ABSTRACT" whsp {Abstract}
  | "STRUCTURAL" whsp {Structural}
  | "AUXILIARY" whsp {Auxiliary}
  | "MUST" woid {Must [(extract (Lexing.lexeme lexbuf) 5 1)]}
  | "MUST" whsp '(' oidlist ')' whsp {Must (splitoidlst 
					      (extract (Lexing.lexeme lexbuf) 7 3)
					      (Str.regexp " *\\$ *"))}
  | "MAY" woid {May [(extract (Lexing.lexeme lexbuf) 4 1)]}
  | "MAY" whsp '(' oidlist ')' whsp {May (splitoidlst 
					    (extract (Lexing.lexeme lexbuf) 6 3)
					    (Str.regexp " *\\$ *"))}
  | "X-" dstring qdstring {Xstring (Lexing.lexeme lexbuf)}
  | "X-" dstring whsp '(' oidlist ')' whsp {Xstring (Lexing.lexeme lexbuf)}
  | ')' {Rparen}
