JavaScriptでCSSパーサーを書くための情報を収集中(3日目)

三日目です。今日は、詳細度(specificity。以下 spec)による重み付けと !important の扱いです。

http://uupaa-js-spinoff.googlecode.com/svn/trunk/uuCSSParser.js/demo/sortspec.htm

<!DOCTYPE html><html><head><title></title>
<link rel="stylesheet" type="text/css" href="a.css" />
<style type="text/css"><!--
  @import url('e.css');
  :digit,div {
    color: pink;
    background-color: red;
  }
  div[title="{hoge,hoge;{}"] {
    color: red;
    ;
  }
  div > * > li[title="{hoge,hoge;hoge}}"] {
    color: red;
  }
  *,div {
    color: black !important;
    background-color: white !important;
    line-height: 2;
  }

/* hoge
 * hoge
 * hoge */
--></style>
</head>
<body>
<input type="button" value="TEST 1 (load from css files)" onclick="test1()" />
<input type="button" value="TEST 2 (parse static css)" onclick="test2()" />
<div id="out"></div>

<script src="../uuCSSParser.js"></script>
<script>
// Render a uuCSSParser.parse() result as an HTML fragment for the demo page.
// @param Hash: { specs: [spec, ...], data: { spec: [{ rule, decl, ... }, ...] } }
// @return String: HTML listing every spec value (ascending, as given in
//                 hash.specs) followed by the rules recorded under it
function dump(hash) {
  var rv = [], i, iz, j, jz, spec, data;

  // rule / declaration text comes straight from style sheets and may contain
  // markup characters (e.g. the ">" combinator), so escape it before it is
  // assigned to innerHTML
  function esc(text) {
    return String(text).replace(/&/g, "&amp;").
                        replace(/</g, "&lt;").
                        replace(/>/g, "&gt;");
  }

  for (i = 0, iz = hash.specs.length; i < iz; ++i) {
    spec = hash.specs[i];
    data = hash.data[spec];
    rv.push("<b>hash.data[" + spec + "]</b>");
    for (j = 0, jz = data.length; j < jz; ++j) {
      rv.push("  " + esc(data[j].rule) + " --&gt; " +
              esc(data[j].decl.join("; ")));
    }
  }
  return "<b>hash.specs: " + hash.specs.join(", ") + "</b><br />" +
         rv.join("<br /> ") + "<hr />";
}

// TEST 1: collect the CSS attached to this page with uuCSSParser.collect(),
// parse it, and append the rendered result to the #out element.
function test1() {
  var css = uuCSSParser.collect();
  var out = document.getElementById("out");

  out.innerHTML = out.innerHTML + dump(uuCSSParser.parse(css));
}
// TEST 2: parse a fixed list of empty rules and append the rendered result
// to the #out element.
function test2() {
  // one rule per entry; the trailing number is the spec value noted for
  // that selector
  var rules = [
    '*              {}', //   0
    'LI             {}', //   1
    'UL LI          {}', //   2
    'UL OL+LI       {}', //   3
    'H1 + *[REL=up] {}', //  11
    'UL OL LI.red   {}', //  13
    'LI.red.level   {}', //  21
    '#x34y          {}', // 100
    '#s12:not(FOO)  {}'  // 101
  ];
  var out = document.getElementById("out");

  out.innerHTML = out.innerHTML + dump(uuCSSParser.parse(rules.join("")));
}
</script>
</body></html>

http://uupaa-js-spinoff.googlecode.com/svn/trunk/uuCSSParser.js/uuCSSParser.js

(function() {
var _cssp, // inner namespace
    _doc = document,
    // document.uniqueID is used throughout as the "running in IE" switch
    // (presumably only IE defines it - confirm for the browsers you target)
    _ie = _doc.uniqueID,
    // expando property name under which _memento stores each <style>
    // element's innerHTML
    MEMENTO = "uuCSSParserMemento",
    COMMENT = /\/\*[^*]*\*+([^\/][^*]*\*+)*\//g, // /* ... */
    // @import statement: group 1 = url, group 2 = optional trailing word.
    // The url character class is [\w./+-], so a url containing any other
    // character (":" or "?", for instance) does not match.
    IMPORT = /(?:^|[\r\n]+|\s+)@import\s*(?:url)?[\("']+\s*([\w\.\/\+\-]+)\s*["'\)]+\s*([\w]+)?\s*;/g,
    // The SPEC_* patterns are applied by _spec to count selector parts
    SPEC_E = /\w+/g,
    SPEC_ID = /#[\w\u00C0-\uFFEE\-]+/g, // (
    // not global: a replace() with this pattern rewrites the first :not(...) only
    SPEC_NOT = /:not\(([^\)]+)\)/,
    SPEC_ATTR = /\[\s*(?:([^~\^$*|=!\s]+)\s*([~\^$*|!]?\=)\s*(["'])?(.*?)\3|([^\]\s]+))\s*\]/g,
    SPEC_CLASS = /\.[\w\u00C0-\uFFEE\-]+/g,
    // the optional "(...)" part is greedy and runs to the last ")" in the text
    SPEC_PCLASS = /:[\w\-]+(?:\(.*\))?/g,
    SPEC_PELEMENT = /::[\w\-]+/g,
    SPEC_CONTAINS = /:contains\((["'])?.*?\1\)/g,
    // added by parse to the spec value of declarations marked !important
    IMPORTANT_BIAS = 10000; // !important

_cssp = {
  // uuCSSParser.collect - collect css info
  collect: function() { // @return String: minified CSS
    _ie && _cssp._memento();
    return _cssp._collect().replace(/<!\-\-|\-\->/g,  ""). // <!-- ... -->
                            replace(/\s*[\r\n]+\s*/g, ""). // ...\r\n...
                            replace(/^\s+|\s+$/g,     ""); // trim both
  },

  // uuCSSParser.parse
  parse:
      function(css) { // @param String: uuCSSParser.collect result value
                      // @return Hash: { specs: [spec-num1, spec-num2, ...],
                      //                 data: [spec-num1:{ rule,sele,decl }, ...] }
    var rv = { specs: [], data: {} }, escape = 0, v, i, j, k, iz, jz, kz,
        g1, g2, ary, expr, decl, decls, exprs, spec,
        imp = /\s*!important\s*/;

    function esc(m, q, str) {
      ++escape;
      return q + str.replace(/\{/g, "\\u007B").replace(/;/g, "\\u003B").
                     replace(/\}/g, "\\u007D").replace(/,/g, "\\u002C") + q;
    }
    function unesc(str) {
      return str.replace(/\\u007B/g, "{").replace(/\\u003B/g, ";").
                 replace(/\\u007D/g, "}").replace(/\\u002C/g, ",");
    }

    ary = css.replace(/(["'])((?:.*?)[\{\};,](?:.*?))\1/g, esc). // ungreedy
              replace(/\{\}/g, "{ }"). // ie bug
              split(/\s*\{|\}\s*/);
    !_ie && ary.pop(); // ie bug

    for (i = 0, iz = ary.length; i < iz; i += 2) {
      expr = ary[i];
      decl = ary[i + 1];
      exprs = (expr + ",").split(/,+/); !_ie && exprs.pop(); // ie bug
      decls = (decl + ";").split(/;+/); !_ie && decls.pop(); // ie bug

      for (g1 = [], g2 = [], k = 0, kz = decls.length; k < kz; ++k) {
        v = (escape ? unesc(decls[k]) : decls[k]).replace(/;+$/, "");
        imp.test(v) ? g2.push(v.replace(imp, "")) : g1.push(v);
      }

      for (j = 0, jz = exprs.length; j < jz; ++j) {
        v = escape ? unesc(exprs[j]) : exprs[j];
        spec = _cssp._spec(v);

        if (g1.length) {
          !(spec in rv.data) && (rv.specs.push(spec), rv.data[spec] = []);
          rv.data[spec].push({ rule: v + "{" + decl + "}", sele: v, decl: g1 });
        }
        if (g2.length) { // !important
          spec += IMPORTANT_BIAS;
          !(spec in rv.data) && (rv.specs.push(spec), rv.data[spec] = []);
          rv.data[spec].push({ rule: v + "{" + decl + "}", sele: v, decl: g2 });
        }
      }
    }
    rv.specs.sort(function(a, b) { return a - b; }); // sort of number order
    return rv;
  },

  _spec: function(expr) {
    var a = 0, b = 0, c = 0;

    function A() { ++a; return ""; }
    function B() { ++b; return ""; }
    function C() { ++c; return ""; }

    expr.replace(SPEC_NOT, function(m, E) { return " " + E; }).
          replace(SPEC_ID, A).        // #id
          replace(SPEC_CLASS, B).     // .class
          replace(SPEC_CONTAINS, B).  // :contains("...")
          replace(SPEC_PELEMENT, ""). // ::pseudo-element
          replace(SPEC_PCLASS, B).    // :pseudo-class
          replace(SPEC_ATTR, B).      // [attr=value]
          replace(SPEC_E, C);         // E
    return a * 100 + b * 10 + c;
  },

  // bond raw style
  _memento: function() {
    var node = _doc.getElementsByTagName("style"),
        i = 0, iz = node.length;

    for (; i < iz; ++i) {
      node[i][MEMENTO] = node[i].innerHTML;
    }
  },

  // @import
  _collect: function() { // @return String: joined CSS
    var rv = [], node = _doc.styleSheets, i = 0, iz = node.length,
        key = _ie ? "owningElement" : "ownerNode",
        key2 = _ie ? MEMENTO : "textContent";

    function load(css) {
      return css.replace(COMMENT, "").
                 replace(IMPORT, function(m, url, media) {
        return load(_cssp._loadSync(url));
      });
    }

    for (; i < iz; ++i) {
      if (!node[i].disabled) {
        if (node[i].href && /\.css$/.test(node[i].href)) {
          rv.push(load(_cssp._loadSync(node[i].href)));
        } else {
          rv.push(load(node[i][key][key2]));
        }
      }
    }
    return rv.join("");
  },

  _loadSync: function(url) { // @param String: request url
                             // @return String: responseText or ""
    function abs(url) {
      if (!/^(file|https|http)\:\/\//.test(url)) {
        var div = _doc.createElement("div");
        div.innerHTML = '<a href="' + url + '" />';
        url = div.firstChild ? div.firstChild.href
                             : /href\="([^"]+)"/.exec(div.innerHTML)[1];
      }
      return url;
    }

    try {
      var xhr;
      if (_ie && ActiveXObject) {
        xhr = new ActiveXObject("Microsoft.XMLHTTP");
      }
      if (!xhr && XMLHttpRequest) {
        xhr = new XMLHttpRequest();
      }
      xhr.open("GET", abs(url), false); // sync
      xhr.send(null);
      if (xhr.status === 200 || !xhr.status) {
        return xhr.responseText;
      }
    } catch(err) {}
    return "";
  }
};

window.uuCSSParser = _cssp; // export: publish the namespace as the global uuCSSParser
})(); // uuCSSParser scope

uuCSSParser.collect() が、CSSの情報をかき集めコメントを除去した文字列を返します。
uuCSSParser.parse(css) が、文字列を受け取りパースした結果をHashで返します。

{
  specs: [
    <i>最も小さいspec値</i>,
    ...
    <i>最も大きいspec値</i>
  ],
 
  data: {
    <i>spec値</i>: [
      {
        rule: "<i>ルール</i>",
        sele: "<i>セレクタ</i>",
        decl: ["<i>宣言</i>", ...]
      },
      ...
    ],
    ...
  }
}

uuCSSParser.parse("div { color: red; text-align: center !important }") の戻り値は、こうなります。

{
  specs: [1, 10001],
  data: {
    1: [
      {
        rule: "div{ color: red; text-align: center !important }",
        sele: "div",
        decl: [" color: red"]
      }
    ],
    10001: [
      {
        rule: "div{ color: red; text-align: center !important }",
        sele: "div",
        decl: [" text-align: center"]
      }
    ]
  }
}

ほとんどの処理は CSS3 の仕様書とコードを読めば理解できると思いますので、説明は省略します。

!important は以下のように処理します。

declaration(decl) を分解する
!important を含むものと含まないものに分ける
!important を含んでいる場合は、spec + 10000 する

次回は、

レガシーなスタイル(<td align="right">等)を収集し spec=0 で Hash の先頭に追加。セレクタは階層をさかのぼって作る(html/body/div/table/tbody/td みたいに)
インラインスタイルを収集し、spec=1000 で Hash に追加。セレクタは階層をさかのぼって作る

です。