| @@ -16,14 +16,14 @@ require ( | |||
| gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705 | |||
| gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7 | |||
| github.com/PuerkitoBio/goquery v1.5.0 | |||
| github.com/RoaringBitmap/roaring v0.4.7 // indirect | |||
| github.com/RoaringBitmap/roaring v0.4.21 // indirect | |||
| github.com/bgentry/speakeasy v0.1.0 // indirect | |||
| github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 | |||
| github.com/blevesearch/bleve v0.8.1 | |||
| github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect | |||
| github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f // indirect | |||
| github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc // indirect | |||
| github.com/blevesearch/go-porterstemmer v1.0.2 // indirect | |||
| github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect | |||
| github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect | |||
| github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe // indirect | |||
| github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd // indirect | |||
| github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect | |||
| github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect | |||
| github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect | |||
| @@ -31,14 +31,13 @@ require ( | |||
| github.com/dgrijalva/jwt-go v3.2.0+incompatible | |||
| github.com/editorconfig/editorconfig-core-go/v2 v2.1.1 | |||
| github.com/emirpasic/gods v1.12.0 | |||
| github.com/etcd-io/bbolt v1.3.2 // indirect | |||
| github.com/etcd-io/bbolt v1.3.3 // indirect | |||
| github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a | |||
| github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect | |||
| github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect | |||
| github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect | |||
| github.com/gliderlabs/ssh v0.2.2 | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd // indirect | |||
| github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e // indirect | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect | |||
| github.com/go-openapi/jsonreference v0.19.3 // indirect | |||
| github.com/go-openapi/runtime v0.19.5 // indirect | |||
| github.com/go-redis/redis v6.15.2+incompatible | |||
| @@ -68,12 +67,10 @@ require ( | |||
| github.com/mattn/go-sqlite3 v1.11.0 | |||
| github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75 | |||
| github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a | |||
| github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae // indirect | |||
| github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc | |||
| github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 | |||
| github.com/niklasfasching/go-org v0.1.8 | |||
| github.com/oliamb/cutter v0.2.2 | |||
| github.com/philhofer/fwd v1.0.0 // indirect | |||
| github.com/pkg/errors v0.8.1 | |||
| github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e | |||
| github.com/prometheus/client_golang v1.1.0 | |||
| @@ -90,19 +87,17 @@ require ( | |||
| github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect | |||
| github.com/stretchr/testify v1.4.0 | |||
| github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect | |||
| github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 // indirect | |||
| github.com/tstranex/u2f v1.0.0 | |||
| github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 | |||
| github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e | |||
| github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6 | |||
| github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 | |||
| github.com/urfave/cli v1.20.0 | |||
| github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 // indirect | |||
| github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
| golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f | |||
| golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 | |||
| golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 | |||
| golang.org/x/sys v0.0.0-20191010194322-b09406accb47 | |||
| golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 | |||
| golang.org/x/text v0.3.2 | |||
| golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect | |||
| gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect | |||
| @@ -46,8 +46,8 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN | |||
| github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= | |||
| github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= | |||
| github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= | |||
| github.com/RoaringBitmap/roaring v0.4.7 h1:eGUudvFzvF7Kxh7JjYvXfI1f7l22/2duFby7r5+d4oc= | |||
| github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= | |||
| github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8= | |||
| github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo= | |||
| github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= | |||
| github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= | |||
| github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68= | |||
| @@ -72,14 +72,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= | |||
| github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= | |||
| github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= | |||
| github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= | |||
| github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 h1:vinCy/rcjbtxWnMiw11CbMKcuyNi+y4L4MbZUpk7m4M= | |||
| github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw= | |||
| github.com/blevesearch/bleve v0.8.1 h1:20zBREtGe8dvBxCC+717SaxKcUVQOWk3/Fm75vabKpU= | |||
| github.com/blevesearch/bleve v0.8.1/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw= | |||
| github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA= | |||
| github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ= | |||
| github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f h1:J9ZVHbB2X6JNxbKw/f3Y4E9Xq+Ro+zPiivzgmi3RTvg= | |||
| github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA= | |||
| github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc h1:7OfDAkuAGx71ruzOIFqCkHqGIsVZU0C7PMw5u1bIrwU= | |||
| github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8= | |||
| github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I= | |||
| github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA= | |||
| github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg= | |||
| github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8= | |||
| github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q= | |||
| github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= | |||
| github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA= | |||
| @@ -92,6 +92,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE | |||
| github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= | |||
| github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
| github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
| github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= | |||
| github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= | |||
| github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
| github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
| @@ -102,10 +103,11 @@ github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d h1:XMf4E1U+b | |||
| github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c= | |||
| github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo= | |||
| github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs= | |||
| github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe h1:2o6Y7KMjJNsuMTF8f2H2eTKRhqH7+bQbjr+D+LnhE5M= | |||
| github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g= | |||
| github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd h1:zeuJhcG3f8eePshH3KxkNE+Xtl53pVln9MOUPMyr/1w= | |||
| github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd/go.mod h1:xbc8Ff/oG7h2ejd7AlwOpfd+6QZntc92ygpAOfGwcKY= | |||
| github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8= | |||
| github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc= | |||
| github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= | |||
| github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= | |||
| github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY= | |||
| github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8= | |||
| @@ -135,8 +137,8 @@ github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= | |||
| github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= | |||
| github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= | |||
| github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= | |||
| github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0= | |||
| github.com/etcd-io/bbolt v1.3.2/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= | |||
| github.com/etcd-io/bbolt v1.3.3 h1:gSJmxrs37LgTqR/oyJBWok6k6SvXEUerFTbltIhXkBM= | |||
| github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= | |||
| github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE= | |||
| github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs= | |||
| github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ= | |||
| @@ -154,10 +156,11 @@ github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0= | |||
| github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= | |||
| github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= | |||
| github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd h1:r04MMPyLHj/QwZuMJ5+7tJcBr1AQjpiAK/rZWRrQT7o= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
| github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e h1:SiEs4J3BKVIeaWrH3tKaz3QLZhJ68iJ/A4xrzIoE5+Y= | |||
| github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
| github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= | |||
| github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
| github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= | |||
| github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= | |||
| github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= | |||
| @@ -279,6 +282,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR | |||
| github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
| github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= | |||
| github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= | |||
| github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg= | |||
| @@ -304,6 +309,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= | |||
| github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= | |||
| github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= | |||
| github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= | |||
| github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= | |||
| github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0= | |||
| github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio= | |||
| github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko= | |||
| @@ -467,6 +473,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq | |||
| github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= | |||
| github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= | |||
| github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= | |||
| github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= | |||
| github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= | |||
| github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= | |||
| github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= | |||
| github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= | |||
| @@ -504,11 +512,13 @@ github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= | |||
| github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= | |||
| github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= | |||
| github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= | |||
| github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= | |||
| github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= | |||
| github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= | |||
| github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= | |||
| github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= | |||
| github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= | |||
| github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= | |||
| github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= | |||
| github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= | |||
| github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4= | |||
| @@ -529,8 +539,8 @@ github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 h1:HOxvxvnntLiPn1 | |||
| github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8= | |||
| github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= | |||
| github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= | |||
| github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 h1:ZVvr38DYEyOPyelySqvF0I9I++85NnUMsWkroBDS4fs= | |||
| github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
| github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= | |||
| github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
| github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= | |||
| github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ= | |||
| github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM= | |||
| @@ -538,6 +548,7 @@ github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ= | |||
| github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo= | |||
| github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= | |||
| github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= | |||
| github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= | |||
| github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= | |||
| github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw= | |||
| github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU= | |||
| @@ -549,8 +560,8 @@ github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 h1:Z79lyIznnziKA | |||
| github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0= | |||
| github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw= | |||
| github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= | |||
| github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 h1:E8u341JM/N8LCnPXBV6ZFD1RKo/j+qHl1XOqSV+GstA= | |||
| github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
| github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
| github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
| github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70= | |||
| github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4= | |||
| github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= | |||
| @@ -574,6 +585,7 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/ | |||
| go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= | |||
| golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
| golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
| golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
| golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
| golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | |||
| golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | |||
| @@ -642,6 +654,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h | |||
| golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| @@ -660,6 +673,8 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7w | |||
| golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY= | |||
| golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE= | |||
| golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
| golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
| golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= | |||
| @@ -0,0 +1,20 @@ | |||
| kind: pipeline | |||
| name: default | |||
| workspace: | |||
| base: /go | |||
| path: src/github.com/RoaringBitmap/roaring | |||
| steps: | |||
| - name: test | |||
| image: golang | |||
| commands: | |||
| - go get -t | |||
| - go test | |||
| - go test -race -run TestConcurrent* | |||
| - go build -tags appengine | |||
| - go test -tags appengine | |||
| - GOARCH=386 go build | |||
| - GOARCH=386 go test | |||
| - GOARCH=arm go build | |||
| - GOARCH=arm64 go build | |||
| @@ -8,10 +8,12 @@ install: | |||
| notifications: | |||
| email: false | |||
| go: | |||
| - 1.7.x | |||
| - 1.8.x | |||
| - 1.9.x | |||
| - 1.10.x | |||
| - "1.7.x" | |||
| - "1.8.x" | |||
| - "1.9.x" | |||
| - "1.10.x" | |||
| - "1.11.x" | |||
| - "1.12.x" | |||
| - tip | |||
| # whitelist | |||
| @@ -21,10 +23,14 @@ branches: | |||
| script: | |||
| - goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test | |||
| - go test -race -run TestConcurrent* | |||
| - go build -tags appengine | |||
| - go test -tags appengine | |||
| - GOARCH=arm64 go build | |||
| - GOARCH=386 go build | |||
| - GOARCH=386 go test | |||
| - GOARCH=arm go build | |||
| - GOARCH=arm64 go build | |||
| matrix: | |||
| allow_failures: | |||
| - go: tip | |||
| @@ -7,4 +7,5 @@ Bob Potter (@bpot), | |||
| Tyson Maly (@tvmaly), | |||
| Will Glynn (@willglynn), | |||
| Brent Pedersen (@brentp) | |||
| Maciej Biłas (@maciej) | |||
| Maciej Biłas (@maciej), | |||
| Joe Nall (@joenall) | |||
| @@ -9,4 +9,8 @@ Will Glynn (@willglynn), | |||
| Brent Pedersen (@brentp), | |||
| Jason E. Aten (@glycerine), | |||
| Vali Malinoiu (@0x4139), | |||
| Forud Ghafouri (@fzerorubigd) | |||
| Forud Ghafouri (@fzerorubigd), | |||
| Joe Nall (@joenall), | |||
| (@fredim), | |||
| Edd Robinson (@e-dard), | |||
| Alexander Petrov (@alldroll) | |||
| @@ -200,3 +200,36 @@ | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| See the License for the specific language governing permissions and | |||
| limitations under the License. | |||
| ================================================================================ | |||
| Portions of runcontainer.go are from the Go standard library, which is licensed | |||
| under: | |||
| Copyright (c) 2009 The Go Authors. All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| * Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above | |||
| copyright notice, this list of conditions and the following disclaimer | |||
| in the documentation and/or other materials provided with the | |||
| distribution. | |||
| * Neither the name of Google Inc. nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| @@ -1,4 +1,4 @@ | |||
| .PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets | |||
| .PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets | |||
| @@ -63,7 +63,7 @@ qa: fmtcheck test vet lint | |||
| # Get the dependencies | |||
| deps: | |||
| GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey | |||
| GOPATH=$(GOPATH) go get github.com/stretchr/testify | |||
| GOPATH=$(GOPATH) go get github.com/willf/bitset | |||
| GOPATH=$(GOPATH) go get github.com/golang/lint/golint | |||
| GOPATH=$(GOPATH) go get github.com/mschoch/smat | |||
| @@ -97,18 +97,8 @@ nuke: | |||
| rm -rf ./target | |||
| GOPATH=$(GOPATH) go clean -i ./... | |||
| rle: | |||
| cp rle.go rle16.go | |||
| perl -pi -e 's/32/16/g' rle16.go | |||
| cp rle_test.go rle16_test.go | |||
| perl -pi -e 's/32/16/g' rle16_test.go | |||
| backrle: | |||
| cp rle16.go rle.go | |||
| perl -pi -e 's/16/32/g' rle.go | |||
| perl -pi -e 's/2032/2016/g' rle.go | |||
| ser: rle | |||
| ser: | |||
| go generate | |||
| cover: | |||
| @@ -1,4 +1,5 @@ | |||
| roaring [](https://travis-ci.org/RoaringBitmap/roaring) [](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [](https://godoc.org/github.com/RoaringBitmap/roaring) [](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) | |||
| [](https://cloud.drone.io/RoaringBitmap/roaring) | |||
| ============= | |||
| This is a go version of the Roaring bitmap data structure. | |||
| @@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure. | |||
| Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and | |||
| [Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. | |||
| [Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. | |||
| [lucene]: https://lucene.apache.org/ | |||
| [solr]: https://lucene.apache.org/solr/ | |||
| [elasticsearch]: https://www.elastic.co/products/elasticsearch | |||
| [druid]: http://druid.io/ | |||
| [druid]: https://druid.apache.org/ | |||
| [spark]: https://spark.apache.org/ | |||
| [opensearchserver]: http://www.opensearchserver.com | |||
| [cloudtorrent]: https://github.com/jpillora/cloud-torrent | |||
| @@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r | |||
| Dependencies are fetched automatically by giving the `-t` flag to `go get`. | |||
| They include: | |||
| - github.com/smartystreets/goconvey/convey | |||
| - github.com/willf/bitset | |||
| - github.com/mschoch/smat | |||
| - github.com/glycerine/go-unsnap-stream | |||
| @@ -133,6 +133,7 @@ func main() { | |||
| if rb1.Equals(newrb) { | |||
| fmt.Println("I wrote the content to a byte stream and read it back.") | |||
| } | |||
| // You can iterate over bitmaps using ReverseIterator(), Iterator(), or ManyIterator(). | |||
| } | |||
| ``` | |||
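To make the iteration comment above concrete, here is a minimal sketch using the package's public iterators; it assumes the standard `Iterator()`, `ReverseIterator()`, and `ManyIterator()` API of this library and is not part of the upstream README:

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(1, 2, 3, 1000)

	// Forward iteration.
	it := rb.Iterator()
	for it.HasNext() {
		fmt.Println(it.Next())
	}

	// Reverse iteration (largest value first).
	rit := rb.ReverseIterator()
	for rit.HasNext() {
		fmt.Println(rit.Next())
	}

	// Batched iteration: fill a buffer with up to len(buf) values per call.
	buf := make([]uint32, 4)
	mit := rb.ManyIterator()
	for n := mit.NextMany(buf); n > 0; n = mit.NextMany(buf) {
		fmt.Println(buf[:n])
	}
}
```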
| @@ -206,7 +207,7 @@ You can use roaring with gore: | |||
| - go get -u github.com/motemen/gore | |||
| - Make sure that ``$GOPATH/bin`` is in your ``$PATH``. | |||
| - go get github/RoaringBitmap/roaring | |||
| - go get github.com/RoaringBitmap/roaring | |||
| ```go | |||
| $ gore | |||
| @@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin | |||
| } | |||
| } | |||
| func (ac *arrayContainer) getShortIterator() shortIterable { | |||
| func (ac *arrayContainer) getShortIterator() shortPeekable { | |||
| return &shortIterator{ac.content, 0} | |||
| } | |||
| func (ac *arrayContainer) getReverseIterator() shortIterable { | |||
| return &reverseIterator{ac.content, len(ac.content) - 1} | |||
| } | |||
| func (ac *arrayContainer) getManyIterator() manyIterable { | |||
| return &manyIterator{ac.content, 0} | |||
| return &shortIterator{ac.content, 0} | |||
| } | |||
| func (ac *arrayContainer) minimum() uint16 { | |||
| @@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container { | |||
| // flip the values in the range [firstOfRange,endx) | |||
| func (ac *arrayContainer) not(firstOfRange, endx int) container { | |||
| if firstOfRange >= endx { | |||
| //p("arrayContainer.not(): exiting early with ac.clone()") | |||
| return ac.clone() | |||
| } | |||
| return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] | |||
| @@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container { | |||
| // flip the values in the range [firstOfRange,lastOfRange] | |||
| func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { | |||
| if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] | |||
| //p("arrayContainer.notClose(): exiting early with ac.clone()") | |||
| return ac.clone() | |||
| } | |||
| // determine the span of array indices to be affected | |||
| startIndex := binarySearch(ac.content, uint16(firstOfRange)) | |||
| //p("startIndex=%v", startIndex) | |||
| if startIndex < 0 { | |||
| startIndex = -startIndex - 1 | |||
| } | |||
| lastIndex := binarySearch(ac.content, uint16(lastOfRange)) | |||
| //p("lastIndex=%v", lastIndex) | |||
| if lastIndex < 0 { | |||
| lastIndex = -lastIndex - 2 | |||
| } | |||
| @@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { | |||
| newValuesInRange := spanToBeFlipped - currentValuesInRange | |||
| cardinalityChange := newValuesInRange - currentValuesInRange | |||
| newCardinality := len(ac.content) + cardinalityChange | |||
| //p("new card is %v", newCardinality) | |||
| if newCardinality > arrayDefaultMaxSize { | |||
| //p("new card over arrayDefaultMaxSize, so returning bitmap") | |||
| return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) | |||
| } | |||
| answer := newArrayContainer() | |||
| @@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container { | |||
| } | |||
| func (ac *arrayContainer) and(a container) container { | |||
| //p("ac.and() called") | |||
| switch x := a.(type) { | |||
| case *arrayContainer: | |||
| return ac.andArray(x) | |||
| @@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container { | |||
| return ac.iandBitmap(x) | |||
| case *runContainer16: | |||
| if x.isFull() { | |||
| return ac.clone() | |||
| return ac | |||
| } | |||
| return x.andArray(ac) | |||
| } | |||
| @@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container { | |||
| // flip the values in the range [firstOfRange,lastOfRange] | |||
| func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
| //p("ac.inotClose() starting") | |||
| if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] | |||
| return ac | |||
| } | |||
| @@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
| if cardinalityChange > 0 { | |||
| if newCardinality > len(ac.content) { | |||
| if newCardinality > arrayDefaultMaxSize { | |||
| //p("ac.inotClose() converting to bitmap and doing inot there") | |||
| bcRet := ac.toBitmapContainer() | |||
| bcRet.inot(firstOfRange, lastOfRange+1) | |||
| *ac = *bcRet.toArrayContainer() | |||
| @@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
| } | |||
| } | |||
| ac.content = ac.content[:newCardinality] | |||
| //p("bottom of ac.inotClose(): returning ac") | |||
| return ac | |||
| } | |||
| @@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container { | |||
| func (ac *arrayContainer) containerType() contype { | |||
| return arrayContype | |||
| } | |||
| func (ac *arrayContainer) addOffset(x uint16) []container { | |||
| low := &arrayContainer{} | |||
| high := &arrayContainer{} | |||
| for _, val := range ac.content { | |||
| y := uint32(val) + uint32(x) | |||
| if highbits(y) > 0 { | |||
| high.content = append(high.content, lowbits(y)) | |||
| } else { | |||
| low.content = append(low.content, lowbits(y)) | |||
| } | |||
| } | |||
| return []container{low, high} | |||
| } | |||
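A short worked example of the split above (hypothetical values, package-internal): adding the offset can push a 16-bit value past 0xFFFF, and such values belong in the container for the next high-bits key, which is why two containers come back.

```go
// Hypothetical illustration inside package roaring.
ac := &arrayContainer{content: []uint16{10, 65530}}
parts := ac.addOffset(10)
low := parts[0].(*arrayContainer)  // content: [20]  (10+10 stays below 1<<16)
high := parts[1].(*arrayContainer) // content: [4]   (65530+10 = 65540 = 1<<16 + 4)
_, _ = low, high
```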
| @@ -6,7 +6,7 @@ package roaring | |||
| import "github.com/tinylib/msgp/msgp" | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 1 | |||
| // write "content" | |||
| @@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 1 | |||
| @@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *arrayContainer) Msgsize() (s int) { | |||
| s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) | |||
| return | |||
| @@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool { | |||
| return bcsi.i >= 0 | |||
| } | |||
| func (bcsi *bitmapContainerShortIterator) peekNext() uint16 { | |||
| return uint16(bcsi.i) | |||
| } | |||
| func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) { | |||
| if bcsi.hasNext() && bcsi.peekNext() < minval { | |||
| bcsi.i = bcsi.ptr.NextSetBit(int(minval)) | |||
| } | |||
| } | |||
| func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { | |||
| return &bitmapContainerShortIterator{a, a.NextSetBit(0)} | |||
| } | |||
| func (bc *bitmapContainer) getShortIterator() shortIterable { | |||
| func (bc *bitmapContainer) getShortIterator() shortPeekable { | |||
| return newBitmapContainerShortIterator(bc) | |||
| } | |||
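As a quick sketch of what the new `peekNext`/`advanceIfNeeded` pair enables (hypothetical, package-internal, setting bits directly to keep the fragment self-contained): the iterator can skip forward to the first value at or above a threshold without stepping through every intermediate bit.

```go
// Hypothetical illustration inside package roaring.
bc := newBitmapContainer()
bc.bitmap[0] |= 1 << 5  // set value 5
bc.bitmap[1] |= 1 << 36 // set value 100
it := bc.getShortIterator()
it.advanceIfNeeded(50) // jumps past 5 via NextSetBit(50)
// it.peekNext() == 100
```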
| type reverseBitmapContainerShortIterator struct { | |||
| ptr *bitmapContainer | |||
| i int | |||
| } | |||
| func (bcsi *reverseBitmapContainerShortIterator) next() uint16 { | |||
| if bcsi.i == -1 { | |||
| panic("reverseBitmapContainerShortIterator.next() going beyond what is available") | |||
| } | |||
| j := bcsi.i | |||
| bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1) | |||
| return uint16(j) | |||
| } | |||
| func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool { | |||
| return bcsi.i >= 0 | |||
| } | |||
| func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator { | |||
| if a.cardinality == 0 { | |||
| return &reverseBitmapContainerShortIterator{a, -1} | |||
| } | |||
| return &reverseBitmapContainerShortIterator{a, int(a.maximum())} | |||
| } | |||
| func (bc *bitmapContainer) getReverseIterator() shortIterable { | |||
| return newReverseBitmapContainerShortIterator(bc) | |||
| } | |||
| type bitmapContainerManyIterator struct { | |||
| ptr *bitmapContainer | |||
| base int | |||
| @@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { | |||
| for n < len(buf) { | |||
| if bitset == 0 { | |||
| base += 1 | |||
| base++ | |||
| if base >= len(bcmi.ptr.bitmap) { | |||
| bcmi.base = base | |||
| bcmi.bitset = bitset | |||
| @@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int { | |||
| func bitmapEquals(a, b []uint64) bool { | |||
| if len(a) != len(b) { | |||
| //p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b)) | |||
| return false | |||
| } | |||
| for i, v := range a { | |||
| if v != b[i] { | |||
| //p("bitmaps differ on element i=%v", i) | |||
| return false | |||
| } | |||
| } | |||
| //p("bitmapEquals returning true") | |||
| return true | |||
| } | |||
| @@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui | |||
| func (bc *bitmapContainer) equals(o container) bool { | |||
| srb, ok := o.(*bitmapContainer) | |||
| if ok { | |||
| //p("bitmapContainers.equals: both are bitmapContainers") | |||
| if srb.cardinality != bc.cardinality { | |||
| //p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality) | |||
| return false | |||
| } | |||
| return bitmapEquals(bc.bitmap, srb.bitmap) | |||
| @@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container { | |||
| // iremove returns true if i was found. | |||
| func (bc *bitmapContainer) iremove(i uint16) bool { | |||
| /* branchless code | |||
| w := bc.bitmap[i>>6] | |||
| mask := uint64(1) << (i % 64) | |||
| neww := w &^ mask | |||
| bc.cardinality -= int((w ^ neww) >> (i % 64)) | |||
| bc.bitmap[i>>6] = neww */ | |||
| if bc.contains(i) { | |||
| bc.cardinality-- | |||
| bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) | |||
| @@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container | |||
| // flip all values in range [firstOfRange,endx) | |||
| func (bc *bitmapContainer) inot(firstOfRange, endx int) container { | |||
| p("bc.inot() called with [%v, %v)", firstOfRange, endx) | |||
| if endx-firstOfRange == maxCapacity { | |||
| //p("endx-firstOfRange == maxCapacity") | |||
| flipBitmapRange(bc.bitmap, firstOfRange, endx) | |||
| bc.cardinality = maxCapacity - bc.cardinality | |||
| //p("bc.cardinality is now %v", bc.cardinality) | |||
| } else if endx-firstOfRange > maxCapacity/2 { | |||
| //p("endx-firstOfRange > maxCapacity/2") | |||
| flipBitmapRange(bc.bitmap, firstOfRange, endx) | |||
| bc.computeCardinality() | |||
| } else { | |||
| @@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container { | |||
| func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container { | |||
| answer := bc | |||
| c := value2.getCardinality() | |||
| for k := 0; k < c; k++ { | |||
| for k := 0; k+3 < c; k += 4 { | |||
| content := (*[4]uint16)(unsafe.Pointer(&value2.content[k])) | |||
| vc0 := content[0] | |||
| i0 := uint(vc0) >> 6 | |||
| answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64)) | |||
| vc1 := content[1] | |||
| i1 := uint(vc1) >> 6 | |||
| answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64)) | |||
| vc2 := content[2] | |||
| i2 := uint(vc2) >> 6 | |||
| answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64)) | |||
| vc3 := content[3] | |||
| i3 := uint(vc3) >> 6 | |||
| answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64)) | |||
| } | |||
| for k := c &^ 3; k < c; k++ { | |||
| vc := value2.content[k] | |||
| i := uint(vc) >> 6 | |||
| answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) | |||
| } | |||
| answer.cardinality = invalidCardinality | |||
| return answer | |||
| } | |||
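The unrolled loop above handles four array values per iteration (its condition `k+3 < c` stops as soon as a full group of four is no longer available), and the second loop mops up the remainder starting at `c &^ 3`, the cardinality rounded down to a multiple of four. For example, with c = 10 the first loop runs for k = 0 and k = 4 (covering indices 0 through 7), and the tail loop starts at 10 &^ 3 = 8, covering indices 8 and 9.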
| @@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container { | |||
| } | |||
| func (bc *bitmapContainer) iandNot(a container) container { | |||
| //p("bitmapContainer.iandNot() starting") | |||
| switch x := a.(type) { | |||
| case *arrayContainer: | |||
| return bc.iandNotArray(x) | |||
| @@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container { | |||
| return ac | |||
| } | |||
| func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer { | |||
| func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container { | |||
| newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) | |||
| for k := 0; k < len(bc.bitmap); k++ { | |||
| bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] | |||
| } | |||
| bc.cardinality = newCardinality | |||
| if bc.getCardinality() <= arrayDefaultMaxSize { | |||
| return bc.toArrayContainer() | |||
| } | |||
| return bc | |||
| } | |||
| @@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int { | |||
| return -1 | |||
| } | |||
| func (bc *bitmapContainer) PrevSetBit(i int) int { | |||
| if i < 0 { | |||
| return -1 | |||
| } | |||
| x := i / 64 | |||
| if x >= len(bc.bitmap) { | |||
| return -1 | |||
| } | |||
| w := bc.bitmap[x] | |||
| b := i % 64 | |||
| w = w << uint(63-b) | |||
| if w != 0 { | |||
| return i - countLeadingZeros(w) | |||
| } | |||
| x-- | |||
| for ; x >= 0; x-- { | |||
| if bc.bitmap[x] != 0 { | |||
| return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x]) | |||
| } | |||
| } | |||
| return -1 | |||
| } | |||
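A brief sketch of the behaviour of `PrevSetBit` (hypothetical, package-internal, bits set directly for illustration): it returns the position of the highest set bit at or below the argument, or -1 when there is none.

```go
// Hypothetical illustration inside package roaring.
bc := newBitmapContainer()
bc.bitmap[0] |= 1 << 3 // set value 3
bc.bitmap[1] |= 1      // set value 64
// bc.PrevSetBit(70) == 64
// bc.PrevSetBit(60) == 3
// bc.PrevSetBit(2)  == -1
```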
| // reference the java implementation | |||
| // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 | |||
| // | |||
| @@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer { | |||
| func (bc *bitmapContainer) containerType() contype { | |||
| return bitmapContype | |||
| } | |||
| func (bc *bitmapContainer) addOffset(x uint16) []container { | |||
| low := newBitmapContainer() | |||
| high := newBitmapContainer() | |||
| b := uint32(x) >> 6 | |||
| i := uint32(x) % 64 | |||
| end := uint32(1024) - b | |||
| if i == 0 { | |||
| copy(low.bitmap[b:], bc.bitmap[:end]) | |||
| copy(high.bitmap[:b], bc.bitmap[end:]) | |||
| } else { | |||
| low.bitmap[b] = bc.bitmap[0] << i | |||
| for k := uint32(1); k < end; k++ { | |||
| newval := bc.bitmap[k] << i | |||
| if newval == 0 { | |||
| newval = bc.bitmap[k-1] >> (64 - i) | |||
| } | |||
| low.bitmap[b+k] = newval | |||
| } | |||
| for k := end; k < 1024; k++ { | |||
| newval := bc.bitmap[k] << i | |||
| if newval == 0 { | |||
| newval = bc.bitmap[k-1] >> (64 - i) | |||
| } | |||
| high.bitmap[k-end] = newval | |||
| } | |||
| high.bitmap[b] = bc.bitmap[1023] >> (64 - i) | |||
| } | |||
| low.computeCardinality() | |||
| high.computeCardinality() | |||
| return []container{low, high} | |||
| } | |||
| @@ -6,7 +6,7 @@ package roaring | |||
| import "github.com/tinylib/msgp/msgp" | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 2 | |||
| // write "cardinality" | |||
| @@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 2 | |||
| @@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *bitmapContainer) Msgsize() (s int) { | |||
| s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 2 | |||
| // write "ptr" | |||
| @@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 2 | |||
| @@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *bitmapContainerShortIterator) Msgsize() (s int) { | |||
| s = 1 + 4 | |||
| if z.ptr == nil { | |||
| @@ -0,0 +1,161 @@ | |||
| package roaring | |||
| import ( | |||
| "encoding/binary" | |||
| "io" | |||
| ) | |||
| type byteInput interface { | |||
| // next returns a slice containing the next n bytes from the buffer, | |||
| // advancing the buffer as if the bytes had been returned by Read. | |||
| next(n int) ([]byte, error) | |||
| // readUInt32 reads uint32 with LittleEndian order | |||
| readUInt32() (uint32, error) | |||
| // readUInt16 reads uint16 with LittleEndian order | |||
| readUInt16() (uint16, error) | |||
| // getReadBytes returns the number of bytes read so far | |||
| getReadBytes() int64 | |||
| // skipBytes skips exactly n bytes | |||
| skipBytes(n int) error | |||
| } | |||
| func newByteInputFromReader(reader io.Reader) byteInput { | |||
| return &byteInputAdapter{ | |||
| r: reader, | |||
| readBytes: 0, | |||
| } | |||
| } | |||
| func newByteInput(buf []byte) byteInput { | |||
| return &byteBuffer{ | |||
| buf: buf, | |||
| off: 0, | |||
| } | |||
| } | |||
| type byteBuffer struct { | |||
| buf []byte | |||
| off int | |||
| } | |||
| // next returns a slice containing the next n bytes from the reader | |||
| // If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned | |||
| func (b *byteBuffer) next(n int) ([]byte, error) { | |||
| m := len(b.buf) - b.off | |||
| if n > m { | |||
| return nil, io.ErrUnexpectedEOF | |||
| } | |||
| data := b.buf[b.off : b.off+n] | |||
| b.off += n | |||
| return data, nil | |||
| } | |||
| // readUInt32 reads uint32 with LittleEndian order | |||
| func (b *byteBuffer) readUInt32() (uint32, error) { | |||
| if len(b.buf)-b.off < 4 { | |||
| return 0, io.ErrUnexpectedEOF | |||
| } | |||
| v := binary.LittleEndian.Uint32(b.buf[b.off:]) | |||
| b.off += 4 | |||
| return v, nil | |||
| } | |||
| // readUInt16 reads uint16 with LittleEndian order | |||
| func (b *byteBuffer) readUInt16() (uint16, error) { | |||
| if len(b.buf)-b.off < 2 { | |||
| return 0, io.ErrUnexpectedEOF | |||
| } | |||
| v := binary.LittleEndian.Uint16(b.buf[b.off:]) | |||
| b.off += 2 | |||
| return v, nil | |||
| } | |||
| // getReadBytes returns the number of bytes read so far | |||
| func (b *byteBuffer) getReadBytes() int64 { | |||
| return int64(b.off) | |||
| } | |||
| // skipBytes skips exactly n bytes | |||
| func (b *byteBuffer) skipBytes(n int) error { | |||
| m := len(b.buf) - b.off | |||
| if n > m { | |||
| return io.ErrUnexpectedEOF | |||
| } | |||
| b.off += n | |||
| return nil | |||
| } | |||
| // reset resets the given buffer with a new byte slice | |||
| func (b *byteBuffer) reset(buf []byte) { | |||
| b.buf = buf | |||
| b.off = 0 | |||
| } | |||
| type byteInputAdapter struct { | |||
| r io.Reader | |||
| readBytes int | |||
| } | |||
| // next reads and returns the next n bytes from the underlying reader, | |||
| // tracking the total number of bytes read. | |||
| func (b *byteInputAdapter) next(n int) ([]byte, error) { | |||
| buf := make([]byte, n) | |||
| m, err := io.ReadAtLeast(b.r, buf, n) | |||
| b.readBytes += m | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return buf, nil | |||
| } | |||
| // readUInt32 reads uint32 with LittleEndian order | |||
| func (b *byteInputAdapter) readUInt32() (uint32, error) { | |||
| buf, err := b.next(4) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| return binary.LittleEndian.Uint32(buf), nil | |||
| } | |||
| // readUInt16 reads uint16 with LittleEndian order | |||
| func (b *byteInputAdapter) readUInt16() (uint16, error) { | |||
| buf, err := b.next(2) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| return binary.LittleEndian.Uint16(buf), nil | |||
| } | |||
| // getReadBytes returns the number of bytes read so far | |||
| func (b *byteInputAdapter) getReadBytes() int64 { | |||
| return int64(b.readBytes) | |||
| } | |||
| // skipBytes skips exactly n bytes | |||
| func (b *byteInputAdapter) skipBytes(n int) error { | |||
| _, err := b.next(n) | |||
| return err | |||
| } | |||
| // reset resets the given buffer with a new stream | |||
| func (b *byteInputAdapter) reset(stream io.Reader) { | |||
| b.r = stream | |||
| b.readBytes = 0 | |||
| } | |||
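A minimal usage sketch for the two byteInput implementations above (the helper function and byte values are illustrative, not part of the upstream change):

```go
package roaring

import (
	"bytes"
	"fmt"
)

// exampleByteInput is a hypothetical helper showing the byteInput API.
func exampleByteInput() {
	data := []byte{0x2A, 0x00, 0x00, 0x00, 0x07, 0x00} // 42 as uint32 LE, then 7 as uint16 LE

	in := newByteInput(data) // in-memory buffer
	v32, _ := in.readUInt32()
	v16, _ := in.readUInt16()
	fmt.Println(v32, v16, in.getReadBytes()) // 42 7 6

	// The same interface can wrap any io.Reader.
	in2 := newByteInputFromReader(bytes.NewReader(data))
	v, _ := in2.readUInt32()
	fmt.Println(v) // 42
}
```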
| @@ -0,0 +1,11 @@ | |||
| // +build go1.9 | |||
| // "go1.9", from Go version 1.9 onward | |||
| // See https://golang.org/pkg/go/build/#hdr-Build_Constraints | |||
| package roaring | |||
| import "math/bits" | |||
| func countLeadingZeros(x uint64) int { | |||
| return bits.LeadingZeros64(x) | |||
| } | |||
| @@ -0,0 +1,36 @@ | |||
| // +build !go1.9 | |||
| package roaring | |||
| // countLeadingZeros returns the number of consecutive most significant zero | |||
| // bits of x; it is the pre-Go-1.9 fallback for bits.LeadingZeros64. | |||
| func countLeadingZeros(i uint64) int { | |||
| if i == 0 { | |||
| return 64 | |||
| } | |||
| n := 1 | |||
| x := uint32(i >> 32) | |||
| if x == 0 { | |||
| n += 32 | |||
| x = uint32(i) | |||
| } | |||
| if (x >> 16) == 0 { | |||
| n += 16 | |||
| x <<= 16 | |||
| } | |||
| if (x >> 24) == 0 { | |||
| n += 8 | |||
| x <<= 8 | |||
| } | |||
| if x>>28 == 0 { | |||
| n += 4 | |||
| x <<= 4 | |||
| } | |||
| if x>>30 == 0 { | |||
| n += 2 | |||
| x <<= 2 | |||
| } | |||
| n -= int(x >> 31) | |||
| return n | |||
| } | |||
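A few spot checks of the pre-Go-1.9 fallback above, matching what `bits.LeadingZeros64` would return (hypothetical test, not part of the upstream change):

```go
package roaring

import "testing"

// TestCountLeadingZerosExamples is a hypothetical spot check.
func TestCountLeadingZerosExamples(t *testing.T) {
	cases := map[uint64]int{
		0:          64,
		1:          63,
		1 << 63:    0,
		0x0000FFFF: 48,
	}
	for in, want := range cases {
		if got := countLeadingZeros(in); got != want {
			t.Fatalf("countLeadingZeros(%#x) = %d, want %d", in, got, want)
		}
	}
}
```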
| @@ -0,0 +1,16 @@ | |||
| module github.com/RoaringBitmap/roaring | |||
| go 1.12 | |||
| require ( | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 | |||
| github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect | |||
| github.com/golang/snappy v0.0.1 // indirect | |||
| github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect | |||
| github.com/jtolds/gls v4.20.0+incompatible // indirect | |||
| github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae | |||
| github.com/philhofer/fwd v1.0.0 // indirect | |||
| github.com/stretchr/testify v1.4.0 | |||
| github.com/tinylib/msgp v1.1.0 | |||
| github.com/willf/bitset v1.1.10 | |||
| ) | |||
| @@ -0,0 +1,30 @@ | |||
| github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | |||
| github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4= | |||
| github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
| github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= | |||
| github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
| github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= | |||
| github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= | |||
| github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
| github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= | |||
| github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= | |||
| github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY= | |||
| github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= | |||
| github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ= | |||
| github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= | |||
| github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | |||
| github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | |||
| github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= | |||
| github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | |||
| github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= | |||
| github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= | |||
| github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= | |||
| github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
| github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
| github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
| gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | |||
| gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | |||
| gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= | |||
| gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= | |||
| @@ -4,12 +4,7 @@ type manyIterable interface { | |||
| nextMany(hs uint32, buf []uint32) int | |||
| } | |||
| type manyIterator struct { | |||
| slice []uint16 | |||
| loc int | |||
| } | |||
| func (si *manyIterator) nextMany(hs uint32, buf []uint32) int { | |||
| func (si *shortIterator) nextMany(hs uint32, buf []uint32) int { | |||
| n := 0 | |||
| l := si.loc | |||
| s := si.slice | |||
| @@ -143,8 +143,8 @@ func toBitmapContainer(c container) container { | |||
| func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { | |||
| expectedKeys := -1 | |||
| appendedKeys := 0 | |||
| keys := make([]uint16, 0) | |||
| containers := make([]container, 0) | |||
| var keys []uint16 | |||
| var containers []container | |||
| for appendedKeys != expectedKeys { | |||
| select { | |||
| case item := <-resultChan: | |||
| @@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap { | |||
| // (if it is set to 0, a default number of workers is chosen) | |||
| func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { | |||
| var lKey uint16 = MaxUint16 | |||
| var hKey uint16 = 0 | |||
| var hKey uint16 | |||
| bitmapsFiltered := bitmaps[:0] | |||
| for _, b := range bitmaps { | |||
| @@ -1,163 +0,0 @@ | |||
| package roaring | |||
| import ( | |||
| "fmt" | |||
| ) | |||
| // common to rle32.go and rle16.go | |||
| // rleVerbose controls whether p() prints show up. | |||
| // The testing package sets this based on | |||
| // testing.Verbose(). | |||
| var rleVerbose bool | |||
| // p is a shorthand for fmt.Printf with beginning and | |||
| // trailing newlines. p() makes it easy | |||
| // to add diagnostic print statements. | |||
| func p(format string, args ...interface{}) { | |||
| if rleVerbose { | |||
| fmt.Printf("\n"+format+"\n", args...) | |||
| } | |||
| } | |||
| // MaxUint32 is the largest uint32 value. | |||
| const MaxUint32 = 4294967295 | |||
| // MaxUint16 is the largest 16 bit unsigned int. | |||
| // This is the largest value an interval16 can store. | |||
| const MaxUint16 = 65535 | |||
| // searchOptions allows us to accelerate runContainer32.search with | |||
| // prior knowledge of (mostly lower) bounds. This is used by Union | |||
| // and Intersect. | |||
| type searchOptions struct { | |||
| // start here instead of at 0 | |||
| startIndex int64 | |||
| // upper bound instead of len(rc.iv); | |||
| // endxIndex == 0 means ignore the bound and use | |||
| // endxIndex == n ==len(rc.iv) which is also | |||
| // naturally the default for search() | |||
| // when opt = nil. | |||
| endxIndex int64 | |||
| } | |||
| // And finds the intersection of rc and b. | |||
| func (rc *runContainer32) And(b *Bitmap) *Bitmap { | |||
| out := NewBitmap() | |||
| for _, p := range rc.iv { | |||
| for i := p.start; i <= p.last; i++ { | |||
| if b.Contains(i) { | |||
| out.Add(i) | |||
| } | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // Xor returns the exclusive-or of rc and b. | |||
| func (rc *runContainer32) Xor(b *Bitmap) *Bitmap { | |||
| out := b.Clone() | |||
| for _, p := range rc.iv { | |||
| for v := p.start; v <= p.last; v++ { | |||
| if out.Contains(v) { | |||
| out.RemoveRange(uint64(v), uint64(v+1)) | |||
| } else { | |||
| out.Add(v) | |||
| } | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // Or returns the union of rc and b. | |||
| func (rc *runContainer32) Or(b *Bitmap) *Bitmap { | |||
| out := b.Clone() | |||
| for _, p := range rc.iv { | |||
| for v := p.start; v <= p.last; v++ { | |||
| out.Add(v) | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // trial is used in the randomized testing of runContainers | |||
| type trial struct { | |||
| n int | |||
| percentFill float64 | |||
| ntrial int | |||
| // only in the union test | |||
| // only subtract test | |||
| percentDelete float64 | |||
| // only in 067 randomized operations | |||
| // we do this + 1 passes | |||
| numRandomOpsPass int | |||
| // allow sampling range control | |||
| // only recent tests respect this. | |||
| srang *interval16 | |||
| } | |||
| // And finds the intersection of rc and b. | |||
| func (rc *runContainer16) And(b *Bitmap) *Bitmap { | |||
| out := NewBitmap() | |||
| for _, p := range rc.iv { | |||
| plast := p.last() | |||
| for i := p.start; i <= plast; i++ { | |||
| if b.Contains(uint32(i)) { | |||
| out.Add(uint32(i)) | |||
| } | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // Xor returns the exclusive-or of rc and b. | |||
| func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { | |||
| out := b.Clone() | |||
| for _, p := range rc.iv { | |||
| plast := p.last() | |||
| for v := p.start; v <= plast; v++ { | |||
| w := uint32(v) | |||
| if out.Contains(w) { | |||
| out.RemoveRange(uint64(w), uint64(w+1)) | |||
| } else { | |||
| out.Add(w) | |||
| } | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // Or returns the union of rc and b. | |||
| func (rc *runContainer16) Or(b *Bitmap) *Bitmap { | |||
| out := b.Clone() | |||
| for _, p := range rc.iv { | |||
| plast := p.last() | |||
| for v := p.start; v <= plast; v++ { | |||
| out.Add(uint32(v)) | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| //func (rc *runContainer32) and(container) container { | |||
| // panic("TODO. not yet implemented") | |||
| //} | |||
| // serializedSizeInBytes returns the number of bytes of memory | |||
| // required by this runContainer16. This is for the | |||
| // Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ | |||
| func (rc *runContainer16) serializedSizeInBytes() int { | |||
| // number of runs in one uint16, then each run | |||
| // needs two more uint16 | |||
| return 2 + len(rc.iv)*4 | |||
| } | |||
| // serializedSizeInBytes returns the number of bytes of memory | |||
| // required by this runContainer32. | |||
| func (rc *runContainer32) serializedSizeInBytes() int { | |||
| return 4 + len(rc.iv)*8 | |||
| } | |||
| @@ -1,695 +0,0 @@ | |||
| package roaring | |||
| /////////////////////////////////////////////////// | |||
| // | |||
| // container interface methods for runContainer16 | |||
| // | |||
| /////////////////////////////////////////////////// | |||
| import ( | |||
| "fmt" | |||
| ) | |||
| // compile time verify we meet interface requirements | |||
| var _ container = &runContainer16{} | |||
| func (rc *runContainer16) clone() container { | |||
| return newRunContainer16CopyIv(rc.iv) | |||
| } | |||
| func (rc *runContainer16) minimum() uint16 { | |||
| return rc.iv[0].start // assume not empty | |||
| } | |||
| func (rc *runContainer16) maximum() uint16 { | |||
| return rc.iv[len(rc.iv)-1].last() // assume not empty | |||
| } | |||
| func (rc *runContainer16) isFull() bool { | |||
| return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) | |||
| } | |||
| func (rc *runContainer16) and(a container) container { | |||
| if rc.isFull() { | |||
| return a.clone() | |||
| } | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return rc.intersect(c) | |||
| case *arrayContainer: | |||
| return rc.andArray(c) | |||
| case *bitmapContainer: | |||
| return rc.andBitmapContainer(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) andCardinality(a container) int { | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return int(rc.intersectCardinality(c)) | |||
| case *arrayContainer: | |||
| return rc.andArrayCardinality(c) | |||
| case *bitmapContainer: | |||
| return rc.andBitmapContainerCardinality(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| // andBitmapContainer finds the intersection of rc and b. | |||
| func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { | |||
| bc2 := newBitmapContainerFromRun(rc) | |||
| return bc2.andBitmap(bc) | |||
| } | |||
| func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { | |||
| pos := 0 | |||
| answer := 0 | |||
| maxpos := ac.getCardinality() | |||
| if maxpos == 0 { | |||
| return 0 // won't happen in actual code | |||
| } | |||
| v := ac.content[pos] | |||
| mainloop: | |||
| for _, p := range rc.iv { | |||
| for v < p.start { | |||
| pos++ | |||
| if pos == maxpos { | |||
| break mainloop | |||
| } | |||
| v = ac.content[pos] | |||
| } | |||
| for v <= p.last() { | |||
| answer++ | |||
| pos++ | |||
| if pos == maxpos { | |||
| break mainloop | |||
| } | |||
| v = ac.content[pos] | |||
| } | |||
| } | |||
| return answer | |||
| } | |||
| func (rc *runContainer16) iand(a container) container { | |||
| if rc.isFull() { | |||
| return a.clone() | |||
| } | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return rc.inplaceIntersect(c) | |||
| case *arrayContainer: | |||
| return rc.andArray(c) | |||
| case *bitmapContainer: | |||
| return rc.iandBitmapContainer(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { | |||
| // TODO: optimize by doing less allocation, possibly? | |||
| // sect will be new | |||
| sect := rc.intersect(rc2) | |||
| *rc = *sect | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { | |||
| isect := rc.andBitmapContainer(bc) | |||
| *rc = *newRunContainer16FromContainer(isect) | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) andArray(ac *arrayContainer) container { | |||
| if len(rc.iv) == 0 { | |||
| return newArrayContainer() | |||
| } | |||
| acCardinality := ac.getCardinality() | |||
| c := newArrayContainerCapacity(acCardinality) | |||
| for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { | |||
| iv := rc.iv[rlePos] | |||
| arrayVal := ac.content[arrayPos] | |||
| for iv.last() < arrayVal { | |||
| rlePos++ | |||
| if rlePos == len(rc.iv) { | |||
| return c | |||
| } | |||
| iv = rc.iv[rlePos] | |||
| } | |||
| if iv.start > arrayVal { | |||
| arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) | |||
| } else { | |||
| c.content = append(c.content, arrayVal) | |||
| arrayPos++ | |||
| } | |||
| } | |||
| return c | |||
| } | |||
| func (rc *runContainer16) andNot(a container) container { | |||
| switch c := a.(type) { | |||
| case *arrayContainer: | |||
| return rc.andNotArray(c) | |||
| case *bitmapContainer: | |||
| return rc.andNotBitmap(c) | |||
| case *runContainer16: | |||
| return rc.andNotRunContainer16(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { | |||
| k := 0 | |||
| var val int64 | |||
| for _, p := range rc.iv { | |||
| n := p.runlen() | |||
| for j := int64(0); j < n; j++ { | |||
| val = int64(p.start) + j | |||
| x[k+i] = uint32(val) | mask | |||
| k++ | |||
| } | |||
| } | |||
| } | |||
| func (rc *runContainer16) getShortIterator() shortIterable { | |||
| return rc.newRunIterator16() | |||
| } | |||
| func (rc *runContainer16) getManyIterator() manyIterable { | |||
| return rc.newManyRunIterator16() | |||
| } | |||
| // add the values in the range [firstOfRange, endx). endx | |||
| // is still abe to express 2^16 because it is an int not an uint16. | |||
| func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { | |||
| if firstOfRange >= endx { | |||
| panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) | |||
| } | |||
| addme := newRunContainer16TakeOwnership([]interval16{ | |||
| { | |||
| start: uint16(firstOfRange), | |||
| length: uint16(endx - 1 - firstOfRange), | |||
| }, | |||
| }) | |||
| *rc = *rc.union(addme) | |||
| return rc | |||
| } | |||
| // remove the values in the range [firstOfRange,endx) | |||
| func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { | |||
| if firstOfRange >= endx { | |||
| panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ | |||
| " nothing to do.", firstOfRange, endx)) | |||
| //return rc | |||
| } | |||
| x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) | |||
| rc.isubtract(x) | |||
| return rc | |||
| } | |||
| // not flip the values in the range [firstOfRange,endx) | |||
| func (rc *runContainer16) not(firstOfRange, endx int) container { | |||
| if firstOfRange >= endx { | |||
| panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) | |||
| } | |||
| return rc.Not(firstOfRange, endx) | |||
| } | |||
| // Not flips the values in the range [firstOfRange,endx). | |||
| // This is not inplace. Only the returned value has the flipped bits. | |||
| // | |||
| // Currently implemented as (!A intersect B) union (A minus B), | |||
| // where A is rc, and B is the supplied [firstOfRange, endx) interval. | |||
| // | |||
| // TODO(time optimization): convert this to a single pass | |||
| // algorithm by copying AndNotRunContainer16() and modifying it. | |||
| // Current routine is correct but | |||
| // makes 2 more passes through the arrays than should be | |||
| // strictly necessary. Measure both ways though--this may not matter. | |||
| // | |||
| func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { | |||
| if firstOfRange >= endx { | |||
| panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) | |||
| } | |||
| if firstOfRange >= endx { | |||
| return rc.Clone() | |||
| } | |||
| a := rc | |||
| // algo: | |||
| // (!A intersect B) union (A minus B) | |||
| nota := a.invert() | |||
| bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} | |||
| b := newRunContainer16TakeOwnership(bs) | |||
| notAintersectB := nota.intersect(b) | |||
| aMinusB := a.AndNotRunContainer16(b) | |||
| rc2 := notAintersectB.union(aMinusB) | |||
| return rc2 | |||
| } | |||
| // equals is now logical equals; it does not require the | |||
| // same underlying container type. | |||
| func (rc *runContainer16) equals(o container) bool { | |||
| srb, ok := o.(*runContainer16) | |||
| if !ok { | |||
| // maybe value instead of pointer | |||
| val, valok := o.(*runContainer16) | |||
| if valok { | |||
| srb = val | |||
| ok = true | |||
| } | |||
| } | |||
| if ok { | |||
| // Check if the containers are the same object. | |||
| if rc == srb { | |||
| return true | |||
| } | |||
| if len(srb.iv) != len(rc.iv) { | |||
| return false | |||
| } | |||
| for i, v := range rc.iv { | |||
| if v != srb.iv[i] { | |||
| return false | |||
| } | |||
| } | |||
| return true | |||
| } | |||
| // use generic comparison | |||
| if o.getCardinality() != rc.getCardinality() { | |||
| return false | |||
| } | |||
| rit := rc.getShortIterator() | |||
| bit := o.getShortIterator() | |||
| //k := 0 | |||
| for rit.hasNext() { | |||
| if bit.next() != rit.next() { | |||
| return false | |||
| } | |||
| //k++ | |||
| } | |||
| return true | |||
| } | |||
| func (rc *runContainer16) iaddReturnMinimized(x uint16) container { | |||
| rc.Add(x) | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iadd(x uint16) (wasNew bool) { | |||
| return rc.Add(x) | |||
| } | |||
| func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { | |||
| rc.removeKey(x) | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iremove(x uint16) bool { | |||
| return rc.removeKey(x) | |||
| } | |||
| func (rc *runContainer16) or(a container) container { | |||
| if rc.isFull() { | |||
| return rc.clone() | |||
| } | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return rc.union(c) | |||
| case *arrayContainer: | |||
| return rc.orArray(c) | |||
| case *bitmapContainer: | |||
| return rc.orBitmapContainer(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) orCardinality(a container) int { | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return int(rc.unionCardinality(c)) | |||
| case *arrayContainer: | |||
| return rc.orArrayCardinality(c) | |||
| case *bitmapContainer: | |||
| return rc.orBitmapContainerCardinality(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| // orBitmapContainer finds the union of rc and bc. | |||
| func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { | |||
| bc2 := newBitmapContainerFromRun(rc) | |||
| return bc2.iorBitmap(bc) | |||
| } | |||
| func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { | |||
| answer := 0 | |||
| for i := range rc.iv { | |||
| answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) | |||
| } | |||
| //bc.computeCardinality() | |||
| return answer | |||
| } | |||
| func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { | |||
| return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) | |||
| } | |||
| // orArray finds the union of rc and ac. | |||
| func (rc *runContainer16) orArray(ac *arrayContainer) container { | |||
| bc1 := newBitmapContainerFromRun(rc) | |||
| bc2 := ac.toBitmapContainer() | |||
| return bc1.orBitmap(bc2) | |||
| } | |||
| // orArray finds the union of rc and ac. | |||
| func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { | |||
| return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) | |||
| } | |||
| func (rc *runContainer16) ior(a container) container { | |||
| if rc.isFull() { | |||
| return rc | |||
| } | |||
| switch c := a.(type) { | |||
| case *runContainer16: | |||
| return rc.inplaceUnion(c) | |||
| case *arrayContainer: | |||
| return rc.iorArray(c) | |||
| case *bitmapContainer: | |||
| return rc.iorBitmapContainer(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { | |||
| p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv)) | |||
| for _, p := range rc2.iv { | |||
| last := int64(p.last()) | |||
| for i := int64(p.start); i <= last; i++ { | |||
| rc.Add(uint16(i)) | |||
| } | |||
| } | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { | |||
| it := bc.getShortIterator() | |||
| for it.hasNext() { | |||
| rc.Add(it.next()) | |||
| } | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iorArray(ac *arrayContainer) container { | |||
| it := ac.getShortIterator() | |||
| for it.hasNext() { | |||
| rc.Add(it.next()) | |||
| } | |||
| return rc | |||
| } | |||
| // lazyIOR is described (not yet implemented) in | |||
| // this nice note from @lemire on | |||
| // https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 | |||
| // | |||
| // Description of lazyOR and lazyIOR from @lemire: | |||
| // | |||
| // Lazy functions are optional and can be simply | |||
| // wrapper around non-lazy functions. | |||
| // | |||
| // The idea of "laziness" is as follows. It is | |||
| // inspired by the concept of lazy evaluation | |||
| // you might be familiar with (functional programming | |||
| // and all that). So a roaring bitmap is | |||
| // such that all its containers are, in some | |||
| // sense, chosen to use as little memory as | |||
| // possible. This is nice. Also, all bitsets | |||
| // are "cardinality aware" so that you can do | |||
| // fast rank/select queries, or query the | |||
| // cardinality of the whole bitmap... very fast, | |||
| // without latency. | |||
| // | |||
| // However, imagine that you are aggregating 100 | |||
| // bitmaps together. So you OR the first two, then OR | |||
| // that with the third one and so forth. Clearly, | |||
| // intermediate bitmaps don't need to be as | |||
| // compressed as possible, right? They can be | |||
| // in a "dirty state". You only need the end | |||
| // result to be in a nice state... which you | |||
| // can achieve by calling repairAfterLazy at the end. | |||
| // | |||
| // The Java/C code does something special for | |||
| // the in-place lazy OR runs. The idea is that | |||
| // instead of taking two run containers and | |||
| // generating a new one, we actually try to | |||
| // do the computation in-place through a | |||
| // technique invented by @gssiyankai (pinging him!). | |||
| // What you do is you check whether the host | |||
| // run container has lots of extra capacity. | |||
| // If it does, you move its data at the end of | |||
| // the backing array, and then you write | |||
| // the answer at the beginning. What this | |||
| // trick does is minimize memory allocations. | |||
| // | |||
| func (rc *runContainer16) lazyIOR(a container) container { | |||
| // not lazy at the moment | |||
| // TODO: make it lazy | |||
| return rc.ior(a) | |||
| /* | |||
| switch c := a.(type) { | |||
| case *arrayContainer: | |||
| return rc.lazyIorArray(c) | |||
| case *bitmapContainer: | |||
| return rc.lazyIorBitmap(c) | |||
| case *runContainer16: | |||
| return rc.lazyIorRun16(c) | |||
| } | |||
| panic("unsupported container type") | |||
| */ | |||
| } | |||
| // lazyOR is described above in lazyIOR. | |||
| func (rc *runContainer16) lazyOR(a container) container { | |||
| // not lazy at the moment | |||
| // TODO: make it lazy | |||
| return rc.or(a) | |||
| /* | |||
| switch c := a.(type) { | |||
| case *arrayContainer: | |||
| return rc.lazyOrArray(c) | |||
| case *bitmapContainer: | |||
| return rc.lazyOrBitmap(c) | |||
| case *runContainer16: | |||
| return rc.lazyOrRunContainer16(c) | |||
| } | |||
| panic("unsupported container type") | |||
| */ | |||
| } | |||
| func (rc *runContainer16) intersects(a container) bool { | |||
| // TODO: optimize by doing inplace/less allocation, possibly? | |||
| isect := rc.and(a) | |||
| return isect.getCardinality() > 0 | |||
| } | |||
| func (rc *runContainer16) xor(a container) container { | |||
| switch c := a.(type) { | |||
| case *arrayContainer: | |||
| return rc.xorArray(c) | |||
| case *bitmapContainer: | |||
| return rc.xorBitmap(c) | |||
| case *runContainer16: | |||
| return rc.xorRunContainer16(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| func (rc *runContainer16) iandNot(a container) container { | |||
| switch c := a.(type) { | |||
| case *arrayContainer: | |||
| return rc.iandNotArray(c) | |||
| case *bitmapContainer: | |||
| return rc.iandNotBitmap(c) | |||
| case *runContainer16: | |||
| return rc.iandNotRunContainer16(c) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| // flip the values in the range [firstOfRange,endx) | |||
| func (rc *runContainer16) inot(firstOfRange, endx int) container { | |||
| if firstOfRange >= endx { | |||
| panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) | |||
| } | |||
| // TODO: minimize copies, do it all inplace; not() makes a copy. | |||
| rc = rc.Not(firstOfRange, endx) | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) getCardinality() int { | |||
| return int(rc.cardinality()) | |||
| } | |||
| func (rc *runContainer16) rank(x uint16) int { | |||
| n := int64(len(rc.iv)) | |||
| xx := int64(x) | |||
| w, already, _ := rc.search(xx, nil) | |||
| if w < 0 { | |||
| return 0 | |||
| } | |||
| if !already && w == n-1 { | |||
| return rc.getCardinality() | |||
| } | |||
| var rnk int64 | |||
| if !already { | |||
| for i := int64(0); i <= w; i++ { | |||
| rnk += rc.iv[i].runlen() | |||
| } | |||
| return int(rnk) | |||
| } | |||
| for i := int64(0); i < w; i++ { | |||
| rnk += rc.iv[i].runlen() | |||
| } | |||
| rnk += int64(x-rc.iv[w].start) + 1 | |||
| return int(rnk) | |||
| } | |||
| func (rc *runContainer16) selectInt(x uint16) int { | |||
| return rc.selectInt16(x) | |||
| } | |||
| func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { | |||
| return rc.AndNotRunContainer16(b) | |||
| } | |||
| func (rc *runContainer16) andNotArray(ac *arrayContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| acb := ac.toBitmapContainer() | |||
| return rcb.andNotBitmap(acb) | |||
| } | |||
| func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| return rcb.andNotBitmap(bc) | |||
| } | |||
| func (rc *runContainer16) toBitmapContainer() *bitmapContainer { | |||
| p("run16 toBitmap starting; rc has %v ranges", len(rc.iv)) | |||
| bc := newBitmapContainer() | |||
| for i := range rc.iv { | |||
| bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) | |||
| } | |||
| bc.computeCardinality() | |||
| return bc | |||
| } | |||
| func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { | |||
| rcb := rc.toBitmapContainer() | |||
| x2b := x2.toBitmapContainer() | |||
| rcb.iandNotBitmapSurely(x2b) | |||
| // TODO: check size and optimize the return value | |||
| // TODO: is inplace modification really required? If not, elide the copy. | |||
| rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
| *rc = *rc2 | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| acb := ac.toBitmapContainer() | |||
| rcb.iandNotBitmapSurely(acb) | |||
| // TODO: check size and optimize the return value | |||
| // TODO: is inplace modification really required? If not, elide the copy. | |||
| rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
| *rc = *rc2 | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| rcb.iandNotBitmapSurely(bc) | |||
| // TODO: check size and optimize the return value | |||
| // TODO: is inplace modification really required? If not, elide the copy. | |||
| rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
| *rc = *rc2 | |||
| return rc | |||
| } | |||
| func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { | |||
| rcb := rc.toBitmapContainer() | |||
| x2b := x2.toBitmapContainer() | |||
| return rcb.xorBitmap(x2b) | |||
| } | |||
| func (rc *runContainer16) xorArray(ac *arrayContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| acb := ac.toBitmapContainer() | |||
| return rcb.xorBitmap(acb) | |||
| } | |||
| func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { | |||
| rcb := rc.toBitmapContainer() | |||
| return rcb.xorBitmap(bc) | |||
| } | |||
| // convert to bitmap or array *if needed* | |||
| func (rc *runContainer16) toEfficientContainer() container { | |||
| // runContainer16SerializedSizeInBytes(numRuns) | |||
| sizeAsRunContainer := rc.getSizeInBytes() | |||
| sizeAsBitmapContainer := bitmapContainerSizeInBytes() | |||
| card := int(rc.cardinality()) | |||
| sizeAsArrayContainer := arrayContainerSizeInBytes(card) | |||
| if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { | |||
| return rc | |||
| } | |||
| if card <= arrayDefaultMaxSize { | |||
| return rc.toArrayContainer() | |||
| } | |||
| bc := newBitmapContainerFromRun(rc) | |||
| return bc | |||
| } | |||
| func (rc *runContainer16) toArrayContainer() *arrayContainer { | |||
| ac := newArrayContainer() | |||
| for i := range rc.iv { | |||
| ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) | |||
| } | |||
| return ac | |||
| } | |||
| func newRunContainer16FromContainer(c container) *runContainer16 { | |||
| switch x := c.(type) { | |||
| case *runContainer16: | |||
| return x.Clone() | |||
| case *arrayContainer: | |||
| return newRunContainer16FromArray(x) | |||
| case *bitmapContainer: | |||
| return newRunContainer16FromBitmapContainer(x) | |||
| } | |||
| panic("unsupported container type") | |||
| } | |||
| @@ -6,12 +6,12 @@ | |||
| package roaring | |||
| import ( | |||
| "bufio" | |||
| "bytes" | |||
| "encoding/base64" | |||
| "fmt" | |||
| "io" | |||
| "strconv" | |||
| "sync" | |||
| ) | |||
| // Bitmap represents a compressed bitmap where you can add integers. | |||
| @@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { | |||
| return rb.highlowcontainer.toBytes() | |||
| } | |||
| // WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized | |||
| // Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized | |||
| // version of this bitmap to stream. The format is not | |||
| // compatible with the WriteTo() format, and is | |||
| // experimental: it may produce smaller on disk | |||
| @@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) { | |||
| // The format is compatible with other RoaringBitmap | |||
| // implementations (Java, C) and is documented here: | |||
| // https://github.com/RoaringBitmap/RoaringFormatSpec | |||
| func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { | |||
| return rb.highlowcontainer.readFrom(stream) | |||
| func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) { | |||
| stream := byteInputAdapterPool.Get().(*byteInputAdapter) | |||
| stream.reset(reader) | |||
| p, err = rb.highlowcontainer.readFrom(stream) | |||
| byteInputAdapterPool.Put(stream) | |||
| return | |||
| } | |||
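From the caller's point of view the portable round trip is unchanged by the pooled byteInputAdapter; only the allocation behaviour differs. A minimal round-trip sketch (illustrative, not taken from the PR):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(1, 2, 100, 1000, 1<<20)

	// WriteTo emits the portable format from the RoaringFormatSpec.
	var buf bytes.Buffer
	if _, err := rb.WriteTo(&buf); err != nil {
		panic(err)
	}

	// ReadFrom (now backed by a pooled byteInputAdapter) restores it.
	back := roaring.New()
	if _, err := back.ReadFrom(&buf); err != nil {
		panic(err)
	}
	fmt.Println(rb.Equals(back)) // true
}
```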
| // FromBuffer creates a bitmap from its serialized version stored in buffer | |||
| @@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { | |||
| // You should *not* change the copy-on-write status of the resulting | |||
| // bitmaps (SetCopyOnWrite). | |||
| // | |||
| func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { | |||
| return rb.highlowcontainer.fromBuffer(buf) | |||
| // If buf becomes unavailable, then a bitmap created with | |||
| // FromBuffer would be effectively broken. Furthermore, any | |||
| // bitmap derived from this bitmap (e.g., via Or, And) might | |||
| // also be broken. Thus, before making buf unavailable, you should | |||
| // call CloneCopyOnWriteContainers on all such bitmaps. | |||
| // | |||
| func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { | |||
| stream := byteBufferPool.Get().(*byteBuffer) | |||
| stream.reset(buf) | |||
| p, err = rb.highlowcontainer.readFrom(stream) | |||
| byteBufferPool.Put(stream) | |||
| return | |||
| } | |||
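The caveat documented above is easy to miss: FromBuffer aliases buf through copy-on-write containers, so the buffer must outlive the bitmap unless it is detached first. A sketch of the intended pattern using CloneCopyOnWriteContainers, which is added further down in this diff (illustrative only; real callers would typically hand FromBuffer a memory-mapped region):

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	src := roaring.BitmapOf(10, 20, 30)
	buf, err := src.ToBytes()
	if err != nil {
		panic(err)
	}

	rb := roaring.New()
	if _, err := rb.FromBuffer(buf); err != nil {
		panic(err)
	}

	// rb's containers may still point into buf (copy-on-write). Before buf
	// is unmapped or recycled, force private copies of those containers.
	rb.CloneCopyOnWriteContainers()

	fmt.Println(rb.GetCardinality()) // 3
}
```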
| var ( | |||
| byteBufferPool = sync.Pool{ | |||
| New: func() interface{} { | |||
| return &byteBuffer{} | |||
| }, | |||
| } | |||
| byteInputAdapterPool = sync.Pool{ | |||
| New: func() interface{} { | |||
| return &byteInputAdapter{} | |||
| }, | |||
| } | |||
| ) | |||
| // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap | |||
| func (rb *Bitmap) RunOptimize() { | |||
| rb.highlowcontainer.runOptimize() | |||
| @@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool { | |||
| return rb.highlowcontainer.hasRunCompression() | |||
| } | |||
| // ReadFromMsgpack reads a msgpack2/snappy-streaming serialized | |||
| // Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized | |||
| // version of this bitmap from stream. The expected | |||
| // format is that written by the WriteToMsgpack() | |||
| // call; see additional notes there. | |||
| @@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) { | |||
| } | |||
| // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap | |||
| // (same as ToBytes) | |||
| func (rb *Bitmap) MarshalBinary() ([]byte, error) { | |||
| var buf bytes.Buffer | |||
| writer := bufio.NewWriter(&buf) | |||
| _, err := rb.WriteTo(writer) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| err = writer.Flush() | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return buf.Bytes(), nil | |||
| return rb.ToBytes() | |||
| } | |||
| // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap | |||
| func (rb *Bitmap) UnmarshalBinary(data []byte) error { | |||
| var buf bytes.Buffer | |||
| _, err := buf.Write(data) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| reader := bufio.NewReader(&buf) | |||
| _, err = rb.ReadFrom(reader) | |||
| r := bytes.NewReader(data) | |||
| _, err := rb.ReadFrom(r) | |||
| return err | |||
| } | |||
| @@ -215,10 +233,20 @@ type IntIterable interface { | |||
| Next() uint32 | |||
| } | |||
| // IntPeekable allows you to look at the next value without advancing the | |||
| // iterator, and to advance the iterator past all values smaller than minval | |||
| type IntPeekable interface { | |||
| IntIterable | |||
| // PeekNext peeks the next value without advancing the iterator | |||
| PeekNext() uint32 | |||
| // AdvanceIfNeeded advances as long as the next value is smaller than minval | |||
| AdvanceIfNeeded(minval uint32) | |||
| } | |||
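IntPeekable is what makes merge-style traversals over two bitmaps cheap: PeekNext inspects the next value without consuming it, and AdvanceIfNeeded gallops past everything below a threshold. The sketch below builds a hand-rolled intersection on top of it; commonValues is a hypothetical helper for illustration, and roaring.And remains the proper way to intersect bitmaps:

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

// commonValues walks two bitmaps with IntPeekable iterators, using
// AdvanceIfNeeded to skip over ranges that cannot possibly match.
func commonValues(a, b *roaring.Bitmap) []uint32 {
	var out []uint32
	ia, ib := a.Iterator(), b.Iterator()
	for ia.HasNext() && ib.HasNext() {
		va, vb := ia.PeekNext(), ib.PeekNext()
		switch {
		case va == vb:
			out = append(out, ia.Next())
			ib.Next()
		case va < vb:
			ia.AdvanceIfNeeded(vb)
		default:
			ib.AdvanceIfNeeded(va)
		}
	}
	return out
}

func main() {
	a := roaring.BitmapOf(1, 3, 5, 7, 1000000)
	b := roaring.BitmapOf(3, 4, 5, 1000000)
	fmt.Println(commonValues(a, b)) // [3 5 1000000]
}
```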
| type intIterator struct { | |||
| pos int | |||
| hs uint32 | |||
| iter shortIterable | |||
| iter shortPeekable | |||
| highlowcontainer *roaringArray | |||
| } | |||
| @@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 { | |||
| return x | |||
| } | |||
| // PeekNext peeks the next value without advancing the iterator | |||
| func (ii *intIterator) PeekNext() uint32 { | |||
| return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs | |||
| } | |||
| // AdvanceIfNeeded advances as long as the next value is smaller than minval | |||
| func (ii *intIterator) AdvanceIfNeeded(minval uint32) { | |||
| to := minval >> 16 | |||
| for ii.HasNext() && (ii.hs>>16) < to { | |||
| ii.pos++ | |||
| ii.init() | |||
| } | |||
| if ii.HasNext() && (ii.hs>>16) == to { | |||
| ii.iter.advanceIfNeeded(lowbits(minval)) | |||
| if !ii.iter.hasNext() { | |||
| ii.pos++ | |||
| ii.init() | |||
| } | |||
| } | |||
| } | |||
| func newIntIterator(a *Bitmap) *intIterator { | |||
| p := new(intIterator) | |||
| p.pos = 0 | |||
| @@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator { | |||
| return p | |||
| } | |||
| type intReverseIterator struct { | |||
| pos int | |||
| hs uint32 | |||
| iter shortIterable | |||
| highlowcontainer *roaringArray | |||
| } | |||
| // HasNext returns true if there are more integers to iterate over | |||
| func (ii *intReverseIterator) HasNext() bool { | |||
| return ii.pos >= 0 | |||
| } | |||
| func (ii *intReverseIterator) init() { | |||
| if ii.pos >= 0 { | |||
| ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator() | |||
| ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 | |||
| } else { | |||
| ii.iter = nil | |||
| } | |||
| } | |||
| // Next returns the next integer | |||
| func (ii *intReverseIterator) Next() uint32 { | |||
| x := uint32(ii.iter.next()) | ii.hs | |||
| if !ii.iter.hasNext() { | |||
| ii.pos = ii.pos - 1 | |||
| ii.init() | |||
| } | |||
| return x | |||
| } | |||
| func newIntReverseIterator(a *Bitmap) *intReverseIterator { | |||
| p := new(intReverseIterator) | |||
| p.highlowcontainer = &a.highlowcontainer | |||
| p.pos = a.highlowcontainer.size() - 1 | |||
| p.init() | |||
| return p | |||
| } | |||
| // ManyIntIterable allows you to iterate over the values in a Bitmap | |||
| type ManyIntIterable interface { | |||
| // pass in a buffer to fill up with values, returns how many values were returned | |||
| @@ -325,12 +416,20 @@ func (rb *Bitmap) String() string { | |||
| return buffer.String() | |||
| } | |||
| // Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order | |||
| func (rb *Bitmap) Iterator() IntIterable { | |||
| // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; | |||
| // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
| func (rb *Bitmap) Iterator() IntPeekable { | |||
| return newIntIterator(rb) | |||
| } | |||
| // Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order | |||
| // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in reverse (descending) order; | |||
| // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
| func (rb *Bitmap) ReverseIterator() IntIterable { | |||
| return newIntReverseIterator(rb) | |||
| } | |||
| // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; | |||
| // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
| func (rb *Bitmap) ManyIterator() ManyIntIterable { | |||
| return newManyIntIterator(rb) | |||
| } | |||
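Together the doc comments above describe three iterator flavors: Iterator (ascending and now peekable), ReverseIterator (descending), and ManyIterator (bulk copy into a caller-supplied buffer), and all three become invalid if the bitmap is modified mid-iteration. A short illustrative sketch of driving each (not from the PR):

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	rb := roaring.BitmapOf(4, 0, 9, 7)

	it := rb.Iterator() // ascending order
	for it.HasNext() {
		fmt.Print(it.Next(), " ") // 0 4 7 9
	}
	fmt.Println()

	rit := rb.ReverseIterator() // descending order
	for rit.HasNext() {
		fmt.Print(rit.Next(), " ") // 9 7 4 0
	}
	fmt.Println()

	buf := make([]uint32, 2)
	mit := rb.ManyIterator() // fills the buffer, returns how many were written
	for n := mit.NextMany(buf); n > 0; n = mit.NextMany(buf) {
		fmt.Println(buf[:n]) // [0 4] then [7 9]
	}
}
```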
| @@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool { | |||
| return false | |||
| } | |||
| // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process | |||
| func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { | |||
| containerOffset := highbits(offset) | |||
| inOffset := lowbits(offset) | |||
| if inOffset == 0 { | |||
| answer = x.Clone() | |||
| for pos := 0; pos < answer.highlowcontainer.size(); pos++ { | |||
| key := answer.highlowcontainer.getKeyAtIndex(pos) | |||
| key += containerOffset | |||
| answer.highlowcontainer.keys[pos] = key | |||
| } | |||
| } else { | |||
| answer = New() | |||
| for pos := 0; pos < x.highlowcontainer.size(); pos++ { | |||
| key := x.highlowcontainer.getKeyAtIndex(pos) | |||
| key += containerOffset | |||
| c := x.highlowcontainer.getContainerAtIndex(pos) | |||
| offsetted := c.addOffset(inOffset) | |||
| if offsetted[0].getCardinality() > 0 { | |||
| curSize := answer.highlowcontainer.size() | |||
| lastkey := uint16(0) | |||
| if curSize > 0 { | |||
| lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1) | |||
| } | |||
| if curSize > 0 && lastkey == key { | |||
| prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) | |||
| orResult := prev.ior(offsetted[0]) | |||
| answer.highlowcontainer.setContainerAtIndex(curSize-1, orResult) | |||
| } else { | |||
| answer.highlowcontainer.appendContainer(key, offsetted[0], false) | |||
| } | |||
| } | |||
| if offsetted[1].getCardinality() > 0 { | |||
| answer.highlowcontainer.appendContainer(key+1, offsetted[1], false) | |||
| } | |||
| } | |||
| } | |||
| return answer | |||
| } | |||
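AddOffset shifts every stored value upward by offset. When the low 16 bits of the offset are non-zero, a single source container can straddle two destination keys, which is why the loop above handles offsetted[0] and offsetted[1] separately and merges with the previous container when keys collide. A minimal usage sketch (illustrative only):

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	// 65535 sits in the last slot of the first 16-bit container.
	rb := roaring.BitmapOf(0, 1, 65535)

	shifted := roaring.AddOffset(rb, 10) // every value moves up by 10
	fmt.Println(shifted.ToArray())       // [10 11 65545]; 65545 spills into the next container
}
```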
| // Add the integer x to the bitmap | |||
| func (rb *Bitmap) Add(x uint32) { | |||
| hb := highbits(x) | |||
| @@ -794,11 +933,6 @@ main: | |||
| } | |||
| } | |||
| /*func (rb *Bitmap) Or(x2 *Bitmap) { | |||
| results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage | |||
| rb.highlowcontainer = results.highlowcontainer | |||
| }*/ | |||
| // AndNot computes the difference between two bitmaps and stores the result in the current bitmap | |||
| func (rb *Bitmap) AndNot(x2 *Bitmap) { | |||
| pos1 := 0 | |||
| @@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
| return | |||
| } | |||
| hbStart := highbits(uint32(rangeStart)) | |||
| lbStart := lowbits(uint32(rangeStart)) | |||
| hbLast := highbits(uint32(rangeEnd - 1)) | |||
| lbLast := lowbits(uint32(rangeEnd - 1)) | |||
| hbStart := uint32(highbits(uint32(rangeStart))) | |||
| lbStart := uint32(lowbits(uint32(rangeStart))) | |||
| hbLast := uint32(highbits(uint32(rangeEnd - 1))) | |||
| lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
| var max uint32 = maxLowBit | |||
| for hb := hbStart; hb <= hbLast; hb++ { | |||
| @@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
| containerLast = uint32(lbLast) | |||
| } | |||
| i := rb.highlowcontainer.getIndex(hb) | |||
| i := rb.highlowcontainer.getIndex(uint16(hb)) | |||
| if i >= 0 { | |||
| c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) | |||
| @@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
| } | |||
| } else { // *think* the range of ones must never be | |||
| // empty. | |||
| rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) | |||
| rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) | |||
| } | |||
| } | |||
| } | |||
| @@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { | |||
| lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
| var max uint32 = maxLowBit | |||
| for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ { | |||
| for hb := hbStart; hb <= hbLast; hb++ { | |||
| containerStart := uint32(0) | |||
| if hb == uint16(hbStart) { | |||
| if hb == hbStart { | |||
| containerStart = lbStart | |||
| } | |||
| containerLast := max | |||
| if hb == uint16(hbLast) { | |||
| if hb == hbLast { | |||
| containerLast = lbLast | |||
| } | |||
| i := rb.highlowcontainer.getIndex(hb) | |||
| i := rb.highlowcontainer.getIndex(uint16(hb)) | |||
| if i >= 0 { | |||
| c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) | |||
| rb.highlowcontainer.setContainerAtIndex(i, c) | |||
| } else { // *think* the range of ones must never be | |||
| // empty. | |||
| rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) | |||
| rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) | |||
| } | |||
| } | |||
| } | |||
| @@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { | |||
| } | |||
| answer := NewBitmap() | |||
| hbStart := highbits(uint32(rangeStart)) | |||
| lbStart := lowbits(uint32(rangeStart)) | |||
| hbLast := highbits(uint32(rangeEnd - 1)) | |||
| lbLast := lowbits(uint32(rangeEnd - 1)) | |||
| hbStart := uint32(highbits(uint32(rangeStart))) | |||
| lbStart := uint32(lowbits(uint32(rangeStart))) | |||
| hbLast := uint32(highbits(uint32(rangeEnd - 1))) | |||
| lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
| // copy the containers before the active area | |||
| answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart) | |||
| answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart)) | |||
| var max uint32 = maxLowBit | |||
| for hb := hbStart; hb <= hbLast; hb++ { | |||
| @@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { | |||
| containerLast = uint32(lbLast) | |||
| } | |||
| i := bm.highlowcontainer.getIndex(hb) | |||
| j := answer.highlowcontainer.getIndex(hb) | |||
| i := bm.highlowcontainer.getIndex(uint16(hb)) | |||
| j := answer.highlowcontainer.getIndex(uint16(hb)) | |||
| if i >= 0 { | |||
| c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) | |||
| if c.getCardinality() > 0 { | |||
| answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c) | |||
| answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c) | |||
| } | |||
| } else { // *think* the range of ones must never be | |||
| // empty. | |||
| answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, | |||
| answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), | |||
| rangeOfOnes(int(containerStart), int(containerLast))) | |||
| } | |||
| } | |||
| // copy the containers after the active area. | |||
| answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast) | |||
| answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast)) | |||
| return answer | |||
| } | |||
| @@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) { | |||
| return rb.highlowcontainer.copyOnWrite | |||
| } | |||
| // CloneCopyOnWriteContainers clones all containers which have | |||
| // needCopyOnWrite set to true. | |||
| // This can be used to make sure it is safe to munmap a []byte | |||
| // that the roaring array may still have a reference to, after | |||
| // calling FromBuffer. | |||
| // More generally this function is useful if you call FromBuffer | |||
| // to construct a bitmap with a backing array buf | |||
| // and then later discard the buf array. Note that you should call | |||
| // CloneCopyOnWriteContainers on all bitmaps that were derived | |||
| // from the 'FromBuffer' bitmap since they may have dependencies | |||
| // on the buf array as well. | |||
| func (rb *Bitmap) CloneCopyOnWriteContainers() { | |||
| rb.highlowcontainer.cloneCopyOnWriteContainers() | |||
| } | |||
| // FlipInt calls Flip after casting the parameters (convenience method) | |||
| func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { | |||
| return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) | |||
| @@ -4,16 +4,16 @@ import ( | |||
| "bytes" | |||
| "encoding/binary" | |||
| "fmt" | |||
| "io" | |||
| "io/ioutil" | |||
| snappy "github.com/glycerine/go-unsnap-stream" | |||
| "github.com/tinylib/msgp/msgp" | |||
| "io" | |||
| ) | |||
| //go:generate msgp -unexported | |||
| type container interface { | |||
| addOffset(uint16) []container | |||
| clone() container | |||
| and(container) container | |||
| andCardinality(container) int | |||
| @@ -37,7 +37,8 @@ type container interface { | |||
| not(start, final int) container // range is [firstOfRange,lastOfRange) | |||
| inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) | |||
| xor(r container) container | |||
| getShortIterator() shortIterable | |||
| getShortIterator() shortPeekable | |||
| getReverseIterator() shortIterable | |||
| getManyIterator() manyIterable | |||
| contains(i uint16) bool | |||
| maximum() uint16 | |||
| @@ -61,7 +62,6 @@ type container interface { | |||
| iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) | |||
| selectInt(x uint16) int // selectInt returns the xth integer in the container | |||
| serializedSizeInBytes() int | |||
| readFrom(io.Reader) (int, error) | |||
| writeTo(io.Writer) (int, error) | |||
| numberOfRuns() int | |||
| @@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray { | |||
| return &sa | |||
| } | |||
| // clone all containers which have needCopyOnWrite set to true | |||
| // This can be used to make sure it is safe to munmap a []byte | |||
| // that the roaring array may still have a reference to. | |||
| func (ra *roaringArray) cloneCopyOnWriteContainers() { | |||
| for i, needCopyOnWrite := range ra.needCopyOnWrite { | |||
| if needCopyOnWrite { | |||
| ra.containers[i] = ra.containers[i].clone() | |||
| ra.needCopyOnWrite[i] = false | |||
| } | |||
| } | |||
| } | |||
| // unused function: | |||
| //func (ra *roaringArray) containsKey(x uint16) bool { | |||
| // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) | |||
| @@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { | |||
| // | |||
| // spec: https://github.com/RoaringBitmap/RoaringFormatSpec | |||
| // | |||
| func (ra *roaringArray) toBytes() ([]byte, error) { | |||
| stream := &bytes.Buffer{} | |||
| func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { | |||
| hasRun := ra.hasRunCompression() | |||
| isRunSizeInBytes := 0 | |||
| cookieSize := 8 | |||
| @@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) { | |||
| } | |||
| } | |||
| _, err := stream.Write(buf[:nw]) | |||
| written, err := w.Write(buf[:nw]) | |||
| if err != nil { | |||
| return nil, err | |||
| return n, err | |||
| } | |||
| for i, c := range ra.containers { | |||
| _ = i | |||
| _, err := c.writeTo(stream) | |||
| n += int64(written) | |||
| for _, c := range ra.containers { | |||
| written, err := c.writeTo(w) | |||
| if err != nil { | |||
| return nil, err | |||
| return n, err | |||
| } | |||
| n += int64(written) | |||
| } | |||
| return stream.Bytes(), nil | |||
| return n, nil | |||
| } | |||
| // | |||
| // spec: https://github.com/RoaringBitmap/RoaringFormatSpec | |||
| // | |||
| func (ra *roaringArray) writeTo(out io.Writer) (int64, error) { | |||
| by, err := ra.toBytes() | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| n, err := out.Write(by) | |||
| if err == nil && n < len(by) { | |||
| err = io.ErrShortWrite | |||
| } | |||
| return int64(n), err | |||
| func (ra *roaringArray) toBytes() ([]byte, error) { | |||
| var buf bytes.Buffer | |||
| _, err := ra.writeTo(&buf) | |||
| return buf.Bytes(), err | |||
| } | |||
| func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
| pos := 0 | |||
| if len(buf) < 8 { | |||
| return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) | |||
| func (ra *roaringArray) readFrom(stream byteInput) (int64, error) { | |||
| cookie, err := stream.readUInt32() | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) | |||
| } | |||
| cookie := binary.LittleEndian.Uint32(buf) | |||
| pos += 4 | |||
| var size uint32 // number of containers | |||
| haveRunContainers := false | |||
| var size uint32 | |||
| var isRunBitmap []byte | |||
| // cookie header | |||
| if cookie&0x0000FFFF == serialCookie { | |||
| haveRunContainers = true | |||
| size = uint32(uint16(cookie>>16) + 1) // number of containers | |||
| size = uint32(uint16(cookie>>16) + 1) | |||
| // create is-run-container bitmap | |||
| isRunBitmapSize := (int(size) + 7) / 8 | |||
| if pos+isRunBitmapSize > len(buf) { | |||
| return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize) | |||
| } | |||
| isRunBitmap, err = stream.next(isRunBitmapSize) | |||
| isRunBitmap = buf[pos : pos+isRunBitmapSize] | |||
| pos += isRunBitmapSize | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err) | |||
| } | |||
| } else if cookie == serialCookieNoRunContainer { | |||
| size = binary.LittleEndian.Uint32(buf[pos:]) | |||
| pos += 4 | |||
| size, err = stream.readUInt32() | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err) | |||
| } | |||
| } else { | |||
| return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
| return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
| } | |||
| if size > (1 << 16) { | |||
| return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") | |||
| return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers") | |||
| } | |||
| // descriptive header | |||
| // keycard - is {key, cardinality} tuple slice | |||
| if pos+2*2*int(size) > len(buf) { | |||
| return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size)) | |||
| buf, err := stream.next(2 * 2 * int(size)) | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err) | |||
| } | |||
| keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)]) | |||
| pos += 2 * 2 * int(size) | |||
| if !haveRunContainers || size >= noOffsetThreshold { | |||
| pos += 4 * int(size) | |||
| keycard := byteSliceAsUint16Slice(buf) | |||
| if isRunBitmap == nil || size >= noOffsetThreshold { | |||
| if err := stream.skipBytes(int(size) * 4); err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err) | |||
| } | |||
| } | |||
| // Allocate slices upfront as number of containers is known | |||
| @@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
| } else { | |||
| ra.containers = make([]container, size) | |||
| } | |||
| if cap(ra.keys) >= int(size) { | |||
| ra.keys = ra.keys[:size] | |||
| } else { | |||
| ra.keys = make([]uint16, size) | |||
| } | |||
| if cap(ra.needCopyOnWrite) >= int(size) { | |||
| ra.needCopyOnWrite = ra.needCopyOnWrite[:size] | |||
| } else { | |||
| @@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
| } | |||
| for i := uint32(0); i < size; i++ { | |||
| key := uint16(keycard[2*i]) | |||
| key := keycard[2*i] | |||
| card := int(keycard[2*i+1]) + 1 | |||
| ra.keys[i] = key | |||
| ra.needCopyOnWrite[i] = true | |||
| if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { | |||
| if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 { | |||
| // run container | |||
| nr := binary.LittleEndian.Uint16(buf[pos:]) | |||
| pos += 2 | |||
| if pos+int(nr)*4 > len(buf) { | |||
| return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) | |||
| nr, err := stream.readUInt16() | |||
| if err != nil { | |||
| return 0, fmt.Errorf("failed to read run container size: %s", err) | |||
| } | |||
| buf, err := stream.next(int(nr) * 4) | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err) | |||
| } | |||
| nb := runContainer16{ | |||
| iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), | |||
| iv: byteSliceAsInterval16Slice(buf), | |||
| card: int64(card), | |||
| } | |||
| pos += int(nr) * 4 | |||
| ra.containers[i] = &nb | |||
| } else if card > arrayDefaultMaxSize { | |||
| // bitmap container | |||
| buf, err := stream.next(arrayDefaultMaxSize * 2) | |||
| if err != nil { | |||
| return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err) | |||
| } | |||
| nb := bitmapContainer{ | |||
| cardinality: card, | |||
| bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), | |||
| bitmap: byteSliceAsUint64Slice(buf), | |||
| } | |||
| pos += arrayDefaultMaxSize * 2 | |||
| ra.containers[i] = &nb | |||
| } else { | |||
| // array container | |||
| nb := arrayContainer{ | |||
| byteSliceAsUint16Slice(buf[pos : pos+card*2]), | |||
| } | |||
| pos += card * 2 | |||
| ra.containers[i] = &nb | |||
| } | |||
| } | |||
| return int64(pos), nil | |||
| } | |||
| buf, err := stream.next(card * 2) | |||
| func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) { | |||
| pos := 0 | |||
| var cookie uint32 | |||
| err := binary.Read(stream, binary.LittleEndian, &cookie) | |||
| if err != nil { | |||
| return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) | |||
| } | |||
| pos += 4 | |||
| var size uint32 | |||
| haveRunContainers := false | |||
| var isRun *bitmapContainer | |||
| if cookie&0x0000FFFF == serialCookie { | |||
| haveRunContainers = true | |||
| size = uint32(uint16(cookie>>16) + 1) | |||
| bytesToRead := (int(size) + 7) / 8 | |||
| numwords := (bytesToRead + 7) / 8 | |||
| by := make([]byte, bytesToRead, numwords*8) | |||
| nr, err := io.ReadFull(stream, by) | |||
| if err != nil { | |||
| return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+ | |||
| "runContainer bit flags of length %v bytes: %v", bytesToRead, err) | |||
| } | |||
| pos += bytesToRead | |||
| by = by[:cap(by)] | |||
| isRun = newBitmapContainer() | |||
| for i := 0; i < numwords; i++ { | |||
| isRun.bitmap[i] = binary.LittleEndian.Uint64(by) | |||
| by = by[8:] | |||
| } | |||
| } else if cookie == serialCookieNoRunContainer { | |||
| err = binary.Read(stream, binary.LittleEndian, &size) | |||
| if err != nil { | |||
| return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err) | |||
| } | |||
| pos += 4 | |||
| } else { | |||
| return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
| } | |||
| if size > (1 << 16) { | |||
| return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") | |||
| } | |||
| // descriptive header | |||
| keycard := make([]uint16, 2*size, 2*size) | |||
| err = binary.Read(stream, binary.LittleEndian, keycard) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| pos += 2 * 2 * int(size) | |||
| // offset header | |||
| if !haveRunContainers || size >= noOffsetThreshold { | |||
| io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored | |||
| pos += 4 * int(size) | |||
| } | |||
| for i := uint32(0); i < size; i++ { | |||
| key := int(keycard[2*i]) | |||
| card := int(keycard[2*i+1]) + 1 | |||
| if haveRunContainers && isRun.contains(uint16(i)) { | |||
| nb := newRunContainer16() | |||
| nr, err := nb.readFrom(stream) | |||
| if err != nil { | |||
| return 0, err | |||
| return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err) | |||
| } | |||
| pos += nr | |||
| ra.appendContainer(uint16(key), nb, false) | |||
| } else if card > arrayDefaultMaxSize { | |||
| nb := newBitmapContainer() | |||
| nr, err := nb.readFrom(stream) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| nb.cardinality = card | |||
| pos += nr | |||
| ra.appendContainer(keycard[2*i], nb, false) | |||
| } else { | |||
| nb := newArrayContainerSize(card) | |||
| nr, err := nb.readFrom(stream) | |||
| if err != nil { | |||
| return 0, err | |||
| nb := arrayContainer{ | |||
| byteSliceAsUint16Slice(buf), | |||
| } | |||
| pos += nr | |||
| ra.appendContainer(keycard[2*i], nb, false) | |||
| ra.containers[i] = &nb | |||
| } | |||
| } | |||
| return int64(pos), nil | |||
| return stream.getReadBytes(), nil | |||
| } | |||
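For readers following the descriptive-header parsing above: a minimal, self-contained sketch (not part of the vendored diff) of the {key, cardinality-1} pair layout that fromBuffer and readFrom decode. The key and cardinality values here are purely illustrative.

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Each container contributes two little-endian uint16s to the
	// descriptive header: its key, then its cardinality minus one.
	hdr := make([]byte, 4)
	binary.LittleEndian.PutUint16(hdr[0:], 7)  // container key
	binary.LittleEndian.PutUint16(hdr[2:], 99) // cardinality - 1

	key := binary.LittleEndian.Uint16(hdr[0:])
	card := int(binary.LittleEndian.Uint16(hdr[2:])) + 1
	fmt.Println(key, card) // 7 100
}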
| func (ra *roaringArray) hasRunCompression() bool { | |||
| @@ -8,7 +8,7 @@ import ( | |||
| "github.com/tinylib/msgp/msgp" | |||
| ) | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 2 | |||
| // write "t" | |||
| @@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 2 | |||
| @@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *containerSerz) Msgsize() (s int) { | |||
| s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| { | |||
| var zajw uint8 | |||
| @@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z contype) EncodeMsg(en *msgp.Writer) (err error) { | |||
| err = en.WriteUint8(uint8(z)) | |||
| if err != nil { | |||
| @@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z contype) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| o = msgp.AppendUint8(o, uint8(z)) | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| { | |||
| var zwht uint8 | |||
| @@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z contype) Msgsize() (s int) { | |||
| s = msgp.Uint8Size | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 4 | |||
| // write "keys" | |||
| @@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 4 | |||
| @@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *roaringArray) Msgsize() (s int) { | |||
| s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize | |||
| for zxhx := range z.conserz { | |||
| @@ -6,7 +6,7 @@ package roaring | |||
| import "github.com/tinylib/msgp/msgp" | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 5 | |||
| // write "runstart" | |||
| @@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 5 | |||
| @@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *addHelper16) Msgsize() (s int) { | |||
| s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 | |||
| if z.rc == nil { | |||
| @@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) { | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 2 | |||
| // write "start" | |||
| @@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 2 | |||
| @@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z interval16) Msgsize() (s int) { | |||
| s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 2 | |||
| // write "iv" | |||
| @@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 2 | |||
| @@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *runContainer16) Msgsize() (s int) { | |||
| s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| if err != nil { | |||
| return | |||
| } | |||
| case "curSeq": | |||
| z.curSeq, err = dc.ReadInt64() | |||
| if err != nil { | |||
| return | |||
| } | |||
| default: | |||
| err = dc.Skip() | |||
| if err != nil { | |||
| @@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| // map header, size 4 | |||
| // map header, size 3 | |||
| // write "rc" | |||
| err = en.Append(0x84, 0xa2, 0x72, 0x63) | |||
| err = en.Append(0x83, 0xa2, 0x72, 0x63) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| @@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { | |||
| if err != nil { | |||
| return | |||
| } | |||
| // write "curSeq" | |||
| err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| err = en.WriteInt64(z.curSeq) | |||
| if err != nil { | |||
| return | |||
| } | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| // map header, size 4 | |||
| // map header, size 3 | |||
| // string "rc" | |||
| o = append(o, 0x84, 0xa2, 0x72, 0x63) | |||
| o = append(o, 0x83, 0xa2, 0x72, 0x63) | |||
| if z.rc == nil { | |||
| o = msgp.AppendNil(o) | |||
| } else { | |||
| @@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { | |||
| // string "curPosInIndex" | |||
| o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) | |||
| o = msgp.AppendUint16(o, z.curPosInIndex) | |||
| // string "curSeq" | |||
| o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) | |||
| o = msgp.AppendInt64(o, z.curSeq) | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var field []byte | |||
| _ = field | |||
| @@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| if err != nil { | |||
| return | |||
| } | |||
| case "curSeq": | |||
| z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) | |||
| if err != nil { | |||
| return | |||
| } | |||
| default: | |||
| bts, err = msgp.Skip(bts) | |||
| if err != nil { | |||
| @@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z *runIterator16) Msgsize() (s int) { | |||
| s = 1 + 3 | |||
| if z.rc == nil { | |||
| @@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) { | |||
| } else { | |||
| s += z.rc.Msgsize() | |||
| } | |||
| s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size | |||
| s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size | |||
| return | |||
| } | |||
| // DecodeMsg implements msgp.Decodable | |||
| // Deprecated: DecodeMsg implements msgp.Decodable | |||
| func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| var zjpj uint32 | |||
| zjpj, err = dc.ReadArrayHeader() | |||
| @@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { | |||
| return | |||
| } | |||
| // EncodeMsg implements msgp.Encodable | |||
| // Deprecated: EncodeMsg implements msgp.Encodable | |||
| func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { | |||
| err = en.WriteArrayHeader(uint32(len(z))) | |||
| if err != nil { | |||
| @@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { | |||
| return | |||
| } | |||
| // MarshalMsg implements msgp.Marshaler | |||
| // Deprecated: MarshalMsg implements msgp.Marshaler | |||
| func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { | |||
| o = msgp.Require(b, z.Msgsize()) | |||
| o = msgp.AppendArrayHeader(o, uint32(len(z))) | |||
| @@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // UnmarshalMsg implements msgp.Unmarshaler | |||
| // Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
| func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| var zgmo uint32 | |||
| zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) | |||
| @@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
| return | |||
| } | |||
| // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
| func (z uint16Slice) Msgsize() (s int) { | |||
| s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) | |||
| return | |||
| @@ -2,8 +2,6 @@ package roaring | |||
| import ( | |||
| "encoding/binary" | |||
| "errors" | |||
| "fmt" | |||
| "io" | |||
| "github.com/tinylib/msgp/msgp" | |||
| @@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) { | |||
| return stream.Write(buf) | |||
| } | |||
| func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) { | |||
| bts, err := b.MarshalMsg(nil) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| return stream.Write(bts) | |||
| } | |||
| func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { | |||
| bts, err := b.MarshalMsg(nil) | |||
| if err != nil { | |||
| @@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { | |||
| return stream.Write(bts) | |||
| } | |||
| func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) { | |||
| err := msgp.Decode(stream, b) | |||
| return 0, err | |||
| } | |||
| func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { | |||
| err := msgp.Decode(stream, b) | |||
| return 0, err | |||
| } | |||
| var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected") | |||
| func (b *runContainer16) readFrom(stream io.Reader) (int, error) { | |||
| b.iv = b.iv[:0] | |||
| b.card = 0 | |||
| var numRuns uint16 | |||
| err := binary.Read(stream, binary.LittleEndian, &numRuns) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| nr := int(numRuns) | |||
| encRun := make([]uint16, 2*nr) | |||
| by := make([]byte, 4*nr) | |||
| err = binary.Read(stream, binary.LittleEndian, &by) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| for i := range encRun { | |||
| if len(by) < 2 { | |||
| return 0, errCorruptedStream | |||
| } | |||
| encRun[i] = binary.LittleEndian.Uint16(by) | |||
| by = by[2:] | |||
| } | |||
| for i := 0; i < nr; i++ { | |||
| if i > 0 && b.iv[i-1].last() >= encRun[i*2] { | |||
| return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2]) | |||
| } | |||
| b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]}) | |||
| b.card += int64(encRun[i*2+1]) + 1 | |||
| } | |||
| return 0, err | |||
| } | |||
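The removed readFrom above spells out the run-container wire format: a uint16 run count followed by (start, length) uint16 pairs, each run covering length+1 values. A small standalone sketch of that layout, with made-up values:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// One run: start=10, length=4, i.e. the values 10..14 (length+1 of them).
	var buf bytes.Buffer
	binary.Write(&buf, binary.LittleEndian, uint16(1))  // number of runs
	binary.Write(&buf, binary.LittleEndian, uint16(10)) // run start
	binary.Write(&buf, binary.LittleEndian, uint16(4))  // run length

	var numRuns, start, length uint16
	r := bytes.NewReader(buf.Bytes())
	binary.Read(r, binary.LittleEndian, &numRuns)
	binary.Read(r, binary.LittleEndian, &start)
	binary.Read(r, binary.LittleEndian, &length)
	fmt.Println(numRuns, start, int(length)+1) // 1 10 5
}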
| @@ -4,6 +4,7 @@ package roaring | |||
| import ( | |||
| "encoding/binary" | |||
| "errors" | |||
| "io" | |||
| ) | |||
| @@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) { | |||
| } | |||
| func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { | |||
| if b.cardinality <= arrayDefaultMaxSize { | |||
| return 0, errors.New("refusing to write bitmap container with cardinality of array container") | |||
| } | |||
| // Write set | |||
| buf := make([]byte, 8*len(b.bitmap)) | |||
| for i, v := range b.bitmap { | |||
| @@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
| return by | |||
| } | |||
| func uint16SliceAsByteSlice(slice []uint16) []byte { | |||
| by := make([]byte, len(slice)*2) | |||
| for i, v := range slice { | |||
| binary.LittleEndian.PutUint16(by[i*2:], v) | |||
| } | |||
| return by | |||
| } | |||
| func byteSliceAsUint16Slice(slice []byte) []uint16 { | |||
| if len(slice)%2 != 0 { | |||
| panic("Slice size should be divisible by 2") | |||
| @@ -3,8 +3,10 @@ | |||
| package roaring | |||
| import ( | |||
| "errors" | |||
| "io" | |||
| "reflect" | |||
| "runtime" | |||
| "unsafe" | |||
| ) | |||
| @@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) { | |||
| } | |||
| func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { | |||
| if bc.cardinality <= arrayDefaultMaxSize { | |||
| return 0, errors.New("refusing to write bitmap container with cardinality of array container") | |||
| } | |||
| buf := uint64SliceAsByteSlice(bc.bitmap) | |||
| return stream.Write(buf) | |||
| } | |||
| // readFrom reads an arrayContainer from stream. | |||
| // PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content) | |||
| // *before* you call readFrom. We can't guess the size in the stream | |||
| // by this point. | |||
| func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) { | |||
| buf := uint16SliceAsByteSlice(ac.content) | |||
| return io.ReadFull(stream, buf) | |||
| } | |||
| func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) { | |||
| buf := uint64SliceAsByteSlice(bc.bitmap) | |||
| n, err := io.ReadFull(stream, buf) | |||
| bc.computeCardinality() | |||
| return n, err | |||
| } | |||
| func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
| // make a new slice header | |||
| header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| @@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
| header.Len *= 8 | |||
| header.Cap *= 8 | |||
| // instantiate result and use KeepAlive so data isn't unmapped. | |||
| result := *(*[]byte)(unsafe.Pointer(&header)) | |||
| runtime.KeepAlive(&slice) | |||
| // return it | |||
| return *(*[]byte)(unsafe.Pointer(&header)) | |||
| return result | |||
| } | |||
| func uint16SliceAsByteSlice(slice []uint16) []byte { | |||
| @@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte { | |||
| header.Len *= 2 | |||
| header.Cap *= 2 | |||
| // instantiate result and use KeepAlive so data isn't unmapped. | |||
| result := *(*[]byte)(unsafe.Pointer(&header)) | |||
| runtime.KeepAlive(&slice) | |||
| // return it | |||
| return *(*[]byte)(unsafe.Pointer(&header)) | |||
| return result | |||
| } | |||
| func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { | |||
| @@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { | |||
| // Deserialization code follows | |||
| func byteSliceAsUint16Slice(slice []byte) []uint16 { | |||
| //// | |||
| // These methods (byteSliceAsUint16Slice,...) do not make copies, | |||
| // they are pointer-based (unsafe). The caller is responsible for | |||
| // ensuring that the input slice does not get garbage collected, deleted | |||
| // or modified while you hold the returned slice. | |||
| //// | |||
| func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder | |||
| if len(slice)%2 != 0 { | |||
| panic("Slice size should be divisible by 2") | |||
| } | |||
| // reference: https://go101.org/article/unsafe.html | |||
| // make a new slice header | |||
| header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
| // update its capacity and length | |||
| header.Len /= 2 | |||
| header.Cap /= 2 | |||
| // transfer the data from the given slice to a new variable (our result) | |||
| rHeader.Data = bHeader.Data | |||
| rHeader.Len = bHeader.Len / 2 | |||
| rHeader.Cap = bHeader.Cap / 2 | |||
| // return it | |||
| return *(*[]uint16)(unsafe.Pointer(&header)) | |||
| // instantiate result and use KeepAlive so data isn't unmapped. | |||
| runtime.KeepAlive(&slice) // still crucial: keeps the source slice alive so the GC cannot free its backing array | |||
| // return result | |||
| return | |||
| } | |||
| func byteSliceAsUint64Slice(slice []byte) []uint64 { | |||
| func byteSliceAsUint64Slice(slice []byte) (result []uint64) { | |||
| if len(slice)%8 != 0 { | |||
| panic("Slice size should be divisible by 8") | |||
| } | |||
| // reference: https://go101.org/article/unsafe.html | |||
| // make a new slice header | |||
| header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
| // update its capacity and length | |||
| header.Len /= 8 | |||
| header.Cap /= 8 | |||
| // transfer the data from the given slice to a new variable (our result) | |||
| rHeader.Data = bHeader.Data | |||
| rHeader.Len = bHeader.Len / 8 | |||
| rHeader.Cap = bHeader.Cap / 8 | |||
| // return it | |||
| return *(*[]uint64)(unsafe.Pointer(&header)) | |||
| // instantiate result and use KeepAlive so data isn't unmapped. | |||
| runtime.KeepAlive(&slice) // still crucial: keeps the source slice alive so the GC cannot free its backing array | |||
| // return result | |||
| return | |||
| } | |||
| func byteSliceAsInterval16Slice(slice []byte) []interval16 { | |||
| func byteSliceAsInterval16Slice(slice []byte) (result []interval16) { | |||
| if len(slice)%4 != 0 { | |||
| panic("Slice size should be divisible by 4") | |||
| } | |||
| // reference: https://go101.org/article/unsafe.html | |||
| // make a new slice header | |||
| header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
| rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
| // update its capacity and length | |||
| header.Len /= 4 | |||
| header.Cap /= 4 | |||
| // transfer the data from the given slice to a new variable (our result) | |||
| rHeader.Data = bHeader.Data | |||
| rHeader.Len = bHeader.Len / 4 | |||
| rHeader.Cap = bHeader.Cap / 4 | |||
| // return it | |||
| return *(*[]interval16)(unsafe.Pointer(&header)) | |||
| // instantiate result and use KeepAlive so data isn't unmapped. | |||
| runtime.KeepAlive(&slice) // still crucial: keeps the source slice alive so the GC cannot free its backing array | |||
| // return result | |||
| return | |||
| } | |||
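The KeepAlive-based rewrite above reinterprets a byte slice in place rather than copying it. Below is a minimal sketch of the same pattern, under the assumption that the caller keeps the source []byte reachable for as long as the aliased view is used; bytesAsUint16s is a hypothetical stand-in for byteSliceAsUint16Slice.

package main

import (
	"fmt"
	"reflect"
	"runtime"
	"unsafe"
)

// bytesAsUint16s mirrors the zero-copy cast above: the returned slice
// aliases the input's backing array, so the caller must keep b alive.
func bytesAsUint16s(b []byte) (out []uint16) {
	src := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	dst.Data = src.Data
	dst.Len = src.Len / 2
	dst.Cap = src.Cap / 2
	runtime.KeepAlive(&b)
	return
}

func main() {
	b := []byte{0x01, 0x00, 0xff, 0x00} // little-endian 1, 255
	u := bytesAsUint16s(b)
	fmt.Println(u)       // [1 255] on little-endian machines
	runtime.KeepAlive(b) // keep the byte slice alive while u is in use
}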
| @@ -5,6 +5,12 @@ type shortIterable interface { | |||
| next() uint16 | |||
| } | |||
| type shortPeekable interface { | |||
| shortIterable | |||
| peekNext() uint16 | |||
| advanceIfNeeded(minval uint16) | |||
| } | |||
| type shortIterator struct { | |||
| slice []uint16 | |||
| loc int | |||
| @@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 { | |||
| si.loc++ | |||
| return a | |||
| } | |||
| func (si *shortIterator) peekNext() uint16 { | |||
| return si.slice[si.loc] | |||
| } | |||
| func (si *shortIterator) advanceIfNeeded(minval uint16) { | |||
| if si.hasNext() && si.peekNext() < minval { | |||
| si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval) | |||
| } | |||
| } | |||
| type reverseIterator struct { | |||
| slice []uint16 | |||
| loc int | |||
| } | |||
| func (si *reverseIterator) hasNext() bool { | |||
| return si.loc >= 0 | |||
| } | |||
| func (si *reverseIterator) next() uint16 { | |||
| a := si.slice[si.loc] | |||
| si.loc-- | |||
| return a | |||
| } | |||
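The new shortPeekable interface pairs peekNext with advanceIfNeeded so intersection code can skip ahead without consuming values. A rough sketch of that pattern follows; the real advanceIfNeeded uses the library's galloping advanceUntil search, for which the linear scan below merely stands in.

package main

import "fmt"

type peekIter struct {
	slice []uint16
	loc   int
}

func (it *peekIter) hasNext() bool    { return it.loc < len(it.slice) }
func (it *peekIter) peekNext() uint16 { return it.slice[it.loc] }
func (it *peekIter) next() uint16     { v := it.slice[it.loc]; it.loc++; return v }

// advanceIfNeeded skips ahead until the next value is >= minval.
func (it *peekIter) advanceIfNeeded(minval uint16) {
	for it.hasNext() && it.peekNext() < minval {
		it.loc++
	}
}

func main() {
	it := &peekIter{slice: []uint16{1, 4, 9, 16, 25}}
	it.advanceIfNeeded(10)
	for it.hasNext() {
		fmt.Println(it.next()) // 16, then 25
	}
}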
| @@ -14,6 +14,17 @@ const ( | |||
| serialCookie = 12347 // runs, arrays, and bitmaps | |||
| noOffsetThreshold = 4 | |||
| // MaxUint32 is the largest uint32 value. | |||
| MaxUint32 = 4294967295 | |||
| // MaxRange is one more than the maximum allowed bitmap bit index. For use as an upper | |||
| // bound for ranges. | |||
| MaxRange uint64 = MaxUint32 + 1 | |||
| // MaxUint16 is the largest 16 bit unsigned int. | |||
| // This is the largest value an interval16 can store. | |||
| MaxUint16 = 65535 | |||
| // Compute wordSizeInBytes, the size of a word in bytes. | |||
| _m = ^uint64(0) | |||
| _logS = _m>>8&1 + _m>>16&1 + _m>>32&1 | |||
| @@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) { | |||
| endword := (end - 1) / 64 | |||
| bitmap[firstword] ^= ^(^uint64(0) << uint(start%64)) | |||
| for i := firstword; i < endword; i++ { | |||
| //p("flipBitmapRange on i=%v", i) | |||
| bitmap[i] = ^bitmap[i] | |||
| } | |||
| bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64) | |||
| @@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 { | |||
| } | |||
| return b | |||
| } | |||
| func maxInt(a, b int) int { | |||
| if a > b { | |||
| return a | |||
| } | |||
| return b | |||
| } | |||
| func maxUint16(a, b uint16) uint16 { | |||
| if a > b { | |||
| return a | |||
| } | |||
| return b | |||
| } | |||
| func minUint16(a, b uint16) uint16 { | |||
| if a < b { | |||
| return a | |||
| } | |||
| return b | |||
| } | |||
| @@ -3,9 +3,9 @@ sudo: false | |||
| language: go | |||
| go: | |||
| - "1.9.x" | |||
| - "1.10.x" | |||
| - "1.11.x" | |||
| - "1.12.x" | |||
| script: | |||
| - go get golang.org/x/tools/cmd/cover | |||
| @@ -15,7 +15,12 @@ script: | |||
| - gvt restore | |||
| - go test -race -v $(go list ./... | grep -v vendor/) | |||
| - go vet $(go list ./... | grep -v vendor/) | |||
| - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) | |||
| - go test ./test -v -indexType scorch | |||
| - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then | |||
| echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; | |||
| else | |||
| errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); | |||
| fi | |||
| - docs/project-code-coverage.sh | |||
| - docs/build_children.sh | |||
| @@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { | |||
| return fieldLength, tokenFreqs | |||
| } | |||
| func (t *TextField) Analyzer() *analysis.Analyzer { | |||
| return t.analyzer | |||
| } | |||
| func (t *TextField) Value() []byte { | |||
| return t.value | |||
| } | |||
| @@ -37,6 +37,12 @@ var geoTolerance = 1E-6 | |||
| var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 | |||
| var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 | |||
| // Point represents a geo point. | |||
| type Point struct { | |||
| Lon float64 | |||
| Lat float64 | |||
| } | |||
| // MortonHash computes the morton hash value for the provided geo point | |||
| // This point is ordered as lon, lat. | |||
| func MortonHash(lon, lat float64) uint64 { | |||
| @@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error { | |||
| } | |||
| return nil | |||
| } | |||
| func BoundingRectangleForPolygon(polygon []Point) ( | |||
| float64, float64, float64, float64, error) { | |||
| err := checkLongitude(polygon[0].Lon) | |||
| if err != nil { | |||
| return 0, 0, 0, 0, err | |||
| } | |||
| err = checkLatitude(polygon[0].Lat) | |||
| if err != nil { | |||
| return 0, 0, 0, 0, err | |||
| } | |||
| maxY, minY := polygon[0].Lat, polygon[0].Lat | |||
| maxX, minX := polygon[0].Lon, polygon[0].Lon | |||
| for i := 1; i < len(polygon); i++ { | |||
| err := checkLongitude(polygon[i].Lon) | |||
| if err != nil { | |||
| return 0, 0, 0, 0, err | |||
| } | |||
| err = checkLatitude(polygon[i].Lat) | |||
| if err != nil { | |||
| return 0, 0, 0, 0, err | |||
| } | |||
| maxY = math.Max(maxY, polygon[i].Lat) | |||
| minY = math.Min(minY, polygon[i].Lat) | |||
| maxX = math.Max(maxX, polygon[i].Lon) | |||
| minX = math.Min(minX, polygon[i].Lon) | |||
| } | |||
| return minX, maxY, maxX, minY, nil | |||
| } | |||
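Assuming this vendored geo package is importable as github.com/blevesearch/bleve/geo, a usage sketch for the new BoundingRectangleForPolygon; note the (minLon, maxLat, maxLon, minLat) return order, i.e. the rectangle's top-left and bottom-right corners. The coordinates are arbitrary sample values.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	polygon := []geo.Point{
		{Lon: -122.42, Lat: 37.77},
		{Lon: -122.40, Lat: 37.79},
		{Lon: -122.38, Lat: 37.76},
	}
	minLon, maxLat, maxLon, minLat, err := geo.BoundingRectangleForPolygon(polygon)
	if err != nil {
		panic(err)
	}
	fmt.Println(minLon, maxLat, maxLon, minLat)
}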
| @@ -1,32 +1,21 @@ | |||
| // The code here was obtained from: | |||
| // https://github.com/mmcloughlin/geohash | |||
| // The MIT License (MIT) | |||
| // Copyright (c) 2015 Michael McLoughlin | |||
| // Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| // of this software and associated documentation files (the "Software"), to deal | |||
| // in the Software without restriction, including without limitation the rights | |||
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| // copies of the Software, and to permit persons to whom the Software is | |||
| // furnished to do so, subject to the following conditions: | |||
| // The above copyright notice and this permission notice shall be included in all | |||
| // copies or substantial portions of the Software. | |||
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| // SOFTWARE. | |||
| // Copyright (c) 2019 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| // This implementation is inspired from the geohash-js | |||
| // ref: https://github.com/davetroy/geohash-js | |||
| package geo | |||
| import ( | |||
| "math" | |||
| ) | |||
| // encoding encapsulates an encoding defined by a given base32 alphabet. | |||
| type encoding struct { | |||
| enc string | |||
| @@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding { | |||
| return e | |||
| } | |||
| // Decode string into bits of a 64-bit word. The string s may be at most 12 | |||
| // characters. | |||
| func (e *encoding) decode(s string) uint64 { | |||
| x := uint64(0) | |||
| for i := 0; i < len(s); i++ { | |||
| x = (x << 5) | uint64(e.dec[s[i]]) | |||
| } | |||
| return x | |||
| } | |||
| // Encode bits of 64-bit word into a string. | |||
| func (e *encoding) encode(x uint64) string { | |||
| b := [12]byte{} | |||
| for i := 0; i < 12; i++ { | |||
| b[11-i] = e.enc[x&0x1f] | |||
| x >>= 5 | |||
| } | |||
| return string(b[:]) | |||
| } | |||
| // Base32Encoding with the Geohash alphabet. | |||
| // base32encoding with the Geohash alphabet. | |||
| var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") | |||
| // BoundingBox returns the region encoded by the given string geohash. | |||
| func geoBoundingBox(hash string) geoBox { | |||
| bits := uint(5 * len(hash)) | |||
| inthash := base32encoding.decode(hash) | |||
| return geoBoundingBoxIntWithPrecision(inthash, bits) | |||
| } | |||
| // Box represents a rectangle in latitude/longitude space. | |||
| type geoBox struct { | |||
| minLat float64 | |||
| maxLat float64 | |||
| minLng float64 | |||
| maxLng float64 | |||
| } | |||
| // Round returns a point inside the box, making an effort to round to minimal | |||
| // precision. | |||
| func (b geoBox) round() (lat, lng float64) { | |||
| x := maxDecimalPower(b.maxLat - b.minLat) | |||
| lat = math.Ceil(b.minLat/x) * x | |||
| x = maxDecimalPower(b.maxLng - b.minLng) | |||
| lng = math.Ceil(b.minLng/x) * x | |||
| return | |||
| } | |||
| // precalculated for performance | |||
| var exp232 = math.Exp2(32) | |||
| // errorWithPrecision returns the error range in latitude and longitude for an | |||
| // integer geohash with bits of precision. | |||
| func errorWithPrecision(bits uint) (latErr, lngErr float64) { | |||
| b := int(bits) | |||
| latBits := b / 2 | |||
| lngBits := b - latBits | |||
| latErr = math.Ldexp(180.0, -latBits) | |||
| lngErr = math.Ldexp(360.0, -lngBits) | |||
| return | |||
| } | |||
| // minDecimalPlaces returns the minimum number of decimal places such that | |||
| // there must exist a number with that many places within any range of width | |||
| // r. This is intended for returning minimal precision coordinates inside a | |||
| // box. | |||
| func maxDecimalPower(r float64) float64 { | |||
| m := int(math.Floor(math.Log10(r))) | |||
| return math.Pow10(m) | |||
| } | |||
| // Encode the position of x within the range -r to +r as a 32-bit integer. | |||
| func encodeRange(x, r float64) uint32 { | |||
| p := (x + r) / (2 * r) | |||
| return uint32(p * exp232) | |||
| } | |||
| // Decode the 32-bit range encoding X back to a value in the range -r to +r. | |||
| func decodeRange(X uint32, r float64) float64 { | |||
| p := float64(X) / exp232 | |||
| x := 2*r*p - r | |||
| return x | |||
| } | |||
| // Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are | |||
| // ignored, and may take any value. | |||
| func squash(X uint64) uint32 { | |||
| X &= 0x5555555555555555 | |||
| X = (X | (X >> 1)) & 0x3333333333333333 | |||
| X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f | |||
| X = (X | (X >> 4)) & 0x00ff00ff00ff00ff | |||
| X = (X | (X >> 8)) & 0x0000ffff0000ffff | |||
| X = (X | (X >> 16)) & 0x00000000ffffffff | |||
| return uint32(X) | |||
| } | |||
| var masks = []uint64{16, 8, 4, 2, 1} | |||
| // DecodeGeoHash decodes the string geohash faster with | |||
| // higher precision. This API is experimental. | |||
| func DecodeGeoHash(geoHash string) (float64, float64) { | |||
| even := true | |||
| lat := []float64{-90.0, 90.0} | |||
| lon := []float64{-180.0, 180.0} | |||
| for i := 0; i < len(geoHash); i++ { | |||
| cd := uint64(base32encoding.dec[geoHash[i]]) | |||
| for j := 0; j < 5; j++ { | |||
| if even { | |||
| if cd&masks[j] > 0 { | |||
| lon[0] = (lon[0] + lon[1]) / 2 | |||
| } else { | |||
| lon[1] = (lon[0] + lon[1]) / 2 | |||
| } | |||
| } else { | |||
| if cd&masks[j] > 0 { | |||
| lat[0] = (lat[0] + lat[1]) / 2 | |||
| } else { | |||
| lat[1] = (lat[0] + lat[1]) / 2 | |||
| } | |||
| } | |||
| even = !even | |||
| } | |||
| } | |||
| // Deinterleave the bits of X into 32-bit words containing the even and odd | |||
| // bitlevels of X, respectively. | |||
| func deinterleave(X uint64) (uint32, uint32) { | |||
| return squash(X), squash(X >> 1) | |||
| return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2 | |||
| } | |||
| // BoundingBoxIntWithPrecision returns the region encoded by the integer | |||
| // geohash with the specified precision. | |||
| func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { | |||
| fullHash := hash << (64 - bits) | |||
| latInt, lngInt := deinterleave(fullHash) | |||
| lat := decodeRange(latInt, 90) | |||
| lng := decodeRange(lngInt, 180) | |||
| latErr, lngErr := errorWithPrecision(bits) | |||
| return geoBox{ | |||
| minLat: lat, | |||
| maxLat: lat + latErr, | |||
| minLng: lng, | |||
| maxLng: lng + lngErr, | |||
| func EncodeGeoHash(lat, lon float64) string { | |||
| even := true | |||
| lats := []float64{-90.0, 90.0} | |||
| lons := []float64{-180.0, 180.0} | |||
| precision := 12 | |||
| var ch, bit uint64 | |||
| var geoHash string | |||
| for len(geoHash) < precision { | |||
| if even { | |||
| mid := (lons[0] + lons[1]) / 2 | |||
| if lon > mid { | |||
| ch |= masks[bit] | |||
| lons[0] = mid | |||
| } else { | |||
| lons[1] = mid | |||
| } | |||
| } else { | |||
| mid := (lats[0] + lats[1]) / 2 | |||
| if lat > mid { | |||
| ch |= masks[bit] | |||
| lats[0] = mid | |||
| } else { | |||
| lats[1] = mid | |||
| } | |||
| } | |||
| even = !even | |||
| if bit < 4 { | |||
| bit++ | |||
| } else { | |||
| geoHash += string(base32encoding.enc[ch]) | |||
| ch = 0 | |||
| bit = 0 | |||
| } | |||
| } | |||
| } | |||
| // ---------------------------------------------------------------------- | |||
| // Decode the string geohash to a (lat, lng) point. | |||
| func GeoHashDecode(hash string) (lat, lng float64) { | |||
| box := geoBoundingBox(hash) | |||
| return box.round() | |||
| return geoHash | |||
| } | |||
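A quick round-trip sketch for the geohash helpers added above, again assuming the vendored package path github.com/blevesearch/bleve/geo; the coordinates are arbitrary.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	hash := geo.EncodeGeoHash(37.7749, -122.4194) // 12-character geohash
	lat, lon := geo.DecodeGeoHash(hash)
	fmt.Println(hash)     // a hash beginning with "9q8yy" (San Francisco area)
	fmt.Println(lat, lon) // approximately 37.7749 -122.4194
}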
| @@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { | |||
| } | |||
| } else { | |||
| // geohash | |||
| lat, lon = GeoHashDecode(geoStr) | |||
| lat, lon = DecodeGeoHash(geoStr) | |||
| foundLat = true | |||
| foundLon = true | |||
| } | |||
| @@ -117,6 +117,8 @@ func (b *Batch) String() string { | |||
| // be re-used in the future. | |||
| func (b *Batch) Reset() { | |||
| b.internal.Reset() | |||
| b.lastDocSize = 0 | |||
| b.totalSize = 0 | |||
| } | |||
| func (b *Batch) Merge(o *Batch) { | |||
| @@ -121,6 +121,10 @@ type IndexReaderOnly interface { | |||
| FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) | |||
| } | |||
| type IndexReaderContains interface { | |||
| FieldDictContains(field string) (FieldDictContains, error) | |||
| } | |||
| // FieldTerms contains the terms used by a document, keyed by field | |||
| type FieldTerms map[string][]string | |||
| @@ -230,6 +234,10 @@ type FieldDict interface { | |||
| Close() error | |||
| } | |||
| type FieldDictContains interface { | |||
| Contains(key []byte) (bool, error) | |||
| } | |||
| // DocIDReader is the interface exposing enumeration of documents identifiers. | |||
| // Close the reader to release associated resources. | |||
| type DocIDReader interface { | |||
| @@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
| fileSegments++ | |||
| } | |||
| } | |||
| } | |||
| // before the newMerge introduction, need to clean the newly | |||
| @@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
| } | |||
| } | |||
| } | |||
| // In case where all the docs in the newly merged segment getting | |||
| // deleted by the time we reach here, can skip the introduction. | |||
| if nextMerge.new != nil && | |||
| @@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
| newSnapshot.AddRef() // 1 ref for the nextMerge.notify response | |||
| newSnapshot.updateSize() | |||
| s.rootLock.Lock() | |||
| // swap in new index snapshot | |||
| newSnapshot.epoch = s.nextSnapshotEpoch | |||
| @@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { | |||
| } | |||
| newSnapshot.updateSize() | |||
| // swap in new snapshot | |||
| rootPrev := s.root | |||
| s.root = newSnapshot | |||
| @@ -18,6 +18,7 @@ import ( | |||
| "encoding/json" | |||
| "fmt" | |||
| "os" | |||
| "strings" | |||
| "sync/atomic" | |||
| "time" | |||
| @@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
| atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) | |||
| return nil | |||
| } | |||
| atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) | |||
| atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) | |||
| // process tasks in serial for now | |||
| var notifications []chan *IndexSnapshot | |||
| var filenames []string | |||
| for _, task := range resultMergePlan.Tasks { | |||
| if len(task.Segments) == 0 { | |||
| atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) | |||
| @@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
| segmentsToMerge = append(segmentsToMerge, zapSeg) | |||
| docsToDrop = append(docsToDrop, segSnapshot.deleted) | |||
| } | |||
| // track the files being merged so their removal ineligibility can be | |||
| // unset later. This keeps files from staying flagged even when a fast | |||
| // merger runs ahead of a slow persister. | |||
| path := zapSeg.Path() | |||
| filenames = append(filenames, | |||
| strings.TrimPrefix(path, s.path+string(os.PathSeparator))) | |||
| } | |||
| } | |||
| } | |||
| @@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
| atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) | |||
| return err | |||
| } | |||
| err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) | |||
| if err != nil { | |||
| s.unmarkIneligibleForRemoval(filename) | |||
| return fmt.Errorf("merge validation failed: %v", err) | |||
| } | |||
| oldNewDocNums = make(map[uint64][]uint64) | |||
| for i, segNewDocNums := range newDocNums { | |||
| oldNewDocNums[task.Segments[i].Id()] = segNewDocNums | |||
| @@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
| } | |||
| } | |||
| // once all the newly merged segment introductions are done, | |||
| // it's safe to unflip the removal ineligibility for the replaced | |||
| // older segments | |||
| for _, f := range filenames { | |||
| s.unmarkIneligibleForRemoval(f) | |||
| } | |||
| return nil | |||
| } | |||
| @@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, | |||
| atomic.AddUint64(&s.stats.TotMemMergeErr, 1) | |||
| return nil, 0, err | |||
| } | |||
| err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) | |||
| if err != nil { | |||
| return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) | |||
| } | |||
| // update persisted stats | |||
| atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) | |||
| @@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() { | |||
| var persistWatchers []*epochWatcher | |||
| var lastPersistedEpoch, lastMergedEpoch uint64 | |||
| var ew *epochWatcher | |||
| var unpersistedCallbacks []index.BatchCallback | |||
| po, err := s.parsePersisterOptions() | |||
| if err != nil { | |||
| s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) | |||
| @@ -111,7 +114,6 @@ OUTER: | |||
| if ew != nil && ew.epoch > lastMergedEpoch { | |||
| lastMergedEpoch = ew.epoch | |||
| } | |||
| lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, | |||
| lastMergedEpoch, persistWatchers, po) | |||
| @@ -150,11 +152,25 @@ OUTER: | |||
| _ = ourSnapshot.DecRef() | |||
| break OUTER | |||
| } | |||
| // save this current snapshot's persistedCallbacks, to invoke during | |||
| // the retry attempt | |||
| unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) | |||
| s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) | |||
| _ = ourSnapshot.DecRef() | |||
| atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) | |||
| continue OUTER | |||
| } | |||
| if unpersistedCallbacks != nil { | |||
| // in the event of this being a retry attempt for persisting a snapshot | |||
| // that had earlier failed, prepend the persistedCallbacks associated | |||
| // with earlier segment(s) to the latest persistedCallbacks | |||
| ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) | |||
| unpersistedCallbacks = nil | |||
| } | |||
| for i := range ourPersistedCallbacks { | |||
| ourPersistedCallbacks[i](err) | |||
| } | |||
| @@ -179,7 +195,6 @@ OUTER: | |||
| s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) | |||
| if changed { | |||
| s.removeOldData() | |||
| atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) | |||
| continue OUTER | |||
| } | |||
| @@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, | |||
| return watchersNext | |||
| } | |||
| func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, | |||
| persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { | |||
| func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, | |||
| lastMergedEpoch uint64, persistWatchers []*epochWatcher, | |||
| po *persisterOptions) (uint64, []*epochWatcher) { | |||
| // first, let the watchers proceed if they lag behind | |||
| // First, let the watchers proceed if they lag behind | |||
| persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | |||
| // check the merger lag by counting the segment files on disk, | |||
| // Check the merger lag by counting the segment files on disk, | |||
| numFilesOnDisk, _ := s.diskFileStats() | |||
| // On finding fewer files on disk, persister takes a short pause | |||
| // for sufficient in-memory segments to pile up for the next | |||
| // memory merge cum persist loop. | |||
| // On finding too many files on disk, persister pause until the merger | |||
| // catches up to reduce the segment file count under the threshold. | |||
| // But if there is memory pressure, then skip this sleep maneuvers. | |||
| numFilesOnDisk, _ := s.diskFileStats() | |||
| if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && | |||
| po.PersisterNapTimeMSec > 0 && s.paused() == 0 { | |||
| select { | |||
| @@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM | |||
| return lastMergedEpoch, persistWatchers | |||
| } | |||
| // Finding too many files on disk could be due to two reasons. | |||
| // 1. Too many older snapshots awaiting the clean up. | |||
| // 2. The merger could be lagging behind on merging the disk files. | |||
| if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { | |||
| s.removeOldData() | |||
| numFilesOnDisk, _ = s.diskFileStats() | |||
| } | |||
| // The persister pauses until the merger catches up, to keep the segment | |||
| // file count under the threshold. | |||
| // But if there is memory pressure, this sleep is skipped. | |||
| OUTER: | |||
| for po.PersisterNapUnderNumFiles > 0 && | |||
| numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && | |||
| @@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { | |||
| } | |||
| func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||
| rv := &IndexSnapshot{ | |||
| parent: s, | |||
| internal: make(map[string][]byte), | |||
| refs: 1, | |||
| creator: "loadSnapshot", | |||
| } | |||
| var running uint64 | |||
| c := snapshot.Cursor() | |||
| for k, _ := c.First(); k != nil; k, _ = c.Next() { | |||
| @@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||
| running += segmentSnapshot.segment.Count() | |||
| } | |||
| } | |||
| return rv, nil | |||
| } | |||
| @@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() { | |||
| if err != nil { | |||
| s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) | |||
| } | |||
| atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed)) | |||
| if removed > 0 { | |||
| err = s.removeOldZapFiles() | |||
| if err != nil { | |||
| s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) | |||
| } | |||
| err = s.removeOldZapFiles() | |||
| if err != nil { | |||
| s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) | |||
| } | |||
| } | |||
| @@ -41,12 +41,14 @@ const Version uint8 = 2 | |||
| var ErrClosed = fmt.Errorf("scorch closed") | |||
| type Scorch struct { | |||
| nextSegmentID uint64 | |||
| stats Stats | |||
| iStats internalStats | |||
| readOnly bool | |||
| version uint8 | |||
| config map[string]interface{} | |||
| analysisQueue *index.AnalysisQueue | |||
| stats Stats | |||
| nextSegmentID uint64 | |||
| path string | |||
| unsafeBatch bool | |||
| @@ -73,8 +75,6 @@ type Scorch struct { | |||
| onEvent func(event Event) | |||
| onAsyncError func(err error) | |||
| iStats internalStats | |||
| pauseLock sync.RWMutex | |||
| pauseCount uint64 | |||
| @@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { | |||
| // FIXME could sort ids list concurrent with analysis? | |||
| if len(batch.IndexOps) > 0 { | |||
| if numUpdates > 0 { | |||
| go func() { | |||
| for _, doc := range batch.IndexOps { | |||
| if doc != nil { | |||
| @@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { | |||
| m["CurOnDiskBytes"] = numBytesUsedDisk | |||
| m["CurOnDiskFiles"] = numFilesOnDisk | |||
| s.rootLock.RLock() | |||
| m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) | |||
| s.rootLock.RUnlock() | |||
| // TODO: consider one day removing these backwards compatible | |||
| // names for apps using the old names | |||
| m["updates"] = m["TotUpdates"] | |||
| @@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, | |||
| return &EmptyDictionaryIterator{} | |||
| } | |||
| func (e *EmptyDictionary) Contains(key []byte) (bool, error) { | |||
| return false, nil | |||
| } | |||
| type EmptyDictionaryIterator struct{} | |||
| func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { | |||
| return nil, nil | |||
| } | |||
| func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { | |||
| return false, nil | |||
| } | |||
| func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { | |||
| return nil, nil | |||
| } | |||
| @@ -19,7 +19,10 @@ | |||
| package segment | |||
| import "fmt" | |||
| import ( | |||
| "errors" | |||
| "fmt" | |||
| ) | |||
| const ( | |||
| MaxVarintSize = 9 | |||
| @@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { | |||
| } | |||
| return b[length:], v, nil | |||
| } | |||
| // ------------------------------------------------------------ | |||
| type MemUvarintReader struct { | |||
| C int // index of next byte to read from S | |||
| S []byte | |||
| } | |||
| func NewMemUvarintReader(s []byte) *MemUvarintReader { | |||
| return &MemUvarintReader{S: s} | |||
| } | |||
| // Len returns the number of unread bytes. | |||
| func (r *MemUvarintReader) Len() int { | |||
| n := len(r.S) - r.C | |||
| if n < 0 { | |||
| return 0 | |||
| } | |||
| return n | |||
| } | |||
| var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow") | |||
| // ReadUvarint reads an encoded uint64. The original code this was | |||
| // based on is at encoding/binary/ReadUvarint(). | |||
| func (r *MemUvarintReader) ReadUvarint() (uint64, error) { | |||
| var x uint64 | |||
| var s uint | |||
| var C = r.C | |||
| var S = r.S | |||
| for { | |||
| b := S[C] | |||
| C++ | |||
| if b < 0x80 { | |||
| r.C = C | |||
| // why 63? The original code had an 'i += 1' loop var and | |||
| // checked for i > 9 || i == 9 ...; but, we no longer | |||
| // check for the i var, but instead check here for s, | |||
| // which is incremented by 7. So, 7*9 == 63. | |||
| // | |||
| // why the "extra" >= check? The normal case is that s < | |||
| // 63, so we check this single >= guard first so that we | |||
| // hit the normal, nil-error return pathway sooner. | |||
| if s >= 63 && (s > 63 || s == 63 && b > 1) { | |||
| return 0, ErrMemUvarintReaderOverflow | |||
| } | |||
| return x | uint64(b)<<s, nil | |||
| } | |||
| x |= uint64(b&0x7f) << s | |||
| s += 7 | |||
| } | |||
| } | |||
| // SkipUvarint skips ahead one encoded uint64. | |||
| func (r *MemUvarintReader) SkipUvarint() { | |||
| for { | |||
| b := r.S[r.C] | |||
| r.C++ | |||
| if b < 0x80 { | |||
| return | |||
| } | |||
| } | |||
| } | |||
| // SkipBytes skips past the next count bytes. | |||
| func (r *MemUvarintReader) SkipBytes(count int) { | |||
| r.C = r.C + count | |||
| } | |||
| func (r *MemUvarintReader) Reset(s []byte) { | |||
| r.C = 0 | |||
| r.S = s | |||
| } | |||
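| // Editor's illustrative sketch, not part of the upstream change: exercising the | |||
| // new MemUvarintReader with a buffer built via encoding/binary, to show the | |||
| // read/skip calls the zap postings iterator further below relies on. | |||
| package main | |||
| import ( | |||
| 	"encoding/binary" | |||
| 	"fmt" | |||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | |||
| ) | |||
| func main() { | |||
| 	buf := make([]byte, 2*binary.MaxVarintLen64) | |||
| 	n := binary.PutUvarint(buf, 300) | |||
| 	n += binary.PutUvarint(buf[n:], 7) | |||
| 	r := segment.NewMemUvarintReader(buf[:n]) | |||
| 	v, err := r.ReadUvarint() // decodes 300 | |||
| 	if err != nil { | |||
| 		panic(err) | |||
| 	} | |||
| 	r.SkipUvarint()         // steps over the 7 without decoding it | |||
| 	fmt.Println(v, r.Len()) // prints: 300 0 | |||
| } | |||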
| @@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string { | |||
| s = s.Sub[0] | |||
| } | |||
| if s.Op == syntax.OpLiteral { | |||
| if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) { | |||
| return string(s.Rune) | |||
| } | |||
| @@ -59,6 +59,8 @@ type TermDictionary interface { | |||
| AutomatonIterator(a vellum.Automaton, | |||
| startKeyInclusive, endKeyExclusive []byte) DictionaryIterator | |||
| OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator | |||
| Contains(key []byte) (bool, error) | |||
| } | |||
| type DictionaryIterator interface { | |||
| @@ -16,6 +16,7 @@ package zap | |||
| import ( | |||
| "bufio" | |||
| "github.com/couchbase/vellum" | |||
| "math" | |||
| "os" | |||
| ) | |||
| @@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, | |||
| docValueOffset: docValueOffset, | |||
| dictLocs: dictLocs, | |||
| fieldDvReaders: make(map[uint16]*docValueReader), | |||
| fieldFSTs: make(map[uint16]*vellum.FST), | |||
| } | |||
| sb.updateSize() | |||
| @@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) | |||
| return rv | |||
| } | |||
| func (d *Dictionary) Contains(key []byte) (bool, error) { | |||
| return d.fst.Contains(key) | |||
| } | |||
| // Iterator returns an iterator for this dictionary | |||
| func (d *Dictionary) Iterator() segment.DictionaryIterator { | |||
| rv := &DictionaryIterator{ | |||
| @@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator | |||
| } | |||
| // need to increment the end position to be inclusive | |||
| endBytes := []byte(end) | |||
| if endBytes[len(endBytes)-1] < 0xff { | |||
| endBytes[len(endBytes)-1]++ | |||
| } else { | |||
| endBytes = append(endBytes, 0xff) | |||
| var endBytes []byte | |||
| if len(end) > 0 { | |||
| endBytes = []byte(end) | |||
| if endBytes[len(endBytes)-1] < 0xff { | |||
| endBytes[len(endBytes)-1]++ | |||
| } else { | |||
| endBytes = append(endBytes, 0xff) | |||
| } | |||
| } | |||
| if d.fst != nil { | |||
| @@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error | |||
| type docVisitState struct { | |||
| dvrs map[uint16]*docValueReader | |||
| segment *Segment | |||
| segment *SegmentBase | |||
| } | |||
| type docValueReader struct { | |||
| @@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, | |||
| fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { | |||
| // get the docValue offset for the given fields | |||
| if fieldDvLocStart == fieldNotUninverted { | |||
| return nil, fmt.Errorf("loadFieldDocValueReader: "+ | |||
| "no docValues found for field: %s", field) | |||
| // no docValues found, nothing to do | |||
| return nil, nil | |||
| } | |||
| // read the number of chunks, and chunk offsets position | |||
| @@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, | |||
| chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) | |||
| // acquire position of chunk offsets | |||
| chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen | |||
| } else { | |||
| return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) | |||
| } | |||
| fdvIter := &docValueReader{ | |||
| @@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { | |||
| // VisitDocumentFieldTerms is an implementation of the | |||
| // DocumentFieldTermVisitable interface | |||
| func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
| func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
| visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( | |||
| segment.DocVisitState, error) { | |||
| dvs, ok := dvsIn.(*docVisitState) | |||
| @@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
| if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { | |||
| // check if the chunk is already loaded | |||
| if docInChunk != dvr.curChunkNumber() { | |||
| err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) | |||
| err := dvr.loadDvChunk(docInChunk, s) | |||
| if err != nil { | |||
| return dvs, err | |||
| } | |||
| @@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
| // VisitableDocValueFields returns the list of fields with | |||
| // persisted doc value terms ready to be visitable using the | |||
| // VisitDocumentFieldTerms method. | |||
| func (s *Segment) VisitableDocValueFields() ([]string, error) { | |||
| func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { | |||
| return s.fieldDvNames, nil | |||
| } | |||
| @@ -31,6 +31,14 @@ import ( | |||
| var DefaultFileMergerBufferSize = 1024 * 1024 | |||
| // ValidateMerge can be set by applications to perform additional checks | |||
| // on a new segment produced by a merge; by default this does nothing. | |||
| // Caller should provide EITHER segments or memSegments, but not both. | |||
| // This API is experimental and may be removed at any time. | |||
| var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { | |||
| return nil | |||
| } | |||
| const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc | |||
| // Merge takes a slice of zap segments and bit masks describing which | |||
| @@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100 | |||
| var NewSegmentBufferNumResultsFactor float64 = 1.0 | |||
| var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 | |||
| // ValidateDocFields can be set by applications to perform additional checks | |||
| // on fields in a document being added to a new segment; by default it does | |||
| // nothing. | |||
| // This API is experimental and may be removed at any time. | |||
| var ValidateDocFields = func(field document.Field) error { | |||
| return nil | |||
| } | |||
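| // Editor's hedged sketch, not part of the upstream change: how an application | |||
| // might install the two new validation hooks; the specific checks here are | |||
| // invented purely for illustration. | |||
| package app | |||
| import ( | |||
| 	"fmt" | |||
| 	"github.com/RoaringBitmap/roaring" | |||
| 	"github.com/blevesearch/bleve/document" | |||
| 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | |||
| ) | |||
| func init() { | |||
| 	zap.ValidateDocFields = func(field document.Field) error { | |||
| 		if len(field.Name()) == 0 { | |||
| 			return fmt.Errorf("refusing to index a field with an empty name") | |||
| 		} | |||
| 		return nil | |||
| 	} | |||
| 	zap.ValidateMerge = func(segments []*zap.Segment, memSegments []*zap.SegmentBase, | |||
| 		drops []*roaring.Bitmap, newSegment *zap.Segment) error { | |||
| 		if newSegment == nil { | |||
| 			return fmt.Errorf("merge produced a nil segment") | |||
| 		} | |||
| 		return nil | |||
| 	} | |||
| } | |||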
| // AnalysisResultsToSegmentBase produces an in-memory zap-encoded | |||
| // SegmentBase from analysis results | |||
| func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, | |||
| @@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() ( | |||
| if opts.IncludeDocValues() { | |||
| s.IncludeDocValues[fieldID] = true | |||
| } | |||
| err := ValidateDocFields(field) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| } | |||
| var curr int | |||
| @@ -15,10 +15,8 @@ | |||
| package zap | |||
| import ( | |||
| "bytes" | |||
| "encoding/binary" | |||
| "fmt" | |||
| "io" | |||
| "math" | |||
| "reflect" | |||
| @@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, | |||
| } | |||
| rv.postings = p | |||
| rv.includeFreqNorm = includeFreq || includeNorm | |||
| rv.includeFreqNorm = includeFreq || includeNorm || includeLocs | |||
| rv.includeLocs = includeLocs | |||
| if p.normBits1Hit != 0 { | |||
| @@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, | |||
| // Count returns the number of items on this postings list | |||
| func (p *PostingsList) Count() uint64 { | |||
| var n uint64 | |||
| var n, e uint64 | |||
| if p.normBits1Hit != 0 { | |||
| n = 1 | |||
| if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { | |||
| e = 1 | |||
| } | |||
| } else if p.postings != nil { | |||
| n = p.postings.GetCardinality() | |||
| } | |||
| var e uint64 | |||
| if p.except != nil { | |||
| e = p.except.GetCardinality() | |||
| } | |||
| if n <= e { | |||
| return 0 | |||
| if p.except != nil { | |||
| e = p.postings.AndCardinality(p.except) | |||
| } | |||
| } | |||
| return n - e | |||
| } | |||
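| // Editor's standalone sketch, not part of the upstream change: why Count now | |||
| // subtracts only the overlap with the exclusion bitmap. Docs present in | |||
| // "except" but absent from the postings must not shrink the count. | |||
| package main | |||
| import ( | |||
| 	"fmt" | |||
| 	"github.com/RoaringBitmap/roaring" | |||
| ) | |||
| func main() { | |||
| 	postings := roaring.BitmapOf(1, 2, 3, 4) | |||
| 	except := roaring.BitmapOf(3, 4, 5, 6) // 5 and 6 are not in postings | |||
| 	old := postings.GetCardinality() - except.GetCardinality()           // 0: too low | |||
| 	fixed := postings.GetCardinality() - postings.AndCardinality(except) // 2: correct | |||
| 	fmt.Println(old, fixed) | |||
| } | |||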
| @@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { | |||
| // PostingsIterator provides a way to iterate through the postings list | |||
| type PostingsIterator struct { | |||
| postings *PostingsList | |||
| all roaring.IntIterable | |||
| Actual roaring.IntIterable | |||
| all roaring.IntPeekable | |||
| Actual roaring.IntPeekable | |||
| ActualBM *roaring.Bitmap | |||
| currChunk uint32 | |||
| currChunkFreqNorm []byte | |||
| currChunkLoc []byte | |||
| freqNormReader *bytes.Reader | |||
| locReader *bytes.Reader | |||
| freqNormReader *segment.MemUvarintReader | |||
| locReader *segment.MemUvarintReader | |||
| freqChunkOffsets []uint64 | |||
| freqChunkStart uint64 | |||
| @@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { | |||
| end += e | |||
| i.currChunkFreqNorm = i.postings.sb.mem[start:end] | |||
| if i.freqNormReader == nil { | |||
| i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) | |||
| i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) | |||
| } else { | |||
| i.freqNormReader.Reset(i.currChunkFreqNorm) | |||
| } | |||
| @@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { | |||
| end += e | |||
| i.currChunkLoc = i.postings.sb.mem[start:end] | |||
| if i.locReader == nil { | |||
| i.locReader = bytes.NewReader(i.currChunkLoc) | |||
| i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) | |||
| } else { | |||
| i.locReader.Reset(i.currChunkLoc) | |||
| } | |||
| @@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { | |||
| return 1, i.normBits1Hit, false, nil | |||
| } | |||
| freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) | |||
| freqHasLocs, err := i.freqNormReader.ReadUvarint() | |||
| if err != nil { | |||
| return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) | |||
| } | |||
| freq, hasLocs := decodeFreqHasLocs(freqHasLocs) | |||
| normBits, err := binary.ReadUvarint(i.freqNormReader) | |||
| normBits, err := i.freqNormReader.ReadUvarint() | |||
| if err != nil { | |||
| return 0, 0, false, fmt.Errorf("error reading norm: %v", err) | |||
| } | |||
| return freq, normBits, hasLocs, err | |||
| return freq, normBits, hasLocs, nil | |||
| } | |||
| func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) { | |||
| if i.normBits1Hit != 0 { | |||
| return false, nil | |||
| } | |||
| freqHasLocs, err := i.freqNormReader.ReadUvarint() | |||
| if err != nil { | |||
| return false, fmt.Errorf("error reading freqHasLocs: %v", err) | |||
| } | |||
| i.freqNormReader.SkipUvarint() // Skip normBits. | |||
| return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. | |||
| } | |||
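| // Editor's illustrative sketch, not part of the upstream change, and an | |||
| // assumption rather than a copy of encodeFreqHasLocs: the &0x01 check above | |||
| // implies the low bit of the combined uvarint carries hasLocs while the | |||
| // remaining bits carry the frequency, e.g.: | |||
| func packFreqHasLocs(freq uint64, hasLocs bool) uint64 { | |||
| 	rv := freq << 1 | |||
| 	if hasLocs { | |||
| 		rv |= 0x01 | |||
| 	} | |||
| 	return rv | |||
| } | |||
| func unpackFreqHasLocs(v uint64) (freq uint64, hasLocs bool) { | |||
| 	return v >> 1, v&0x01 != 0 | |||
| } | |||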
| func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { | |||
| @@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { | |||
| } | |||
| // readLocation processes all the integers on the stream representing a single | |||
| // location. if you care about it, pass in a non-nil location struct, and we | |||
| // will fill it. if you don't care about it, pass in nil and we safely consume | |||
| // the contents. | |||
| // location. | |||
| func (i *PostingsIterator) readLocation(l *Location) error { | |||
| // read off field | |||
| fieldID, err := binary.ReadUvarint(i.locReader) | |||
| fieldID, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location field: %v", err) | |||
| } | |||
| // read off pos | |||
| pos, err := binary.ReadUvarint(i.locReader) | |||
| pos, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location pos: %v", err) | |||
| } | |||
| // read off start | |||
| start, err := binary.ReadUvarint(i.locReader) | |||
| start, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location start: %v", err) | |||
| } | |||
| // read off end | |||
| end, err := binary.ReadUvarint(i.locReader) | |||
| end, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location end: %v", err) | |||
| } | |||
| // read off num array pos | |||
| numArrayPos, err := binary.ReadUvarint(i.locReader) | |||
| numArrayPos, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location num array pos: %v", err) | |||
| } | |||
| // group these together for less branching | |||
| if l != nil { | |||
| l.field = i.postings.sb.fieldsInv[fieldID] | |||
| l.pos = pos | |||
| l.start = start | |||
| l.end = end | |||
| if cap(l.ap) < int(numArrayPos) { | |||
| l.ap = make([]uint64, int(numArrayPos)) | |||
| } else { | |||
| l.ap = l.ap[:int(numArrayPos)] | |||
| } | |||
| l.field = i.postings.sb.fieldsInv[fieldID] | |||
| l.pos = pos | |||
| l.start = start | |||
| l.end = end | |||
| if cap(l.ap) < int(numArrayPos) { | |||
| l.ap = make([]uint64, int(numArrayPos)) | |||
| } else { | |||
| l.ap = l.ap[:int(numArrayPos)] | |||
| } | |||
| // read off array positions | |||
| for k := 0; k < int(numArrayPos); k++ { | |||
| ap, err := binary.ReadUvarint(i.locReader) | |||
| ap, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading array position: %v", err) | |||
| } | |||
| if l != nil { | |||
| l.ap[k] = ap | |||
| } | |||
| l.ap[k] = ap | |||
| } | |||
| return nil | |||
| @@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err | |||
| } | |||
| rv.locs = i.nextSegmentLocs[:0] | |||
| numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
| numLocsBytes, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) | |||
| } | |||
| @@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() ( | |||
| if hasLocs { | |||
| startLoc := len(i.currChunkLoc) - i.locReader.Len() | |||
| numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
| numLocsBytes, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return 0, 0, 0, nil, nil, | |||
| fmt.Errorf("error reading location nextBytes numLocs: %v", err) | |||
| } | |||
| // skip over all the location bytes | |||
| _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) | |||
| if err != nil { | |||
| return 0, 0, 0, nil, nil, err | |||
| } | |||
| i.locReader.SkipBytes(int(numLocsBytes)) | |||
| endLoc := len(i.currChunkLoc) - i.locReader.Len() | |||
| bytesLoc = i.currChunkLoc[startLoc:endLoc] | |||
| @@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, | |||
| return i.nextDocNumAtOrAfterClean(atOrAfter) | |||
| } | |||
| n := i.Actual.Next() | |||
| for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
| n = i.Actual.Next() | |||
| } | |||
| if uint64(n) < atOrAfter { | |||
| i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) | |||
| if !i.Actual.HasNext() { | |||
| // couldn't find anything | |||
| return 0, false, nil | |||
| } | |||
| n := i.Actual.Next() | |||
| allN := i.all.Next() | |||
| nChunk := n / i.postings.sb.chunkFactor | |||
| @@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, | |||
| // no deletions) where the all bitmap is the same as the actual bitmap | |||
| func (i *PostingsIterator) nextDocNumAtOrAfterClean( | |||
| atOrAfter uint64) (uint64, bool, error) { | |||
| n := i.Actual.Next() | |||
| if !i.includeFreqNorm { | |||
| for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
| n = i.Actual.Next() | |||
| } | |||
| i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) | |||
| if uint64(n) < atOrAfter { | |||
| if !i.Actual.HasNext() { | |||
| return 0, false, nil // couldn't find anything | |||
| } | |||
| return uint64(n), true, nil | |||
| return uint64(i.Actual.Next()), true, nil | |||
| } | |||
| // freq-norm's needed, so maintain freq-norm chunk reader | |||
| sameChunkNexts := 0 // # of times we called Next() in the same chunk | |||
| n := i.Actual.Next() | |||
| nChunk := n / i.postings.sb.chunkFactor | |||
| for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
| @@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { | |||
| } | |||
| // read off freq/offsets even though we don't care about them | |||
| _, _, hasLocs, err := i.readFreqNormHasLocs() | |||
| hasLocs, err := i.skipFreqNormReadHasLocs() | |||
| if err != nil { | |||
| return err | |||
| } | |||
| if i.includeLocs && hasLocs { | |||
| numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
| numLocsBytes, err := i.locReader.ReadUvarint() | |||
| if err != nil { | |||
| return fmt.Errorf("error reading location numLocsBytes: %v", err) | |||
| } | |||
| // skip over all the location bytes | |||
| _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| i.locReader.SkipBytes(int(numLocsBytes)) | |||
| } | |||
| return nil | |||
| @@ -20,8 +20,8 @@ import ( | |||
| "fmt" | |||
| "io" | |||
| "os" | |||
| "reflect" | |||
| "sync" | |||
| "unsafe" | |||
| "github.com/RoaringBitmap/roaring" | |||
| "github.com/blevesearch/bleve/index/scorch/segment" | |||
| @@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int | |||
| func init() { | |||
| var sb SegmentBase | |||
| reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) | |||
| reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) | |||
| } | |||
| // Open returns a zap impl of a segment | |||
| @@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) { | |||
| mem: mm[0 : len(mm)-FooterSize], | |||
| fieldsMap: make(map[string]uint16), | |||
| fieldDvReaders: make(map[uint16]*docValueReader), | |||
| fieldFSTs: make(map[uint16]*vellum.FST), | |||
| }, | |||
| f: f, | |||
| mm: mm, | |||
| @@ -101,6 +102,9 @@ type SegmentBase struct { | |||
| fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field | |||
| fieldDvNames []string // field names cached in fieldDvReaders | |||
| size uint64 | |||
| m sync.Mutex | |||
| fieldFSTs map[uint16]*vellum.FST | |||
| } | |||
| func (sb *SegmentBase) Size() int { | |||
| @@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { | |||
| dictStart := sb.dictLocs[rv.fieldID] | |||
| if dictStart > 0 { | |||
| // read the length of the vellum data | |||
| vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) | |||
| fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] | |||
| if fstBytes != nil { | |||
| var ok bool | |||
| sb.m.Lock() | |||
| if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { | |||
| // read the length of the vellum data | |||
| vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) | |||
| fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] | |||
| rv.fst, err = vellum.Load(fstBytes) | |||
| if err != nil { | |||
| sb.m.Unlock() | |||
| return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) | |||
| } | |||
| rv.fstReader, err = rv.fst.Reader() | |||
| if err != nil { | |||
| return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) | |||
| } | |||
| sb.fieldFSTs[rv.fieldID] = rv.fst | |||
| } | |||
| sb.m.Unlock() | |||
| rv.fstReader, err = rv.fst.Reader() | |||
| if err != nil { | |||
| return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) | |||
| } | |||
| } | |||
| } | |||
| @@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { | |||
| } | |||
| func (s *SegmentBase) loadDvReaders() error { | |||
| if s.docValueOffset == fieldNotUninverted { | |||
| if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { | |||
| return nil | |||
| } | |||
| @@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error { | |||
| } | |||
| read += uint64(n) | |||
| fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) | |||
| fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| if fieldDvReader != nil { | |||
| s.fieldDvReaders[uint16(fieldID)] = fieldDvReader | |||
| s.fieldDvNames = append(s.fieldDvNames, field) | |||
| @@ -28,13 +28,14 @@ import ( | |||
| "github.com/blevesearch/bleve/index" | |||
| "github.com/blevesearch/bleve/index/scorch/segment" | |||
| "github.com/couchbase/vellum" | |||
| lev2 "github.com/couchbase/vellum/levenshtein2" | |||
| lev "github.com/couchbase/vellum/levenshtein" | |||
| ) | |||
| // re usable, threadsafe levenshtein builders | |||
| var lb1, lb2 *lev2.LevenshteinAutomatonBuilder | |||
| var lb1, lb2 *lev.LevenshteinAutomatonBuilder | |||
| type asynchSegmentResult struct { | |||
| dict segment.TermDictionary | |||
| dictItr segment.DictionaryIterator | |||
| index int | |||
| @@ -51,11 +52,11 @@ func init() { | |||
| var is interface{} = IndexSnapshot{} | |||
| reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) | |||
| var err error | |||
| lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) | |||
| lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) | |||
| if err != nil { | |||
| panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) | |||
| } | |||
| lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) | |||
| lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) | |||
| if err != nil { | |||
| panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) | |||
| } | |||
| @@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() { | |||
| } | |||
| } | |||
| func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { | |||
| func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, | |||
| makeItr func(i segment.TermDictionary) segment.DictionaryIterator, | |||
| randomLookup bool) (*IndexSnapshotFieldDict, error) { | |||
| results := make(chan *asynchSegmentResult) | |||
| for index, segment := range i.segment { | |||
| @@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
| if err != nil { | |||
| results <- &asynchSegmentResult{err: err} | |||
| } else { | |||
| results <- &asynchSegmentResult{dictItr: makeItr(dict)} | |||
| if randomLookup { | |||
| results <- &asynchSegmentResult{dict: dict} | |||
| } else { | |||
| results <- &asynchSegmentResult{dictItr: makeItr(dict)} | |||
| } | |||
| } | |||
| }(index, segment) | |||
| } | |||
| @@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
| if asr.err != nil && err == nil { | |||
| err = asr.err | |||
| } else { | |||
| next, err2 := asr.dictItr.Next() | |||
| if err2 != nil && err == nil { | |||
| err = err2 | |||
| } | |||
| if next != nil { | |||
| if !randomLookup { | |||
| next, err2 := asr.dictItr.Next() | |||
| if err2 != nil && err == nil { | |||
| err = err2 | |||
| } | |||
| if next != nil { | |||
| rv.cursors = append(rv.cursors, &segmentDictCursor{ | |||
| itr: asr.dictItr, | |||
| curr: *next, | |||
| }) | |||
| } | |||
| } else { | |||
| rv.cursors = append(rv.cursors, &segmentDictCursor{ | |||
| itr: asr.dictItr, | |||
| curr: *next, | |||
| dict: asr.dict, | |||
| }) | |||
| } | |||
| } | |||
| @@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| // prepare heap | |||
| heap.Init(rv) | |||
| if !randomLookup { | |||
| // prepare heap | |||
| heap.Init(rv) | |||
| } | |||
| return rv, nil | |||
| } | |||
| @@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
| func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.Iterator() | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, | |||
| endTerm []byte) (index.FieldDict, error) { | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.RangeIterator(string(startTerm), string(endTerm)) | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) FieldDictPrefix(field string, | |||
| termPrefix []byte) (index.FieldDict, error) { | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.PrefixIterator(string(termPrefix)) | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) FieldDictRegexp(field string, | |||
| @@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.AutomatonIterator(a, prefixBeg, prefixEnd) | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) getLevAutomaton(term string, | |||
| @@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.AutomatonIterator(a, prefixBeg, prefixEnd) | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) FieldDictOnly(field string, | |||
| onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { | |||
| return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
| return i.OnlyIterator(onlyTerms, includeCount) | |||
| }) | |||
| }, false) | |||
| } | |||
| func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { | |||
| return i.newIndexSnapshotFieldDict(field, nil, true) | |||
| } | |||
| func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { | |||
| @@ -22,6 +22,7 @@ import ( | |||
| ) | |||
| type segmentDictCursor struct { | |||
| dict segment.TermDictionary | |||
| itr segment.DictionaryIterator | |||
| curr index.DictEntry | |||
| } | |||
| @@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | |||
| func (i *IndexSnapshotFieldDict) Close() error { | |||
| return nil | |||
| } | |||
| func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) { | |||
| if len(i.cursors) == 0 { | |||
| return false, nil | |||
| } | |||
| for _, cursor := range i.cursors { | |||
| if found, _ := cursor.dict.Contains(key); found { | |||
| return true, nil | |||
| } | |||
| } | |||
| return false, nil | |||
| } | |||
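| // Editor's hedged sketch, not part of the upstream change: one way a caller | |||
| // could use the new random-lookup dictionaries. The local interface below is | |||
| // an assumption standing in for whatever name bleve's index package exports | |||
| // for readers offering FieldDictContains, and index.FieldDictContains is | |||
| // assumed to expose Contains(key []byte) (bool, error) as the snapshot | |||
| // implementation above does. | |||
| package app | |||
| import ( | |||
| 	"fmt" | |||
| 	"github.com/blevesearch/bleve/index" | |||
| ) | |||
| type fieldDictContainsReader interface { | |||
| 	FieldDictContains(field string) (index.FieldDictContains, error) | |||
| } | |||
| // termExists reports whether any segment dictionary for the field holds term. | |||
| func termExists(r index.IndexReader, field string, term []byte) (bool, error) { | |||
| 	cr, ok := r.(fieldDictContainsReader) | |||
| 	if !ok { | |||
| 		return false, fmt.Errorf("reader does not support FieldDictContains") | |||
| 	} | |||
| 	fdc, err := cr.FieldDictContains(field) | |||
| 	if err != nil { | |||
| 		return false, err | |||
| 	} | |||
| 	return fdc.Contains(term) | |||
| } | |||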
| @@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { | |||
| } | |||
| type cachedDocs struct { | |||
| size uint64 | |||
| m sync.Mutex // As the cache is asynchronously prepared, need a lock | |||
| cache map[string]*cachedFieldDocs // Keyed by field | |||
| size uint64 | |||
| } | |||
| func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { | |||
| @@ -107,6 +107,9 @@ type Stats struct { | |||
| TotFileMergeIntroductionsDone uint64 | |||
| TotFileMergeIntroductionsSkipped uint64 | |||
| CurFilesIneligibleForRemoval uint64 | |||
| TotSnapshotsRemovedFromMetaStore uint64 | |||
| TotMemMergeBeg uint64 | |||
| TotMemMergeErr uint64 | |||
| TotMemMergeDone uint64 | |||
| @@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error { | |||
| func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
| // do analysis before acquiring write lock | |||
| analysisStart := time.Now() | |||
| numPlainTextBytes := doc.NumPlainTextBytes() | |||
| resultChan := make(chan *index.AnalysisResult) | |||
| aw := index.NewAnalysisWork(udc, doc, resultChan) | |||
| @@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
| return | |||
| } | |||
| return udc.UpdateWithAnalysis(doc, result, backIndexRow) | |||
| } | |||
| func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document, | |||
| result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) { | |||
| // start a writer for this update | |||
| indexStart := time.Now() | |||
| var kvwriter store.KVWriter | |||
| @@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
| atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) | |||
| if err == nil { | |||
| atomic.AddUint64(&udc.stats.updates, 1) | |||
| atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) | |||
| atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes()) | |||
| } else { | |||
| atomic.AddUint64(&udc.stats.errors, 1) | |||
| } | |||
| @@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] | |||
| } | |||
| func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
| persistedCallback := batch.PersistedCallback() | |||
| if persistedCallback != nil { | |||
| defer persistedCallback(err) | |||
| } | |||
| analysisStart := time.Now() | |||
| resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) | |||
| @@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
| } | |||
| } | |||
| if len(batch.IndexOps) > 0 { | |||
| if numUpdates > 0 { | |||
| go func() { | |||
| for _, doc := range batch.IndexOps { | |||
| if doc != nil { | |||
| @@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
| atomic.AddUint64(&udc.stats.errors, 1) | |||
| } | |||
| persistedCallback := batch.PersistedCallback() | |||
| if persistedCallback != nil { | |||
| persistedCallback(err) | |||
| } | |||
| return | |||
| } | |||
| @@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { | |||
| Sort: req.Sort.Copy(), | |||
| IncludeLocations: req.IncludeLocations, | |||
| Score: req.Score, | |||
| SearchAfter: req.SearchAfter, | |||
| SearchBefore: req.SearchBefore, | |||
| } | |||
| return &rv | |||
| } | |||
| @@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
| searchStart := time.Now() | |||
| asyncResults := make(chan *asyncSearchResult, len(indexes)) | |||
| var reverseQueryExecution bool | |||
| if req.SearchBefore != nil { | |||
| reverseQueryExecution = true | |||
| req.Sort.Reverse() | |||
| req.SearchAfter = req.SearchBefore | |||
| req.SearchBefore = nil | |||
| } | |||
| // run search on each index in separate go routine | |||
| var waitGroup sync.WaitGroup | |||
| @@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
| // sort all hits with the requested order | |||
| if len(req.Sort) > 0 { | |||
| sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) | |||
| sorter := newSearchHitSorter(req.Sort, sr.Hits) | |||
| sort.Sort(sorter) | |||
| } | |||
| @@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
| sr.Facets.Fixup(name, fr.Size) | |||
| } | |||
| if reverseQueryExecution { | |||
| // reverse the sort back to the original | |||
| req.Sort.Reverse() | |||
| // resort using the original order | |||
| mhs := newSearchHitSorter(req.Sort, sr.Hits) | |||
| sort.Sort(mhs) | |||
| // reset request | |||
| req.SearchBefore = req.SearchAfter | |||
| req.SearchAfter = nil | |||
| } | |||
| // fix up original request | |||
| sr.Request = req | |||
| searchDuration := time.Since(searchStart) | |||
| @@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error { | |||
| defer f.index.mutex.RUnlock() | |||
| return f.fieldDict.Close() | |||
| } | |||
| type multiSearchHitSorter struct { | |||
| hits search.DocumentMatchCollection | |||
| sort search.SortOrder | |||
| cachedScoring []bool | |||
| cachedDesc []bool | |||
| } | |||
| func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { | |||
| return &multiSearchHitSorter{ | |||
| sort: sort, | |||
| hits: hits, | |||
| cachedScoring: sort.CacheIsScore(), | |||
| cachedDesc: sort.CacheDescending(), | |||
| } | |||
| } | |||
| func (m *multiSearchHitSorter) Len() int { return len(m.hits) } | |||
| func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } | |||
| func (m *multiSearchHitSorter) Less(i, j int) bool { | |||
| c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) | |||
| return c < 0 | |||
| } | |||
| @@ -19,6 +19,7 @@ import ( | |||
| "encoding/json" | |||
| "fmt" | |||
| "os" | |||
| "sort" | |||
| "sync" | |||
| "sync/atomic" | |||
| "time" | |||
| @@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| return nil, ErrorIndexClosed | |||
| } | |||
| collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) | |||
| var reverseQueryExecution bool | |||
| if req.SearchBefore != nil { | |||
| reverseQueryExecution = true | |||
| req.Sort.Reverse() | |||
| req.SearchAfter = req.SearchBefore | |||
| req.SearchBefore = nil | |||
| } | |||
| var coll *collector.TopNCollector | |||
| if req.SearchAfter != nil { | |||
| coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter) | |||
| } else { | |||
| coll = collector.NewTopNCollector(req.Size, req.From, req.Sort) | |||
| } | |||
| // open a reader for this search | |||
| indexReader, err := i.i.Reader() | |||
| @@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| facetsBuilder.Add(facetName, facetBuilder) | |||
| } | |||
| } | |||
| collector.SetFacetsBuilder(facetsBuilder) | |||
| coll.SetFacetsBuilder(facetsBuilder) | |||
| } | |||
| memNeeded := memNeededForSearch(req, searcher, collector) | |||
| memNeeded := memNeededForSearch(req, searcher, coll) | |||
| if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { | |||
| if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { | |||
| err = cbF(memNeeded) | |||
| @@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| } | |||
| } | |||
| err = collector.Collect(ctx, searcher, indexReader) | |||
| err = coll.Collect(ctx, searcher, indexReader) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| hits := collector.Results() | |||
| hits := coll.Results() | |||
| var highlighter highlight.Highlighter | |||
| @@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| } | |||
| for _, hit := range hits { | |||
| if len(req.Fields) > 0 || highlighter != nil { | |||
| doc, err := indexReader.Document(hit.ID) | |||
| if err == nil && doc != nil { | |||
| if len(req.Fields) > 0 { | |||
| fieldsToLoad := deDuplicate(req.Fields) | |||
| for _, f := range fieldsToLoad { | |||
| for _, docF := range doc.Fields { | |||
| if f == "*" || docF.Name() == f { | |||
| var value interface{} | |||
| switch docF := docF.(type) { | |||
| case *document.TextField: | |||
| value = string(docF.Value()) | |||
| case *document.NumericField: | |||
| num, err := docF.Number() | |||
| if err == nil { | |||
| value = num | |||
| } | |||
| case *document.DateTimeField: | |||
| datetime, err := docF.DateTime() | |||
| if err == nil { | |||
| value = datetime.Format(time.RFC3339) | |||
| } | |||
| case *document.BooleanField: | |||
| boolean, err := docF.Boolean() | |||
| if err == nil { | |||
| value = boolean | |||
| } | |||
| case *document.GeoPointField: | |||
| lon, err := docF.Lon() | |||
| if err == nil { | |||
| lat, err := docF.Lat() | |||
| if err == nil { | |||
| value = []float64{lon, lat} | |||
| } | |||
| } | |||
| } | |||
| if value != nil { | |||
| hit.AddFieldValue(docF.Name(), value) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if highlighter != nil { | |||
| highlightFields := req.Highlight.Fields | |||
| if highlightFields == nil { | |||
| // add all fields with matches | |||
| highlightFields = make([]string, 0, len(hit.Locations)) | |||
| for k := range hit.Locations { | |||
| highlightFields = append(highlightFields, k) | |||
| } | |||
| } | |||
| for _, hf := range highlightFields { | |||
| highlighter.BestFragmentsInField(hit, doc, hf, 1) | |||
| } | |||
| } | |||
| } else if doc == nil { | |||
| // unexpected case, a doc ID that was found as a search hit | |||
| // was unable to be found during document lookup | |||
| return nil, ErrorIndexReadInconsistency | |||
| } | |||
| } | |||
| if i.name != "" { | |||
| hit.Index = i.name | |||
| } | |||
| err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| atomic.AddUint64(&i.stats.searches, 1) | |||
| @@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| logger.Printf("slow search took %s - %v", searchDuration, req) | |||
| } | |||
| if reverseQueryExecution { | |||
| // reverse the sort back to the original | |||
| req.Sort.Reverse() | |||
| // resort using the original order | |||
| mhs := newSearchHitSorter(req.Sort, hits) | |||
| sort.Sort(mhs) | |||
| // reset request | |||
| req.SearchBefore = req.SearchAfter | |||
| req.SearchAfter = nil | |||
| } | |||
| return &SearchResult{ | |||
| Status: &SearchStatus{ | |||
| Total: 1, | |||
| @@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
| }, | |||
| Request: req, | |||
| Hits: hits, | |||
| Total: collector.Total(), | |||
| MaxScore: collector.MaxScore(), | |||
| Total: coll.Total(), | |||
| MaxScore: coll.MaxScore(), | |||
| Took: searchDuration, | |||
| Facets: collector.FacetResults(), | |||
| Facets: coll.FacetResults(), | |||
| }, nil | |||
| } | |||
| func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, | |||
| indexName string, r index.IndexReader, | |||
| highlighter highlight.Highlighter) error { | |||
| if len(req.Fields) > 0 || highlighter != nil { | |||
| doc, err := r.Document(hit.ID) | |||
| if err == nil && doc != nil { | |||
| if len(req.Fields) > 0 { | |||
| fieldsToLoad := deDuplicate(req.Fields) | |||
| for _, f := range fieldsToLoad { | |||
| for _, docF := range doc.Fields { | |||
| if f == "*" || docF.Name() == f { | |||
| var value interface{} | |||
| switch docF := docF.(type) { | |||
| case *document.TextField: | |||
| value = string(docF.Value()) | |||
| case *document.NumericField: | |||
| num, err := docF.Number() | |||
| if err == nil { | |||
| value = num | |||
| } | |||
| case *document.DateTimeField: | |||
| datetime, err := docF.DateTime() | |||
| if err == nil { | |||
| value = datetime.Format(time.RFC3339) | |||
| } | |||
| case *document.BooleanField: | |||
| boolean, err := docF.Boolean() | |||
| if err == nil { | |||
| value = boolean | |||
| } | |||
| case *document.GeoPointField: | |||
| lon, err := docF.Lon() | |||
| if err == nil { | |||
| lat, err := docF.Lat() | |||
| if err == nil { | |||
| value = []float64{lon, lat} | |||
| } | |||
| } | |||
| } | |||
| if value != nil { | |||
| hit.AddFieldValue(docF.Name(), value) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if highlighter != nil { | |||
| highlightFields := req.Highlight.Fields | |||
| if highlightFields == nil { | |||
| // add all fields with matches | |||
| highlightFields = make([]string, 0, len(hit.Locations)) | |||
| for k := range hit.Locations { | |||
| highlightFields = append(highlightFields, k) | |||
| } | |||
| } | |||
| for _, hf := range highlightFields { | |||
| highlighter.BestFragmentsInField(hit, doc, hf, 1) | |||
| } | |||
| } | |||
| } else if doc == nil { | |||
| // unexpected case, a doc ID that was found as a search hit | |||
| // was unable to be found during document lookup | |||
| return ErrorIndexReadInconsistency | |||
| } | |||
| } | |||
| return nil | |||
| } | |||
| // Fields returns the name of all the fields this | |||
| // Index has operated on. | |||
| func (i *indexImpl) Fields() (fields []string, err error) { | |||
| @@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string { | |||
| } | |||
| return ret | |||
| } | |||
| type searchHitSorter struct { | |||
| hits search.DocumentMatchCollection | |||
| sort search.SortOrder | |||
| cachedScoring []bool | |||
| cachedDesc []bool | |||
| } | |||
| func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter { | |||
| return &searchHitSorter{ | |||
| sort: sort, | |||
| hits: hits, | |||
| cachedScoring: sort.CacheIsScore(), | |||
| cachedDesc: sort.CacheDescending(), | |||
| } | |||
| } | |||
| func (m *searchHitSorter) Len() int { return len(m.hits) } | |||
| func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } | |||
| func (m *searchHitSorter) Less(i, j int) bool { | |||
| c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) | |||
| return c < 0 | |||
| } | |||
| @@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, | |||
| if !propertyValue.IsNil() { | |||
| switch property := property.(type) { | |||
| case encoding.TextMarshaler: | |||
| txt, err := property.MarshalText() | |||
| if err == nil && subDocMapping != nil { | |||
| // index by explicit mapping | |||
| // ONLY process TextMarshaler if there is an explicit mapping | |||
| // AND all of the fields are of type text | |||
| // OTHERWISE process the field without TextMarshaler | |||
| if subDocMapping != nil { | |||
| allFieldsText := true | |||
| for _, fieldMapping := range subDocMapping.Fields { | |||
| if fieldMapping.Type == "text" { | |||
| fieldMapping.processString(string(txt), pathString, path, indexes, context) | |||
| if fieldMapping.Type != "text" { | |||
| allFieldsText = false | |||
| break | |||
| } | |||
| } | |||
| } else { | |||
| dm.walkDocument(property, path, indexes, context) | |||
| txt, err := property.MarshalText() | |||
| if err == nil && allFieldsText { | |||
| txtStr := string(txt) | |||
| for _, fieldMapping := range subDocMapping.Fields { | |||
| fieldMapping.processString(txtStr, pathString, path, indexes, context) | |||
| } | |||
| return | |||
| } | |||
| } | |||
| dm.walkDocument(property, path, indexes, context) | |||
| default: | |||
| dm.walkDocument(property, path, indexes, context) | |||
| } | |||
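| // Editor's hypothetical sketch, not part of the upstream change, showing the | |||
| // behaviour described above: a TextMarshaler's output is only indexed when an | |||
| // explicit, all-text field mapping exists for its path; otherwise the value | |||
| // is walked like any other struct. | |||
| package app | |||
| import "fmt" | |||
| type Version struct { | |||
| 	Major, Minor int | |||
| } | |||
| // MarshalText makes Version an encoding.TextMarshaler. | |||
| func (v Version) MarshalText() ([]byte, error) { | |||
| 	return []byte(fmt.Sprintf("%d.%d", v.Major, v.Minor)), nil | |||
| } | |||
| // With an explicit text mapping for the "version" path, e.g. | |||
| //   vm := bleve.NewTextFieldMapping() | |||
| //   docMapping.AddFieldMappingsAt("version", vm) | |||
| // the string "1.2" is indexed for that path; without one, Major and Minor are | |||
| // indexed as separate numeric fields via walkDocument. | |||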
| @@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20 | |||
| type PrefixCoded []byte | |||
| func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { | |||
| rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil) | |||
| return rv, err | |||
| } | |||
| func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) ( | |||
| rv PrefixCoded, preallocRest []byte, err error) { | |||
| if shift > 63 { | |||
| return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) | |||
| return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) | |||
| } | |||
| nChars := ((63 - shift) / 7) + 1 | |||
| rv := make(PrefixCoded, nChars+1) | |||
| size := int(nChars + 1) | |||
| if len(prealloc) >= size { | |||
| rv = PrefixCoded(prealloc[0:size]) | |||
| preallocRest = prealloc[size:] | |||
| } else { | |||
| rv = make(PrefixCoded, size) | |||
| } | |||
| rv[0] = ShiftStartInt64 + byte(shift) | |||
| sortableBits := int64(uint64(in) ^ 0x8000000000000000) | |||
| @@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { | |||
| nChars-- | |||
| sortableBits = int64(uint64(sortableBits) >> 7) | |||
| } | |||
| return rv, nil | |||
| return rv, preallocRest, nil | |||
| } | |||
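| // Editor's sketch, not part of the upstream change: the allocation pattern the | |||
| // Prealloc variant enables, slicing many PrefixCoded values out of one buffer. | |||
| package main | |||
| import ( | |||
| 	"fmt" | |||
| 	"github.com/blevesearch/bleve/numeric" | |||
| ) | |||
| func main() { | |||
| 	buf := make([]byte, 1024) | |||
| 	var codes []numeric.PrefixCoded | |||
| 	for _, v := range []int64{42, 1000, -7} { | |||
| 		pc, rest, err := numeric.NewPrefixCodedInt64Prealloc(v, 0, buf) | |||
| 		if err != nil { | |||
| 			panic(err) | |||
| 		} | |||
| 		codes = append(codes, pc) | |||
| 		buf = rest // keep carving from the same backing array | |||
| 	} | |||
| 	fmt.Println(len(codes)) | |||
| } | |||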
| func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { | |||
| @@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) { | |||
| // result score explanations. | |||
| // Sort describes the desired order for the results to be returned. | |||
| // Score controls the kind of scoring performed | |||
| // SearchAfter supports deep paging by providing a minimum sort key | |||
| // SearchBefore supports deep paging by providing a maximum sort key | |||
| // | |||
| // A special field named "*" can be used to return all fields. | |||
| type SearchRequest struct { | |||
| @@ -275,6 +277,8 @@ type SearchRequest struct { | |||
| Sort search.SortOrder `json:"sort"` | |||
| IncludeLocations bool `json:"includeLocations"` | |||
| Score string `json:"score,omitempty"` | |||
| SearchAfter []string `json:"search_after"` | |||
| SearchBefore []string `json:"search_before"` | |||
| } | |||
| func (r *SearchRequest) Validate() error { | |||
| @@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error { | |||
| } | |||
| } | |||
| if r.SearchAfter != nil && r.SearchBefore != nil { | |||
| return fmt.Errorf("cannot use search after and search before together") | |||
| } | |||
| if r.SearchAfter != nil { | |||
| if r.From != 0 { | |||
| return fmt.Errorf("cannot use search after with from !=0") | |||
| } | |||
| if len(r.SearchAfter) != len(r.Sort) { | |||
| return fmt.Errorf("search after must have same size as sort order") | |||
| } | |||
| } | |||
| if r.SearchBefore != nil { | |||
| if r.From != 0 { | |||
| return fmt.Errorf("cannot use search before with from !=0") | |||
| } | |||
| if len(r.SearchBefore) != len(r.Sort) { | |||
| return fmt.Errorf("search before must have same size as sort order") | |||
| } | |||
| } | |||
| return r.Facets.Validate() | |||
| } | |||
| @@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { | |||
| r.Sort = order | |||
| } | |||
| // SetSearchAfter sets the request to skip over hits with a sort | |||
| // value less than the provided sort after key | |||
| func (r *SearchRequest) SetSearchAfter(after []string) { | |||
| r.SearchAfter = after | |||
| } | |||
| // SetSearchBefore sets the request to skip over hits with a sort | |||
| // value greater than the provided sort before key | |||
| func (r *SearchRequest) SetSearchBefore(before []string) { | |||
| r.SearchBefore = before | |||
| } | |||
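| // Editor's hedged sketch, not part of the upstream change: paging forward with | |||
| // the new SearchAfter support. The sort key passed in must line up one-to-one | |||
| // with the request's sort order; here the previous page's last hit is reused. | |||
| package app | |||
| import ( | |||
| 	"github.com/blevesearch/bleve" | |||
| 	"github.com/blevesearch/bleve/search" | |||
| 	"github.com/blevesearch/bleve/search/query" | |||
| ) | |||
| func nextPage(idx bleve.Index, q query.Query, prevLast *search.DocumentMatch) (*bleve.SearchResult, error) { | |||
| 	req := bleve.NewSearchRequest(q) | |||
| 	req.Size = 10 | |||
| 	req.SortBy([]string{"-_score", "_id"}) | |||
| 	req.SetSearchAfter(prevLast.Sort) // Sort holds the previous hit's sort key values | |||
| 	return idx.Search(req) | |||
| } | |||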
| // UnmarshalJSON deserializes a JSON representation of | |||
| // a SearchRequest | |||
| func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
| @@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
| Sort []json.RawMessage `json:"sort"` | |||
| IncludeLocations bool `json:"includeLocations"` | |||
| Score string `json:"score"` | |||
| SearchAfter []string `json:"search_after"` | |||
| SearchBefore []string `json:"search_before"` | |||
| } | |||
| err := json.Unmarshal(input, &temp) | |||
| @@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
| r.Facets = temp.Facets | |||
| r.IncludeLocations = temp.IncludeLocations | |||
| r.Score = temp.Score | |||
| r.SearchAfter = temp.SearchAfter | |||
| r.SearchBefore = temp.SearchBefore | |||
| r.Query, err = query.ParseQuery(temp.Q) | |||
| if err != nil { | |||
| return err | |||
| @@ -69,6 +69,7 @@ type TopNCollector struct { | |||
| lowestMatchOutsideResults *search.DocumentMatch | |||
| updateFieldVisitor index.DocumentFieldTermVisitor | |||
| dvReader index.DocValueReader | |||
| searchAfter *search.DocumentMatch | |||
| } | |||
| // CheckDoneEvery controls how frequently we check the context deadline | |||
| @@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024) | |||
| // skipping over the first 'skip' hits | |||
| // ordering hits by the provided sort order | |||
| func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { | |||
| return newTopNCollector(size, skip, sort) | |||
| } | |||
| // NewTopNCollectorAfter builds a collector to find the top 'size' hits | |||
| // starting after the provided 'after' sort key, | |||
| // ordering hits by the provided sort order | |||
| func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector { | |||
| rv := newTopNCollector(size, 0, sort) | |||
| rv.searchAfter = &search.DocumentMatch{ | |||
| Sort: after, | |||
| } | |||
| return rv | |||
| } | |||
| func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { | |||
| hc := &TopNCollector{size: size, skip: skip, sort: sort} | |||
| // pre-allocate space on the store to avoid reslicing | |||
| @@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||
| searchContext := &search.SearchContext{ | |||
| DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), | |||
| Collector: hc, | |||
| IndexReader: reader, | |||
| } | |||
| hc.dvReader, err = reader.DocValueReader(hc.neededFields) | |||
| @@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler( | |||
| if d == nil { | |||
| return nil | |||
| } | |||
| // support search after based pagination, | |||
| // if this hit is <= the search after sort key | |||
| // we should skip it | |||
| if hc.searchAfter != nil { | |||
| // exact sort order matches use the hit number to break the tie, | |||
| // but we want to allow an exact match here, so we pretend the | |||
| // search-after key carries this hit's number | |||
| hc.searchAfter.HitNumber = d.HitNumber | |||
| if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 { | |||
| return nil | |||
| } | |||
| } | |||
| // optimization, we track lowest sorting hit already removed from heap | |||
| // with this one comparison, we can avoid all heap operations if | |||
| // this hit would have been added and then immediately removed | |||
| @@ -41,6 +41,14 @@ type BleveQueryTime struct { | |||
| time.Time | |||
| } | |||
| var MinRFC3339CompatibleTime time.Time | |||
| var MaxRFC3339CompatibleTime time.Time | |||
| func init() { | |||
| MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") | |||
| MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") | |||
| } | |||
| func queryTimeFromString(t string) (time.Time, error) { | |||
| dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) | |||
| if err != nil { | |||
| @@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { | |||
| min := math.Inf(-1) | |||
| max := math.Inf(1) | |||
| if !q.Start.IsZero() { | |||
| min = numeric.Int64ToFloat64(q.Start.UnixNano()) | |||
| if !isDatetimeCompatible(q.Start) { | |||
| // overflow | |||
| return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) | |||
| } | |||
| startInt64 := q.Start.UnixNano() | |||
| min = numeric.Int64ToFloat64(startInt64) | |||
| } | |||
| if !q.End.IsZero() { | |||
| max = numeric.Int64ToFloat64(q.End.UnixNano()) | |||
| if !isDatetimeCompatible(q.End) { | |||
| // overflow | |||
| return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) | |||
| } | |||
| endInt64 := q.End.UnixNano() | |||
| max = numeric.Int64ToFloat64(endInt64) | |||
| } | |||
| return &min, &max, nil | |||
| @@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error { | |||
| } | |||
| return nil | |||
| } | |||
| func isDatetimeCompatible(t BleveQueryTime) bool { | |||
| if QueryDateTimeFormat == time.RFC3339 && | |||
| (t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) { | |||
| return false | |||
| } | |||
| return true | |||
| } | |||
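| // Editor's standalone sketch, not part of the upstream change: UnixNano only | |||
| // covers roughly 1677-09-21 through 2262-04-11, so RFC3339 dates outside that | |||
| // window would silently wrap instead of producing a usable range endpoint, | |||
| // which is what the guard above rejects. | |||
| package main | |||
| import ( | |||
| 	"fmt" | |||
| 	"time" | |||
| ) | |||
| func main() { | |||
| 	inRange, _ := time.Parse(time.RFC3339, "2100-01-01T00:00:00Z") | |||
| 	tooLate, _ := time.Parse(time.RFC3339, "3000-01-01T00:00:00Z") | |||
| 	fmt.Println(inRange.UnixNano() > 0) // true: representable as int64 nanoseconds | |||
| 	fmt.Println(tooLate.UnixNano() > 0) // false: overflowed and wrapped negative | |||
| } | |||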
| @@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
| if len(ss) < 1 { | |||
| return searcher.NewMatchNoneSearcher(i) | |||
| } else if len(ss) == 1 && int(q.Min) == ss[0].Min() { | |||
| // apply optimization only if both conditions below are satisfied: | |||
| // - disjunction searcher has only 1 child searcher | |||
| // - parent searcher's min setting is equal to child searcher's min | |||
| return ss[0], nil | |||
| } | |||
| return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) | |||
| @@ -0,0 +1,94 @@ | |||
| // Copyright (c) 2019 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package query | |||
| import ( | |||
| "encoding/json" | |||
| "fmt" | |||
| "github.com/blevesearch/bleve/geo" | |||
| "github.com/blevesearch/bleve/index" | |||
| "github.com/blevesearch/bleve/mapping" | |||
| "github.com/blevesearch/bleve/search" | |||
| "github.com/blevesearch/bleve/search/searcher" | |||
| ) | |||
| type GeoBoundingPolygonQuery struct { | |||
| Points []geo.Point `json:"polygon_points"` | |||
| FieldVal string `json:"field,omitempty"` | |||
| BoostVal *Boost `json:"boost,omitempty"` | |||
| } | |||
| func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery { | |||
| return &GeoBoundingPolygonQuery{ | |||
| Points: points} | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) SetBoost(b float64) { | |||
| boost := Boost(b) | |||
| q.BoostVal = &boost | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) Boost() float64 { | |||
| return q.BoostVal.Value() | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) SetField(f string) { | |||
| q.FieldVal = f | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) Field() string { | |||
| return q.FieldVal | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader, | |||
| m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
| field := q.FieldVal | |||
| if q.FieldVal == "" { | |||
| field = m.DefaultSearchField() | |||
| } | |||
| return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options) | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) Validate() error { | |||
| return nil | |||
| } | |||
| func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error { | |||
| tmp := struct { | |||
| Points []interface{} `json:"polygon_points"` | |||
| FieldVal string `json:"field,omitempty"` | |||
| BoostVal *Boost `json:"boost,omitempty"` | |||
| }{} | |||
| err := json.Unmarshal(data, &tmp) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| q.Points = make([]geo.Point, 0, len(tmp.Points)) | |||
| for _, i := range tmp.Points { | |||
| // now use our generic point parsing code from the geo package | |||
| lon, lat, found := geo.ExtractGeoPoint(i) | |||
| if !found { | |||
| return fmt.Errorf("geo polygon point: %v is not in a valid format", i) | |||
| } | |||
| q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat}) | |||
| } | |||
| q.FieldVal = tmp.FieldVal | |||
| q.BoostVal = tmp.BoostVal | |||
| return nil | |||
| } | |||
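| // Editor's hedged usage sketch, not part of the upstream change: building the | |||
| // new polygon query in code and via the JSON form ParseQuery now recognises; | |||
| // the field name "location" and the coordinates are made up for illustration. | |||
| package main | |||
| import ( | |||
| 	"fmt" | |||
| 	"github.com/blevesearch/bleve/geo" | |||
| 	"github.com/blevesearch/bleve/search/query" | |||
| ) | |||
| func main() { | |||
| 	q := query.NewGeoBoundingPolygonQuery([]geo.Point{ | |||
| 		{Lon: -122.45, Lat: 37.70}, | |||
| 		{Lon: -122.35, Lat: 37.70}, | |||
| 		{Lon: -122.40, Lat: 37.80}, | |||
| 	}) | |||
| 	q.SetField("location") | |||
| 	jsonForm := []byte(`{"field": "location", "polygon_points": [` + | |||
| 		`{"lon": -122.45, "lat": 37.70}, {"lon": -122.35, "lat": 37.70}, ` + | |||
| 		`{"lon": -122.40, "lat": 37.80}]}`) | |||
| 	parsed, err := query.ParseQuery(jsonForm) | |||
| 	fmt.Println(q.Field(), parsed != nil, err) | |||
| } | |||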
| @@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) { | |||
| } | |||
| return &rv, nil | |||
| } | |||
| _, hasPoints := tmp["polygon_points"] | |||
| if hasPoints { | |||
| var rv GeoBoundingPolygonQuery | |||
| err := json.Unmarshal(input, &rv) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return &rv, nil | |||
| } | |||
| return nil, fmt.Errorf("unknown query type") | |||
| } | |||
| @@ -40,6 +40,7 @@ type TermQueryScorer struct { | |||
| idf float64 | |||
| options search.SearcherOptions | |||
| idfExplanation *search.Explanation | |||
| includeScore bool // false when SearcherOptions.Score == "none", letting Score skip tf-idf work | |||
| queryNorm float64 | |||
| queryWeight float64 | |||
| queryWeightExplanation *search.Explanation | |||
| @@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int { | |||
| func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { | |||
| rv := TermQueryScorer{ | |||
| queryTerm: string(queryTerm), | |||
| queryField: queryField, | |||
| queryBoost: queryBoost, | |||
| docTerm: docTerm, | |||
| docTotal: docTotal, | |||
| idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), | |||
| options: options, | |||
| queryWeight: 1.0, | |||
| queryTerm: string(queryTerm), | |||
| queryField: queryField, | |||
| queryBoost: queryBoost, | |||
| docTerm: docTerm, | |||
| docTotal: docTotal, | |||
| idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), | |||
| options: options, | |||
| queryWeight: 1.0, | |||
| includeScore: options.Score != "none", | |||
| } | |||
| if options.Explain { | |||
| @@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { | |||
| } | |||
| func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { | |||
| var scoreExplanation *search.Explanation | |||
| // need to compute score | |||
| var tf float64 | |||
| if termMatch.Freq < MaxSqrtCache { | |||
| tf = SqrtCache[int(termMatch.Freq)] | |||
| } else { | |||
| tf = math.Sqrt(float64(termMatch.Freq)) | |||
| } | |||
| score := tf * termMatch.Norm * s.idf | |||
| if s.options.Explain { | |||
| childrenExplanations := make([]*search.Explanation, 3) | |||
| childrenExplanations[0] = &search.Explanation{ | |||
| Value: tf, | |||
| Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), | |||
| } | |||
| childrenExplanations[1] = &search.Explanation{ | |||
| Value: termMatch.Norm, | |||
| Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), | |||
| } | |||
| childrenExplanations[2] = s.idfExplanation | |||
| scoreExplanation = &search.Explanation{ | |||
| Value: score, | |||
| Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), | |||
| Children: childrenExplanations, | |||
| rv := ctx.DocumentMatchPool.Get() | |||
| // perform any score computations only when needed | |||
| if s.includeScore || s.options.Explain { | |||
| var scoreExplanation *search.Explanation | |||
| var tf float64 | |||
| if termMatch.Freq < MaxSqrtCache { | |||
| tf = SqrtCache[int(termMatch.Freq)] | |||
| } else { | |||
| tf = math.Sqrt(float64(termMatch.Freq)) | |||
| } | |||
| } | |||
| score := tf * termMatch.Norm * s.idf | |||
| // if the query weight isn't 1, multiply | |||
| if s.queryWeight != 1.0 { | |||
| score = score * s.queryWeight | |||
| if s.options.Explain { | |||
| childExplanations := make([]*search.Explanation, 2) | |||
| childExplanations[0] = s.queryWeightExplanation | |||
| childExplanations[1] = scoreExplanation | |||
| childrenExplanations := make([]*search.Explanation, 3) | |||
| childrenExplanations[0] = &search.Explanation{ | |||
| Value: tf, | |||
| Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), | |||
| } | |||
| childrenExplanations[1] = &search.Explanation{ | |||
| Value: termMatch.Norm, | |||
| Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), | |||
| } | |||
| childrenExplanations[2] = s.idfExplanation | |||
| scoreExplanation = &search.Explanation{ | |||
| Value: score, | |||
| Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), | |||
| Children: childExplanations, | |||
| Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), | |||
| Children: childrenExplanations, | |||
| } | |||
| } | |||
| // if the query weight isn't 1, multiply | |||
| if s.queryWeight != 1.0 { | |||
| score = score * s.queryWeight | |||
| if s.options.Explain { | |||
| childExplanations := make([]*search.Explanation, 2) | |||
| childExplanations[0] = s.queryWeightExplanation | |||
| childExplanations[1] = scoreExplanation | |||
| scoreExplanation = &search.Explanation{ | |||
| Value: score, | |||
| Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), | |||
| Children: childExplanations, | |||
| } | |||
| } | |||
| } | |||
| if s.includeScore { | |||
| rv.Score = score | |||
| } | |||
| if s.options.Explain { | |||
| rv.Expl = scoreExplanation | |||
| } | |||
| } | |||
| rv := ctx.DocumentMatchPool.Get() | |||
| rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) | |||
| rv.Score = score | |||
| if s.options.Explain { | |||
| rv.Expl = scoreExplanation | |||
| } | |||
| if len(termMatch.Vectors) > 0 { | |||
| if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { | |||
| @@ -17,6 +17,7 @@ package search | |||
| import ( | |||
| "fmt" | |||
| "reflect" | |||
| "sort" | |||
| "github.com/blevesearch/bleve/index" | |||
| "github.com/blevesearch/bleve/size" | |||
| @@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { | |||
| return true | |||
| } | |||
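| // Compare compares the two position slices element by element and then by | |||
| // length, returning -1, 0, or 1, i.e. a lexicographic ordering. | |||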
| func (ap ArrayPositions) Compare(other ArrayPositions) int { | |||
| for i, p := range ap { | |||
| if i >= len(other) { | |||
| return 1 | |||
| } | |||
| if p < other[i] { | |||
| return -1 | |||
| } | |||
| if p > other[i] { | |||
| return 1 | |||
| } | |||
| } | |||
| if len(ap) < len(other) { | |||
| return -1 | |||
| } | |||
| return 0 | |||
| } | |||
| type Location struct { | |||
| // Pos is the position of the term within the field, starting at 1 | |||
| Pos uint64 `json:"pos"` | |||
| @@ -68,6 +87,46 @@ func (l *Location) Size() int { | |||
| type Locations []*Location | |||
| func (p Locations) Len() int { return len(p) } | |||
| func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } | |||
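| // Less orders locations by their array positions first and then by term | |||
| // position, so that duplicate locations end up adjacent after sorting. | |||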
| func (p Locations) Less(i, j int) bool { | |||
| c := p[i].ArrayPositions.Compare(p[j].ArrayPositions) | |||
| if c < 0 { | |||
| return true | |||
| } | |||
| if c > 0 { | |||
| return false | |||
| } | |||
| return p[i].Pos < p[j].Pos | |||
| } | |||
| func (p Locations) Dedupe() Locations { // destructive! | |||
| if len(p) <= 1 { | |||
| return p | |||
| } | |||
| sort.Sort(p) | |||
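| // after sorting, duplicates are adjacent; compact the slice in place with | |||
| // a slow/fast pointer pair, keeping only the first entry of each run | |||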
| slow := 0 | |||
| for _, pfast := range p { | |||
| pslow := p[slow] | |||
| if pslow.Pos == pfast.Pos && | |||
| pslow.Start == pfast.Start && | |||
| pslow.End == pfast.End && | |||
| pslow.ArrayPositions.Equals(pfast.ArrayPositions) { | |||
| continue // duplicate, so only move fast ahead | |||
| } | |||
| slow++ | |||
| p[slow] = pfast | |||
| } | |||
| return p[:slow+1] | |||
| } | |||
| type TermLocationMap map[string]Locations | |||
| func (t TermLocationMap) AddLocation(term string, location *Location) { | |||
| @@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
| var lastField string | |||
| var tlm TermLocationMap | |||
| var needsDedupe bool | |||
| for i, ftl := range dm.FieldTermLocations { | |||
| if lastField != ftl.Field { | |||
| @@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
| loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) | |||
| } | |||
| tlm[ftl.Term] = append(tlm[ftl.Term], loc) | |||
| locs := tlm[ftl.Term] | |||
| // if the loc is before or at the last location, then there | |||
| // might be duplicates that need to be deduplicated | |||
| if !needsDedupe && len(locs) > 0 { | |||
| last := locs[len(locs)-1] | |||
| cmp := loc.ArrayPositions.Compare(last.ArrayPositions) | |||
| if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { | |||
| needsDedupe = true | |||
| } | |||
| } | |||
| tlm[ftl.Term] = append(locs, loc) | |||
| dm.FieldTermLocations[i] = FieldTermLocation{ // recycle | |||
| Location: Location{ | |||
| @@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
| }, | |||
| } | |||
| } | |||
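| // a dedupe pass is only needed if some location arrived at or before the | |||
| // previous one; strictly increasing locations cannot contain duplicates | |||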
| if needsDedupe { | |||
| for _, tlm := range dm.Locations { | |||
| for term, locs := range tlm { | |||
| tlm[term] = locs.Dedupe() | |||
| } | |||
| } | |||
| } | |||
| } | |||
| dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle | |||
| @@ -279,6 +359,7 @@ type SearcherOptions struct { | |||
| type SearchContext struct { | |||
| DocumentMatchPool *DocumentMatchPool | |||
| Collector Collector | |||
| IndexReader index.IndexReader | |||
| } | |||
| func (sc *SearchContext) Size() int { | |||
| @@ -45,6 +45,7 @@ type BooleanSearcher struct { | |||
| scorer *scorer.ConjunctionQueryScorer | |||
| matches []*search.DocumentMatch | |||
| initialized bool | |||
| done bool // set once the searcher is exhausted so Next/Advance return nil immediately | |||
| } | |||
| func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { | |||
| @@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { | |||
| func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | |||
| if s.done { | |||
| return nil, nil | |||
| } | |||
| if !s.initialized { | |||
| err := s.initSearchers(ctx) | |||
| if err != nil { | |||
| @@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch | |||
| } | |||
| } | |||
| if rv == nil { | |||
| s.done = true | |||
| } | |||
| return rv, nil | |||
| } | |||
| func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
| if s.done { | |||
| return nil, nil | |||
| } | |||
| if !s.initialized { | |||
| err := s.initSearchers(ctx) | |||
| if err != nil { | |||
| @@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||
| } | |||
| } | |||
| // Advance the searchers only if the currentID cursor is trailing the lookup ID, | |||
| // additionally if the mustNotSearcher has been initialized, ensure that the | |||
| // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by | |||
| // currentID) is trailing the lookup ID as well - for in the case where currentID | |||
| // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT | |||
| // advance the currentID or the currMustNot cursors. | |||
| if (s.currentID == nil || s.currentID.Compare(ID) < 0) && | |||
| (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { | |||
| // Advance the searcher only if the cursor is trailing the lookup ID | |||
| if s.currentID == nil || s.currentID.Compare(ID) < 0 { | |||
| var err error | |||
| if s.mustSearcher != nil { | |||
| if s.currMust != nil { | |||
| @@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||
| } | |||
| if s.mustNotSearcher != nil { | |||
| if s.currMustNot != nil { | |||
| ctx.DocumentMatchPool.Put(s.currMustNot) | |||
| } | |||
| s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) | |||
| if err != nil { | |||
| return nil, err | |||
| // Additional check for mustNotSearcher, whose cursor isn't tracked by | |||
| // currentID, to prevent it from moving when its tracked position is | |||
| // already at or ahead of the requested ID. | |||
| if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { | |||
| if s.currMustNot != nil { | |||
| ctx.DocumentMatchPool.Put(s.currMustNot) | |||
| } | |||
| s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| } | |||
| @@ -22,6 +22,11 @@ import ( | |||
| "github.com/blevesearch/bleve/search" | |||
| ) | |||
| type filterFunc func(key []byte) bool | |||
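| // GeoBitsShift1 is the total bit width of the morton-interleaved lon/lat | |||
| // hash; the bounding box search recurses down from its top bit | |||
| // (GeoBitsShift1Minus1). | |||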
| var GeoBitsShift1 = (geo.GeoBits << 1) | |||
| var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 | |||
| func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, | |||
| maxLon, maxLat float64, field string, boost float64, | |||
| options search.SearcherOptions, checkBoundaries bool) ( | |||
| @@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, | |||
| } | |||
| // do math to produce list of terms needed for this search | |||
| onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, | |||
| minLon, minLat, maxLon, maxLat, checkBoundaries) | |||
| onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, | |||
| minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| var onBoundarySearcher search.Searcher | |||
| dvReader, err := indexReader.DocValueReader([]string{field}) | |||
| @@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4 | |||
| var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 | |||
| func ComputeGeoRange(term uint64, shift uint, | |||
| sminLon, sminLat, smaxLon, smaxLat float64, | |||
| checkBoundaries bool) ( | |||
| onBoundary [][]byte, notOnBoundary [][]byte) { | |||
| split := term | uint64(0x1)<<shift | |||
| var upperMax uint64 | |||
| if shift < 63 { | |||
| upperMax = term | ((uint64(1) << (shift + 1)) - 1) | |||
| } else { | |||
| upperMax = 0xffffffffffffffff | |||
| sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, | |||
| indexReader index.IndexReader, field string) ( | |||
| onBoundary [][]byte, notOnBoundary [][]byte, err error) { | |||
| preallocBytesLen := 32 | |||
| preallocBytes := make([]byte, preallocBytesLen) | |||
| makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { | |||
| if len(preallocBytes) <= 0 { | |||
| preallocBytesLen = preallocBytesLen * 2 | |||
| preallocBytes = make([]byte, preallocBytesLen) | |||
| } | |||
| rv, preallocBytes, err = | |||
| numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) | |||
| return rv | |||
| } | |||
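| // prefer the reader's term dictionary when it can answer membership | |||
| // queries directly, so candidate terms absent from the index are skipped | |||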
| var fieldDict index.FieldDictContains | |||
| var isIndexed filterFunc | |||
| if irr, ok := indexReader.(index.IndexReaderContains); ok { | |||
| fieldDict, err = irr.FieldDictContains(field) | |||
| if err != nil { | |||
| return nil, nil, err | |||
| } | |||
| isIndexed = func(term []byte) bool { | |||
| found, err := fieldDict.Contains(term) | |||
| return err == nil && found | |||
| } | |||
| } | |||
| lowerMax := split - 1 | |||
| onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift, | |||
| sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) | |||
| plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift, | |||
| sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) | |||
| onBoundary = append(onBoundary, plusOnBoundary...) | |||
| notOnBoundary = append(notOnBoundary, plusNotOnBoundary...) | |||
| return | |||
| } | |||
| func relateAndRecurse(start, end uint64, res uint, | |||
| sminLon, sminLat, smaxLon, smaxLat float64, | |||
| checkBoundaries bool) ( | |||
| onBoundary [][]byte, notOnBoundary [][]byte) { | |||
| minLon := geo.MortonUnhashLon(start) | |||
| minLat := geo.MortonUnhashLat(start) | |||
| maxLon := geo.MortonUnhashLon(end) | |||
| maxLat := geo.MortonUnhashLat(end) | |||
| level := ((geo.GeoBits << 1) - res) >> 1 | |||
| within := res%document.GeoPrecisionStep == 0 && | |||
| geo.RectWithin(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat) | |||
| if within || (level == geoDetailLevel && | |||
| geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat)) { | |||
| if !within && checkBoundaries { | |||
| return [][]byte{ | |||
| numeric.MustNewPrefixCodedInt64(int64(start), res), | |||
| }, nil | |||
| defer func() { | |||
| if fieldDict != nil { | |||
| if fd, ok := fieldDict.(index.FieldDict); ok { | |||
| cerr := fd.Close() | |||
| if cerr != nil { | |||
| err = cerr | |||
| } | |||
| } | |||
| } | |||
| return nil, | |||
| [][]byte{ | |||
| numeric.MustNewPrefixCodedInt64(int64(start), res), | |||
| }() | |||
| if isIndexed == nil { | |||
| isIndexed = func(term []byte) bool { | |||
| if indexReader != nil { | |||
| reader, err := indexReader.TermFieldReader(term, field, false, false, false) | |||
| if err != nil || reader == nil { | |||
| return false | |||
| } | |||
| if reader.Count() == 0 { | |||
| _ = reader.Close() | |||
| return false | |||
| } | |||
| _ = reader.Close() | |||
| } | |||
| } else if level < geoDetailLevel && | |||
| geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat) { | |||
| return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, | |||
| checkBoundaries) | |||
| return true | |||
| } | |||
| } | |||
| return nil, nil | |||
| var computeGeoRange func(term uint64, shift uint) // declare for recursion | |||
| relateAndRecurse := func(start, end uint64, res, level uint) { | |||
| minLon := geo.MortonUnhashLon(start) | |||
| minLat := geo.MortonUnhashLat(start) | |||
| maxLon := geo.MortonUnhashLon(end) | |||
| maxLat := geo.MortonUnhashLat(end) | |||
| within := res%document.GeoPrecisionStep == 0 && | |||
| geo.RectWithin(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat) | |||
| if within || (level == geoDetailLevel && | |||
| geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat)) { | |||
| codedTerm := makePrefixCoded(int64(start), res) | |||
| if isIndexed(codedTerm) { | |||
| if !within && checkBoundaries { | |||
| onBoundary = append(onBoundary, codedTerm) | |||
| } else { | |||
| notOnBoundary = append(notOnBoundary, codedTerm) | |||
| } | |||
| } | |||
| } else if level < geoDetailLevel && | |||
| geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
| sminLon, sminLat, smaxLon, smaxLat) { | |||
| computeGeoRange(start, res-1) | |||
| } | |||
| } | |||
| computeGeoRange = func(term uint64, shift uint) { | |||
| if err != nil { | |||
| return | |||
| } | |||
| split := term | uint64(0x1)<<shift | |||
| var upperMax uint64 | |||
| if shift < 63 { | |||
| upperMax = term | ((uint64(1) << (shift + 1)) - 1) | |||
| } else { | |||
| upperMax = 0xffffffffffffffff | |||
| } | |||
| lowerMax := split - 1 | |||
| level := (GeoBitsShift1 - shift) >> 1 | |||
| relateAndRecurse(term, lowerMax, shift, level) | |||
| relateAndRecurse(split, upperMax, shift, level) | |||
| } | |||
| computeGeoRange(term, shift) | |||
| if err != nil { | |||
| return nil, nil, err | |||
| } | |||
| return onBoundary, notOnBoundary, err | |||
| } | |||
| func buildRectFilter(dvReader index.DocValueReader, field string, | |||
| @@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, | |||
| // build a searcher for the box | |||
| boxSearcher, err := boxSearcher(indexReader, | |||
| topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, | |||
| field, boost, options) | |||
| field, boost, options, false) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| @@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, | |||
| // two boxes joined through a disjunction searcher | |||
| func boxSearcher(indexReader index.IndexReader, | |||
| topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, | |||
| field string, boost float64, options search.SearcherOptions) ( | |||
| field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( | |||
| search.Searcher, error) { | |||
| if bottomRightLon < topLeftLon { | |||
| // cross date line, rewrite as two parts | |||
| leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
| -180, bottomRightLat, bottomRightLon, topLeftLat, | |||
| field, boost, options, false) | |||
| field, boost, options, checkBoundaries) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
| topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) | |||
| topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, | |||
| checkBoundaries) | |||
| if err != nil { | |||
| _ = leftSearcher.Close() | |||
| return nil, err | |||
| @@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader, | |||
| // build geoboundinggox searcher for that bounding box | |||
| boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
| topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, | |||
| options, false) | |||
| options, checkBoundaries) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| @@ -0,0 +1,110 @@ | |||
| // Copyright (c) 2019 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package searcher | |||
| import ( | |||
| "github.com/blevesearch/bleve/geo" | |||
| "github.com/blevesearch/bleve/index" | |||
| "github.com/blevesearch/bleve/numeric" | |||
| "github.com/blevesearch/bleve/search" | |||
| "math" | |||
| ) | |||
| func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, | |||
| polygon []geo.Point, field string, boost float64, | |||
| options search.SearcherOptions) (search.Searcher, error) { | |||
| // compute the bounding box enclosing the polygon | |||
| topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := | |||
| geo.BoundingRectangleForPolygon(polygon) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| // build a searcher for the bounding box on the polygon | |||
| boxSearcher, err := boxSearcher(indexReader, | |||
| topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, | |||
| field, boost, options, true) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| dvReader, err := indexReader.DocValueReader([]string{field}) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| // wrap it in a filtering searcher that checks each hit for polygon inclusion | |||
| return NewFilteringSearcher(boxSearcher, | |||
| buildPolygonFilter(dvReader, field, polygon)), nil | |||
| } | |||
| const float64EqualityThreshold = 1e-6 | |||
| func almostEqual(a, b float64) bool { | |||
| return math.Abs(a-b) <= float64EqualityThreshold | |||
| } | |||
| // buildPolygonFilter returns a FilterFunc that reports whether the document's | |||
| // point lies inside the polygon. It is based on the ray-casting technique | |||
| // described here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html | |||
| func buildPolygonFilter(dvReader index.DocValueReader, field string, | |||
| polygon []geo.Point) FilterFunc { | |||
| return func(d *search.DocumentMatch) bool { | |||
| var lon, lat float64 | |||
| var found bool | |||
| err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { | |||
| // only consider the values which are shifted 0 | |||
| prefixCoded := numeric.PrefixCoded(term) | |||
| shift, err := prefixCoded.Shift() | |||
| if err == nil && shift == 0 { | |||
| i64, err := prefixCoded.Int64() | |||
| if err == nil { | |||
| lon = geo.MortonUnhashLon(uint64(i64)) | |||
| lat = geo.MortonUnhashLat(uint64(i64)) | |||
| found = true | |||
| } | |||
| } | |||
| }) | |||
| // Note: this approach works for points which are strictly inside | |||
| // the polygon; i.e., it might fail for certain points on the polygon boundaries. | |||
| if err == nil && found { | |||
| nVertices := len(polygon) | |||
| var inside bool | |||
| // check for a direct vertex match | |||
| if almostEqual(polygon[0].Lat, lat) && | |||
| almostEqual(polygon[0].Lon, lon) { | |||
| return true | |||
| } | |||
| for i := 1; i < nVertices; i++ { | |||
| if almostEqual(polygon[i].Lat, lat) && | |||
| almostEqual(polygon[i].Lon, lon) { | |||
| return true | |||
| } | |||
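| // ray-casting step: toggle 'inside' each time a horizontal ray from the | |||
| // point crosses the polygon edge between vertices i-1 and i | |||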
| if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) && | |||
| lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ | |||
| (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { | |||
| inside = !inside | |||
| } | |||
| } | |||
| return inside | |||
| } | |||
| return false | |||
| } | |||
| } | |||
| @@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, | |||
| if !*inclusiveMax && maxInt64 != math.MinInt64 { | |||
| maxInt64-- | |||
| } | |||
| var fieldDict index.FieldDictContains | |||
| var isIndexed filterFunc | |||
| var err error | |||
| if irr, ok := indexReader.(index.IndexReaderContains); ok { | |||
| fieldDict, err = irr.FieldDictContains(field) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| isIndexed = func(term []byte) bool { | |||
| found, err := fieldDict.Contains(term) | |||
| return err == nil && found | |||
| } | |||
| } | |||
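| // with a term dictionary that supports Contains, the term ranges are | |||
| // filtered while being enumerated, so the separate filterCandidateTerms | |||
| // pass below is only needed for readers without that capability | |||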
| // FIXME hard-coded precision, should match field declaration | |||
| termRanges := splitInt64Range(minInt64, maxInt64, 4) | |||
| terms := termRanges.Enumerate() | |||
| terms := termRanges.Enumerate(isIndexed) | |||
| if fieldDict != nil { | |||
| if fd, ok := fieldDict.(index.FieldDict); ok { | |||
| cerr := fd.Close() | |||
| if cerr != nil { | |||
| err = cerr | |||
| } | |||
| } | |||
| } | |||
| if len(terms) < 1 { | |||
| // cannot return MatchNoneSearcher because of interaction with | |||
| // commit f391b991c20f02681bacd197afc6d8aed444e132 | |||
| return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, | |||
| true) | |||
| } | |||
| var err error | |||
| terms, err = filterCandidateTerms(indexReader, terms, field) | |||
| if err != nil { | |||
| return nil, err | |||
| // for upside_down | |||
| if isIndexed == nil { | |||
| terms, err = filterCandidateTerms(indexReader, terms, field) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| if tooManyClauses(len(terms)) { | |||
| return nil, tooManyClausesErr(len(terms)) | |||
| } | |||
| @@ -125,11 +154,17 @@ type termRange struct { | |||
| endTerm []byte | |||
| } | |||
| func (t *termRange) Enumerate() [][]byte { | |||
| func (t *termRange) Enumerate(filter filterFunc) [][]byte { | |||
| var rv [][]byte | |||
| next := t.startTerm | |||
| for bytes.Compare(next, t.endTerm) <= 0 { | |||
| rv = append(rv, next) | |||
| if filter != nil { | |||
| if filter(next) { | |||
| rv = append(rv, next) | |||
| } | |||
| } else { | |||
| rv = append(rv, next) | |||
| } | |||
| next = incrementBytes(next) | |||
| } | |||
| return rv | |||
| @@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte { | |||
| type termRanges []*termRange | |||
| func (tr termRanges) Enumerate() [][]byte { | |||
| func (tr termRanges) Enumerate(filter filterFunc) [][]byte { | |||
| var rv [][]byte | |||
| for _, tri := range tr { | |||
| trie := tri.Enumerate() | |||
| trie := tri.Enumerate(filter) | |||
| rv = append(rv, trie...) | |||
| } | |||
| return rv | |||
| @@ -38,6 +38,8 @@ type SearchSort interface { | |||
| RequiresScoring() bool | |||
| RequiresFields() []string | |||
| Reverse() | |||
| Copy() SearchSort | |||
| } | |||
| @@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool { | |||
| return rv | |||
| } | |||
| func (so SortOrder) Reverse() { | |||
| for _, soi := range so { | |||
| soi.Reverse() | |||
| } | |||
| } | |||
| // SortFieldType lets you control some internal sort behavior | |||
| // normally leaving this to the zero-value of SortFieldAuto is fine | |||
| type SortFieldType int | |||
| @@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort { | |||
| return &rv | |||
| } | |||
| func (s *SortField) Reverse() { | |||
| s.Desc = !s.Desc | |||
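| // also flip where missing values sort so the reversed order is an exact | |||
| // mirror of the original, including documents that lack the field | |||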
| if s.Missing == SortFieldMissingFirst { | |||
| s.Missing = SortFieldMissingLast | |||
| } else { | |||
| s.Missing = SortFieldMissingFirst | |||
| } | |||
| } | |||
| // SortDocID will sort results by the document identifier | |||
| type SortDocID struct { | |||
| Desc bool | |||
| @@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort { | |||
| return &rv | |||
| } | |||
| func (s *SortDocID) Reverse() { | |||
| s.Desc = !s.Desc | |||
| } | |||
| // SortScore will sort results by the document match score | |||
| type SortScore struct { | |||
| Desc bool | |||
| @@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort { | |||
| return &rv | |||
| } | |||
| func (s *SortScore) Reverse() { | |||
| s.Desc = !s.Desc | |||
| } | |||
| var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) | |||
| // NewSortGeoDistance creates SearchSort instance for sorting documents by | |||
| @@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort { | |||
| return &rv | |||
| } | |||
| func (s *SortGeoDistance) Reverse() { | |||
| s.Desc = !s.Desc | |||
| } | |||
| type BytesSlice [][]byte | |||
| func (p BytesSlice) Len() int { return len(p) } | |||
| @@ -1,10 +1,9 @@ | |||
| language: go | |||
| go: | |||
| - 1.4 | |||
| - 1.7 | |||
| script: | |||
| - go get golang.org/x/tools/cmd/vet | |||
| - go get golang.org/x/tools/cmd/cover | |||
| - go get github.com/mattn/goveralls | |||
| - go test -v -covermode=count -coverprofile=profile.out | |||
| @@ -18,7 +18,7 @@ import ( | |||
| "bytes" | |||
| ) | |||
| // Iterator represents a means of visity key/value pairs in order. | |||
| // Iterator represents a means of visiting key/value pairs in order. | |||
| type Iterator interface { | |||
| // Current() returns the key/value pair currently pointed to. | |||
| @@ -186,20 +186,29 @@ func (i *FSTIterator) Next() error { | |||
| } | |||
| func (i *FSTIterator) next(lastOffset int) error { | |||
| // remember where we started | |||
| // remember where we started with keysStack in this next() call | |||
| i.nextStart = append(i.nextStart[:0], i.keysStack...) | |||
| nextOffset := lastOffset + 1 | |||
| allowCompare := false | |||
| OUTER: | |||
| for true { | |||
| curr := i.statesStack[len(i.statesStack)-1] | |||
| autCurr := i.autStatesStack[len(i.autStatesStack)-1] | |||
| if curr.Final() && i.aut.IsMatch(autCurr) && | |||
| bytes.Compare(i.keysStack, i.nextStart) > 0 { | |||
| // in final state greater than start key | |||
| return nil | |||
| if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare { | |||
| // check to see if new keystack might have gone too far | |||
| if i.endKeyExclusive != nil && | |||
| bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { | |||
| return ErrIteratorDone | |||
| } | |||
| cmp := bytes.Compare(i.keysStack, i.nextStart) | |||
| if cmp > 0 { | |||
| // in final state greater than start key | |||
| return nil | |||
| } | |||
| } | |||
| numTrans := curr.NumTransitions() | |||
| @@ -207,8 +216,12 @@ OUTER: | |||
| INNER: | |||
| for nextOffset < numTrans { | |||
| t := curr.TransitionAt(nextOffset) | |||
| autNext := i.aut.Accept(autCurr, t) | |||
| if !i.aut.CanMatch(autNext) { | |||
| // TODO: potential optimization to skip nextOffset | |||
| // forwards more directly to something that the | |||
| // automaton likes rather than a linear scan? | |||
| nextOffset += 1 | |||
| continue INNER | |||
| } | |||
| @@ -234,30 +247,41 @@ OUTER: | |||
| i.valsStack = append(i.valsStack, v) | |||
| i.autStatesStack = append(i.autStatesStack, autNext) | |||
| // check to see if new keystack might have gone too far | |||
| if i.endKeyExclusive != nil && | |||
| bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { | |||
| return ErrIteratorDone | |||
| } | |||
| nextOffset = 0 | |||
| allowCompare = true | |||
| continue OUTER | |||
| } | |||
| // no more transitions, so need to backtrack and stack pop | |||
| if len(i.statesStack) <= 1 { | |||
| // stack len is 1 (root), can't go back further, we're done | |||
| break | |||
| } | |||
| // no transitions, and still room to pop | |||
| i.statesStack = i.statesStack[:len(i.statesStack)-1] | |||
| i.keysStack = i.keysStack[:len(i.keysStack)-1] | |||
| // if the top of the stack represents a linear chain of states | |||
| // (i.e., a suffix of nodes linked by single transitions), | |||
| // then optimize by popping the suffix in one shot without | |||
| // going back all the way to the OUTER loop | |||
| var popNum int | |||
| for j := len(i.statesStack) - 1; j > 0; j-- { | |||
| if i.statesStack[j].NumTransitions() != 1 { | |||
| popNum = len(i.statesStack) - 1 - j | |||
| break | |||
| } | |||
| } | |||
| if popNum < 1 { // always pop at least 1 entry from the stacks | |||
| popNum = 1 | |||
| } | |||
| nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1 | |||
| nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1 | |||
| allowCompare = false | |||
| i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1] | |||
| i.valsStack = i.valsStack[:len(i.valsStack)-1] | |||
| i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] | |||
| i.statesStack = i.statesStack[:len(i.statesStack)-popNum] | |||
| i.keysStack = i.keysStack[:len(i.keysStack)-popNum] | |||
| i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum] | |||
| i.valsStack = i.valsStack[:len(i.valsStack)-popNum] | |||
| i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum] | |||
| } | |||
| return ErrIteratorDone | |||
| @@ -0,0 +1,10 @@ | |||
| module github.com/couchbase/vellum | |||
| go 1.12 | |||
| require ( | |||
| github.com/edsrzf/mmap-go v1.0.0 | |||
| github.com/spf13/cobra v0.0.5 | |||
| github.com/willf/bitset v1.1.10 | |||
| golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect | |||
| ) | |||
| @@ -0,0 +1,39 @@ | |||
| github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= | |||
| github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= | |||
| github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
| github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= | |||
| github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
| github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= | |||
| github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | |||
| github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= | |||
| github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= | |||
| github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= | |||
| github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= | |||
| github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= | |||
| github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= | |||
| github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= | |||
| github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= | |||
| github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= | |||
| github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= | |||
| github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | |||
| github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= | |||
| github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= | |||
| github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= | |||
| github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= | |||
| github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= | |||
| github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= | |||
| github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= | |||
| github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= | |||
| github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= | |||
| github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= | |||
| github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= | |||
| github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
| github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
| github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= | |||
| golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
| golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
| golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ= | |||
| golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
| gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | |||
| gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= | |||
| @@ -12,7 +12,7 @@ | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package levenshtein2 | |||
| package levenshtein | |||
| import ( | |||
| "fmt" | |||
| @@ -12,7 +12,7 @@ | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package levenshtein2 | |||
| package levenshtein | |||
| import ( | |||
| "fmt" | |||
| @@ -12,7 +12,7 @@ | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package levenshtein2 | |||
| package levenshtein | |||
| import "fmt" | |||
| @@ -12,7 +12,7 @@ | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package levenshtein2 | |||
| package levenshtein | |||
| import ( | |||
| "math" | |||
| @@ -12,7 +12,7 @@ | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package levenshtein2 | |||
| package levenshtein | |||
| import ( | |||
| "crypto/md5" | |||
| @@ -75,15 +75,23 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) { | |||
| Rune0: [2]rune{r, r}, | |||
| } | |||
| next.Rune = next.Rune0[0:2] | |||
| return c.c(&next) | |||
| } | |||
| c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( | |||
| r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| for _, seq := range c.sequences { | |||
| c.compileUtf8Ranges(seq) | |||
| // try to find more folded runes | |||
| for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) { | |||
| next.Rune = append(next.Rune, r1, r1) | |||
| } | |||
| err = c.c(&next) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| } else { | |||
| c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( | |||
| r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| for _, seq := range c.sequences { | |||
| c.compileUtf8Ranges(seq) | |||
| } | |||
| } | |||
| } | |||
| case syntax.OpAnyChar: | |||
| @@ -0,0 +1,12 @@ | |||
| // +build riscv64 | |||
| package bbolt | |||
| // maxMapSize represents the largest mmap size supported by Bolt. | |||
| const maxMapSize = 0xFFFFFFFFFFFF // 256TB | |||
| // maxAllocSize is the size used when creating array pointers. | |||
| const maxAllocSize = 0x7FFFFFFF | |||
| // Are unaligned load/stores broken on this arch? | |||
| var brokenUnaligned = true | |||
| @@ -121,6 +121,7 @@ type DB struct { | |||
| AllocSize int | |||
| path string | |||
| openFile func(string, int, os.FileMode) (*os.File, error) | |||
| file *os.File | |||
| dataref []byte // mmap'ed readonly, write throws SEGV | |||
| data *[maxMapSize]byte | |||
| @@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { | |||
| db.readOnly = true | |||
| } | |||
| db.openFile = options.OpenFile | |||
| if db.openFile == nil { | |||
| db.openFile = os.OpenFile | |||
| } | |||
| // Open data file and separate sync handler for metadata writes. | |||
| db.path = path | |||
| var err error | |||
| if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { | |||
| if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil { | |||
| _ = db.close() | |||
| return nil, err | |||
| } | |||
| @@ -1054,6 +1060,10 @@ type Options struct { | |||
| // set directly on the DB itself when returned from Open(), but this option | |||
| // is useful in APIs which expose Options but not the underlying DB. | |||
| NoSync bool | |||
| // OpenFile is used to open files. It defaults to os.OpenFile. This option | |||
| // is useful for writing hermetic tests. | |||
| OpenFile func(string, int, os.FileMode) (*os.File, error) | |||
| } | |||
| // DefaultOptions represent the options used if nil options are passed into Open(). | |||
| @@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) { | |||
| f.readIDs(a) | |||
| } | |||
| // noSyncReload reads the freelist from pgids and filters out pending items. | |||
| func (f *freelist) noSyncReload(pgids []pgid) { | |||
| // Build a cache of only pending pages. | |||
| pcache := make(map[pgid]bool) | |||
| for _, txp := range f.pending { | |||
| for _, pendingID := range txp.ids { | |||
| pcache[pendingID] = true | |||
| } | |||
| } | |||
| // Check each page in the freelist and build a new available freelist | |||
| // with any pages not in the pending lists. | |||
| var a []pgid | |||
| for _, id := range pgids { | |||
| if !pcache[id] { | |||
| a = append(a, id) | |||
| } | |||
| } | |||
| f.readIDs(a) | |||
| } | |||
| // reindex rebuilds the free cache based on available and pending free lists. | |||
| func (f *freelist) reindex() { | |||
| ids := f.getFreePageIDs() | |||
| @@ -254,17 +254,36 @@ func (tx *Tx) Rollback() error { | |||
| if tx.db == nil { | |||
| return ErrTxClosed | |||
| } | |||
| tx.rollback() | |||
| tx.nonPhysicalRollback() | |||
| return nil | |||
| } | |||
| // nonPhysicalRollback is called when the user calls Rollback directly; in this case we do not need to reload the free pages from disk. | |||
| func (tx *Tx) nonPhysicalRollback() { | |||
| if tx.db == nil { | |||
| return | |||
| } | |||
| if tx.writable { | |||
| tx.db.freelist.rollback(tx.meta.txid) | |||
| } | |||
| tx.close() | |||
| } | |||
| // rollback needs to reload the free pages from disk in case a system error, such as an fsync failure, has occurred. | |||
| func (tx *Tx) rollback() { | |||
| if tx.db == nil { | |||
| return | |||
| } | |||
| if tx.writable { | |||
| tx.db.freelist.rollback(tx.meta.txid) | |||
| tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) | |||
| if !tx.db.hasSyncedFreelist() { | |||
| // Reconstruct free page list by scanning the DB to get the whole free page list. | |||
| // Note: scanning the whole db is heavy if your db size is large in NoSyncFreeList mode. | |||
| tx.db.freelist.noSyncReload(tx.db.freepages()) | |||
| } else { | |||
| // Read free page list from freelist page. | |||
| tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) | |||
| } | |||
| } | |||
| tx.close() | |||
| } | |||
| @@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error { | |||
| // If err == nil then exactly tx.Size() bytes will be written into the writer. | |||
| func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { | |||
| // Attempt to open reader with WriteFlag | |||
| f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) | |||
| f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) | |||
| if err != nil { | |||
| return 0, err | |||
| } | |||
| @@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { | |||
| // A reader transaction is maintained during the copy so it is safe to continue | |||
| // using the database while a copy is in progress. | |||
| func (tx *Tx) CopyFile(path string, mode os.FileMode) error { | |||
| f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) | |||
| f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| @@ -1,3 +1,5 @@ | |||
| The MIT license. | |||
| Copyright (c) 2014 the go-unsnap-stream authors. | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
| @@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | |||
| the Software, and to permit persons to whom the Software is furnished to do so, | |||
| subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | |||
| FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |||
| @@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |||
| IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |||
| CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| Permission is explicitly granted to relicense this material under new terms of | |||
| your choice when integrating this library with another library or project. | |||
| @@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy | |||
| Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4]. | |||
| For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly, using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. | |||
| Update to the previous paragraph: Hooray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo. | |||
| For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly, using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. | |||
| However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead. | |||
| @@ -17,4 +19,4 @@ However, while the c-snappy was useful for checking compatibility, it introduced | |||
| [3] https://github.com/kubo/snzip | |||
| [4] https://pypi.python.org/pypi/python-snappy | |||
| [4] https://pypi.python.org/pypi/python-snappy | |||
| @@ -7,6 +7,7 @@ import ( | |||
| "io" | |||
| "io/ioutil" | |||
| "os" | |||
| "strings" | |||
| "hash/crc32" | |||
| @@ -189,7 +190,12 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS | |||
| err = nil | |||
| } | |||
| } else { | |||
| panic(err) | |||
| // may be an odd "file already closed" error... don't panic on that | |||
| if strings.Contains(err.Error(), "file already closed") { | |||
| err = nil | |||
| } else { | |||
| panic(err) | |||
| } | |||
| } | |||
| } | |||
| @@ -5,6 +5,8 @@ import ( | |||
| "reflect" | |||
| ) | |||
| const resumableDefault = false | |||
| var ( | |||
| // ErrShortBytes is returned when the | |||
| // slice being decoded is too short to | |||
| @@ -26,99 +28,240 @@ type Error interface { | |||
| // Resumable returns whether | |||
| // or not the error means that | |||
| // the stream of data is malformed | |||
| // and the information is unrecoverable. | |||
| // and the information is unrecoverable. | |||
| Resumable() bool | |||
| } | |||
| // contextError allows msgp Error instances to be enhanced with additional | |||
| // context about their origin. | |||
| type contextError interface { | |||
| Error | |||
| // withContext must not modify the error instance - it must clone and | |||
| // return a new error with the context added. | |||
| withContext(ctx string) error | |||
| } | |||
| // Cause returns the underlying cause of an error that has been wrapped | |||
| // with additional context. | |||
| func Cause(e error) error { | |||
| out := e | |||
| if e, ok := e.(errWrapped); ok && e.cause != nil { | |||
| out = e.cause | |||
| } | |||
| return out | |||
| } | |||
| // Resumable returns whether or not the error means that the stream of data is | |||
| // malformed and the information is unrecoverable. | |||
| func Resumable(e error) bool { | |||
| if e, ok := e.(Error); ok { | |||
| return e.Resumable() | |||
| } | |||
| return resumableDefault | |||
| } | |||
| // WrapError wraps an error with additional context that allows the part of the | |||
| // serialized type that caused the problem to be identified. Underlying errors | |||
| // can be retrieved using Cause() | |||
| // | |||
| // The input error is not modified - a new error should be returned. | |||
| // | |||
| // ErrShortBytes is not wrapped with any context due to backward compatibility | |||
| // issues with the public API. | |||
| // | |||
| func WrapError(err error, ctx ...interface{}) error { | |||
| switch e := err.(type) { | |||
| case errShort: | |||
| return e | |||
| case contextError: | |||
| return e.withContext(ctxString(ctx)) | |||
| default: | |||
| return errWrapped{cause: err, ctx: ctxString(ctx)} | |||
| } | |||
| } | |||
| // ctxString converts the incoming interface{} slice into a single string. | |||
| func ctxString(ctx []interface{}) string { | |||
| out := "" | |||
| for idx, cv := range ctx { | |||
| if idx > 0 { | |||
| out += "/" | |||
| } | |||
| out += fmt.Sprintf("%v", cv) | |||
| } | |||
| return out | |||
| } | |||
| func addCtx(ctx, add string) string { | |||
| if ctx != "" { | |||
| return add + "/" + ctx | |||
| } else { | |||
| return add | |||
| } | |||
| } | |||
| // errWrapped allows arbitrary errors passed to WrapError to be enhanced with | |||
| // context and unwrapped with Cause() | |||
| type errWrapped struct { | |||
| cause error | |||
| ctx string | |||
| } | |||
| func (e errWrapped) Error() string { | |||
| if e.ctx != "" { | |||
| return fmt.Sprintf("%s at %s", e.cause, e.ctx) | |||
| } else { | |||
| return e.cause.Error() | |||
| } | |||
| } | |||
| func (e errWrapped) Resumable() bool { | |||
| if e, ok := e.cause.(Error); ok { | |||
| return e.Resumable() | |||
| } | |||
| return resumableDefault | |||
| } | |||
| type errShort struct{} | |||
| func (e errShort) Error() string { return "msgp: too few bytes left to read object" } | |||
| func (e errShort) Resumable() bool { return false } | |||
| type errFatal struct{} | |||
| type errFatal struct { | |||
| ctx string | |||
| } | |||
| func (f errFatal) Error() string { | |||
| out := "msgp: fatal decoding error (unreachable code)" | |||
| if f.ctx != "" { | |||
| out += " at " + f.ctx | |||
| } | |||
| return out | |||
| } | |||
| func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" } | |||
| func (f errFatal) Resumable() bool { return false } | |||
| func (f errFatal) withContext(ctx string) error { f.ctx = addCtx(f.ctx, ctx); return f } | |||
| // ArrayError is an error returned | |||
| // when decoding a fix-sized array | |||
| // of the wrong size | |||
| type ArrayError struct { | |||
| Wanted uint32 | |||
| Got uint32 | |||
| ctx string | |||
| } | |||
| // Error implements the error interface | |||
| func (a ArrayError) Error() string { | |||
| return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) | |||
| out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) | |||
| if a.ctx != "" { | |||
| out += " at " + a.ctx | |||
| } | |||
| return out | |||
| } | |||
| // Resumable is always 'true' for ArrayErrors | |||
| func (a ArrayError) Resumable() bool { return true } | |||
| func (a ArrayError) withContext(ctx string) error { a.ctx = addCtx(a.ctx, ctx); return a } | |||
| // IntOverflow is returned when a call | |||
| // would downcast an integer to a type | |||
| // with too few bits to hold its value. | |||
| type IntOverflow struct { | |||
| Value int64 // the value of the integer | |||
| FailedBitsize int // the bit size that the int64 could not fit into | |||
| ctx string | |||
| } | |||
| // Error implements the error interface | |||
| func (i IntOverflow) Error() string { | |||
| return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) | |||
| str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) | |||
| if i.ctx != "" { | |||
| str += " at " + i.ctx | |||
| } | |||
| return str | |||
| } | |||
| // Resumable is always 'true' for overflows | |||
| func (i IntOverflow) Resumable() bool { return true } | |||
| func (i IntOverflow) withContext(ctx string) error { i.ctx = addCtx(i.ctx, ctx); return i } | |||
| // UintOverflow is returned when a call | |||
| // would downcast an unsigned integer to a type | |||
| // with too few bits to hold its value | |||
| type UintOverflow struct { | |||
| Value uint64 // value of the uint | |||
| FailedBitsize int // the bit size that couldn't fit the value | |||
| ctx string | |||
| } | |||
| // Error implements the error interface | |||
| func (u UintOverflow) Error() string { | |||
| return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) | |||
| str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) | |||
| if u.ctx != "" { | |||
| str += " at " + u.ctx | |||
| } | |||
| return str | |||
| } | |||
| // Resumable is always 'true' for overflows | |||
| func (u UintOverflow) Resumable() bool { return true } | |||
| func (u UintOverflow) withContext(ctx string) error { u.ctx = addCtx(u.ctx, ctx); return u } | |||
| // UintBelowZero is returned when a call | |||
| // would cast a signed integer below zero | |||
| // to an unsigned integer. | |||
| type UintBelowZero struct { | |||
| Value int64 // value of the incoming int | |||
| ctx string | |||
| } | |||
| // Error implements the error interface | |||
| func (u UintBelowZero) Error() string { | |||
| return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) | |||
| str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) | |||
| if u.ctx != "" { | |||
| str += " at " + u.ctx | |||
| } | |||
| return str | |||
| } | |||
| // Resumable is always 'true' for overflows | |||
| func (u UintBelowZero) Resumable() bool { return true } | |||
| func (u UintBelowZero) withContext(ctx string) error { | |||
| u.ctx = ctx | |||
| return u | |||
| } | |||
| // A TypeError is returned when a particular | |||
| // decoding method is unsuitable for decoding | |||
| // a particular MessagePack value. | |||
| type TypeError struct { | |||
| Method Type // Type expected by method | |||
| Encoded Type // Type actually encoded | |||
| ctx string | |||
| } | |||
| // Error implements the error interface | |||
| func (t TypeError) Error() string { | |||
| return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) | |||
| out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) | |||
| if t.ctx != "" { | |||
| out += " at " + t.ctx | |||
| } | |||
| return out | |||
| } | |||
| // Resumable returns 'true' for TypeErrors | |||
| func (t TypeError) Resumable() bool { return true } | |||
| func (t TypeError) withContext(ctx string) error { t.ctx = addCtx(t.ctx, ctx); return t } | |||
| // returns either InvalidPrefixError or | |||
| // TypeError depending on whether or not | |||
| // the prefix is recognized | |||
| @@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false } | |||
| // to a function that takes `interface{}`. | |||
| type ErrUnsupportedType struct { | |||
| T reflect.Type | |||
| ctx string | |||
| } | |||
| // Error implements error | |||
| func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) } | |||
| func (e *ErrUnsupportedType) Error() string { | |||
| out := fmt.Sprintf("msgp: type %q not supported", e.T) | |||
| if e.ctx != "" { | |||
| out += " at " + e.ctx | |||
| } | |||
| return out | |||
| } | |||
| // Resumable returns 'true' for ErrUnsupportedType | |||
| func (e *ErrUnsupportedType) Resumable() bool { return true } | |||
| func (e *ErrUnsupportedType) withContext(ctx string) error { | |||
| o := *e | |||
| o.ctx = addCtx(o.ctx, ctx) | |||
| return &o | |||
| } | |||
| @@ -685,7 +685,7 @@ func (mw *Writer) WriteIntf(v interface{}) error { | |||
| case reflect.Map: | |||
| return mw.writeMap(val) | |||
| } | |||
| return &ErrUnsupportedType{val.Type()} | |||
| return &ErrUnsupportedType{T: val.Type()} | |||
| } | |||
| func (mw *Writer) writeMap(v reflect.Value) (err error) { | |||