LL: support scheduling workers on multiple nodes (tags/v0.5.0)
| @@ -0,0 +1,26 @@ | |||
| = vendor/github.com/aymerick/douceur licensed under: = | |||
| The MIT License (MIT) | |||
| Copyright (c) 2015 Aymerick JEHANNE | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| SOFTWARE. | |||
| = vendor/github.com/aymerick/douceur/LICENSE fd0cb84f618c94d44537f2cce7065bd4 | |||
| @@ -0,0 +1,31 @@ | |||
| = vendor/github.com/gorilla/css licensed under: = | |||
| Copyright (c) 2013, Gorilla web toolkit | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without modification, | |||
| are permitted provided that the following conditions are met: | |||
| Redistributions of source code must retain the above copyright notice, this | |||
| list of conditions and the following disclaimer. | |||
| Redistributions in binary form must reproduce the above copyright notice, this | |||
| list of conditions and the following disclaimer in the documentation and/or | |||
| other materials provided with the distribution. | |||
| Neither the name of the {organization} nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | |||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |||
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| = vendor/github.com/gorilla/css/LICENSE 4d8d46bc2d62ed6ac9293a579a144344 | |||
| @@ -0,0 +1,32 @@ | |||
| = vendor/github.com/microcosm-cc/bluemonday licensed under: = | |||
| Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are met: | |||
| * Redistributions of source code must retain the above copyright notice, this | |||
| list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above copyright notice, | |||
| this list of conditions and the following disclaimer in the documentation | |||
| and/or other materials provided with the distribution. | |||
| * Neither the name of the organisation (Microcosm) nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| = vendor/github.com/microcosm-cc/bluemonday/LICENSE.md 0d75e28805ddef5b397ee9a7e5282055 | |||
| @@ -5,6 +5,7 @@ go 1.16 | |||
| require ( | |||
| github.com/emicklei/go-restful/v3 v3.4.0 | |||
| github.com/gorilla/websocket v1.4.2 | |||
| github.com/microcosm-cc/bluemonday v1.0.18 | |||
| github.com/minio/minio-go/v7 v7.0.10 | |||
| github.com/onsi/ginkgo v1.11.0 | |||
| github.com/onsi/gomega v1.7.0 | |||
| @@ -68,6 +68,8 @@ github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:l | |||
| github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= | |||
| github.com/auth0/go-jwt-middleware v0.0.0-20170425171159-5493cabe49f7/go.mod h1:LWMyo4iOLWXHGdBki7NIht1kHru/0wM179h+d3g8ATM= | |||
| github.com/aws/aws-sdk-go v1.35.24/go.mod h1:tlPOdRjfxPBpNIwqDj61rmsnA85v9jc0Ps9+muhnW+k= | |||
| github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= | |||
| github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= | |||
| github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= | |||
| github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= | |||
| github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= | |||
| @@ -306,6 +308,8 @@ github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3i | |||
| github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= | |||
| github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= | |||
| github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
| github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= | |||
| github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= | |||
| github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= | |||
| github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= | |||
| github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= | |||
| @@ -416,6 +420,8 @@ github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGw | |||
| github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= | |||
| github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= | |||
| github.com/mholt/certmagic v0.6.2-0.20190624175158-6a42ef9fe8c2/go.mod h1:g4cOPxcjV0oFq3qwpjSA30LReKD8AoIfwAY9VvG35NY= | |||
| github.com/microcosm-cc/bluemonday v1.0.18 h1:6HcxvXDAi3ARt3slx6nTesbvorIc3QeTzBNRvWktHBo= | |||
| github.com/microcosm-cc/bluemonday v1.0.18/go.mod h1:Z0r70sCuXHig8YpBzCc5eGHAap2K7e/u082ZUpDRRqM= | |||
| github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= | |||
| github.com/miekg/dns v1.1.3/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= | |||
| github.com/miekg/dns v1.1.35/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM= | |||
| @@ -707,8 +713,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R | |||
| golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= | |||
| golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= | |||
| golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= | |||
| golang.org/x/net v0.0.0-20210224082022-3d97a244fca7 h1:OgUuv8lsRpBibGNbSizVwKWlysjaNzmC9gYMhPVfqFM= | |||
| golang.org/x/net v0.0.0-20210224082022-3d97a244fca7/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= | |||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= | |||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= | |||
| golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= | |||
| golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= | |||
| golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= | |||
| @@ -776,6 +783,7 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w | |||
| golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20210426230700-d19ff857e887 h1:dXfMednGJh/SUUFjTLsWJz3P+TQt9qnR11GgeI3vWKs= | |||
| golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= | |||
| @@ -787,8 +795,9 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
| golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
| golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= | |||
| golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | |||
| golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc= | |||
| golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | |||
| golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= | |||
| golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | |||
| golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= | |||
| golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= | |||
| golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= | |||
| @@ -17,9 +17,12 @@ limitations under the License. | |||
| package lifelonglearning | |||
| import ( | |||
| "fmt" | |||
| "context" | |||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | |||
| "k8s.io/apimachinery/pkg/util/sets" | |||
| "k8s.io/apimachinery/pkg/watch" | |||
| "k8s.io/klog/v2" | |||
| sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1" | |||
| "github.com/kubeedge/sedna/pkg/globalmanager/runtime" | |||
| @@ -36,17 +39,97 @@ func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) erro | |||
| // more details at https://github.com/kubernetes/kubernetes/issues/3030 | |||
| job.Kind = KindName | |||
| // Here only propagate to the nodes with non empty name | |||
| dataName := job.Spec.Dataset.Name | |||
| // LC has dataset object on this node that may call dataset node | |||
| var dsNodeName string | |||
| ds, err := c.client.Datasets(job.Namespace).Get(context.TODO(), dataName, metav1.GetOptions{}) | |||
| if err != nil { | |||
| klog.Errorf("not found job(name=%s/%s)'s dataset, error: %v", job.Kind, job.Name, err) | |||
| } else { | |||
| dsNodeName = ds.Spec.NodeName | |||
| } | |||
| var trainNodeName string | |||
| var evalNodeName string | |||
| var deployNodeName string | |||
| // FIXME(llhuii): only the case that all workers having the same nodeName are support, | |||
| // will support Spec.NodeSelector and different nodeName. | |||
| nodeName := job.Spec.TrainSpec.Template.Spec.NodeName | |||
| if len(nodeName) == 0 { | |||
| return fmt.Errorf("empty node name") | |||
| getAnnotationsNodeName := func(nodeName sednav1.LLJobStage) string { | |||
| return runtime.AnnotationsKeyPrefix + string(nodeName) | |||
| } | |||
| ann := job.GetAnnotations() | |||
| if ann != nil { | |||
| trainNodeName = ann[getAnnotationsNodeName(sednav1.LLJobTrain)] | |||
| evalNodeName = ann[getAnnotationsNodeName(sednav1.LLJobEval)] | |||
| deployNodeName = ann[getAnnotationsNodeName(sednav1.LLJobDeploy)] | |||
| } | |||
| if eventType == watch.Deleted { | |||
| // delete jobs from all LCs | |||
| nodes := sets.NewString(dsNodeName, trainNodeName, evalNodeName, deployNodeName) | |||
| for node := range nodes { | |||
| c.sendToEdgeFunc(node, eventType, job) | |||
| } | |||
| return nil | |||
| } | |||
| if dsNodeName == "" { | |||
| return nil | |||
| } | |||
| jobConditions := job.Status.Conditions | |||
| if len(jobConditions) == 0 { | |||
| return nil | |||
| } | |||
| latestCondition := jobConditions[len(jobConditions)-1] | |||
| currentType := latestCondition.Type | |||
| jobStage := latestCondition.Stage | |||
| syncJobWithNodeName := func(nodeName string) { | |||
| if err := c.sendToEdgeFunc(nodeName, eventType, job); err != nil { | |||
| klog.Warningf("Error to sync lifelong learning job %s to node %s in stage %s: %v", | |||
| job.Name, nodeName, jobStage, err) | |||
| } | |||
| } | |||
| runtime.InjectSecretAnnotations(c.kubeClient, job, job.Spec.CredentialName) | |||
| return c.sendToEdgeFunc(nodeName, eventType, job) | |||
| // isJobResidentNode checks whether nodeName is a job resident node | |||
| isJobResidentNode := func(nodeName string) bool { | |||
| // the node where LC monitors dataset and the node where inference worker is running are job resident node | |||
| if nodeName == dsNodeName || nodeName == deployNodeName { | |||
| return true | |||
| } | |||
| return false | |||
| } | |||
| doJobStageEvent := func(nodeName string) { | |||
| switch currentType { | |||
| case sednav1.LLJobStageCondWaiting: | |||
| syncJobWithNodeName(dsNodeName) | |||
| case sednav1.LLJobStageCondRunning: | |||
| syncJobWithNodeName(nodeName) | |||
| case sednav1.LLJobStageCondCompleted, sednav1.LLJobStageCondFailed: | |||
| if !isJobResidentNode(nodeName) { | |||
| // delete LC's job from nodeName that's different from dataset node when worker's status | |||
| // is completed or failed. | |||
| c.sendToEdgeFunc(nodeName, watch.Deleted, job) | |||
| } | |||
| } | |||
| } | |||
| switch jobStage { | |||
| case sednav1.LLJobTrain: | |||
| doJobStageEvent(trainNodeName) | |||
| case sednav1.LLJobEval: | |||
| doJobStageEvent(evalNodeName) | |||
| case sednav1.LLJobDeploy: | |||
| doJobStageEvent(deployNodeName) | |||
| } | |||
| return nil | |||
| } | |||
| func (c *Controller) SetDownstreamSendFunc(f runtime.DownstreamSendFunc) error { | |||
| @@ -18,6 +18,7 @@ package lifelonglearning | |||
| import ( | |||
| "context" | |||
| "encoding/json" | |||
| "fmt" | |||
| "strings" | |||
| "time" | |||
| @@ -25,6 +26,7 @@ import ( | |||
| v1 "k8s.io/api/core/v1" | |||
| "k8s.io/apimachinery/pkg/api/errors" | |||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | |||
| "k8s.io/apimachinery/pkg/types" | |||
| utilrand "k8s.io/apimachinery/pkg/util/rand" | |||
| utilruntime "k8s.io/apimachinery/pkg/util/runtime" | |||
| "k8s.io/apimachinery/pkg/util/wait" | |||
| @@ -294,10 +296,51 @@ func (c *Controller) sync(key string) (bool, error) { | |||
| return forget, err | |||
| } | |||
| // setWorkerNodeNameOfJob sets the worker nodeName of the specified job | |||
| // which is used for downstream to sync job info to the specified LC located in nodeName. | |||
| func (c *Controller) setWorkerNodeNameOfJob(job *sednav1.LifelongLearningJob, jobStage string, nodeName string) error { | |||
| key := runtime.AnnotationsKeyPrefix + jobStage | |||
| return c.addJobAnnotations(job, key, nodeName) | |||
| } | |||
| // addJobAnnotations adds info in job annotations | |||
| func (c *Controller) addJobAnnotations(job *sednav1.LifelongLearningJob, key string, value string) error { | |||
| ann := job.GetAnnotations() | |||
| if ann[key] == value { | |||
| // already set | |||
| return nil | |||
| } | |||
| patchData := metav1.PartialObjectMetadata{ | |||
| ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{key: value}}} | |||
| patchDataBytes, err := json.Marshal(&patchData) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| jobClient := c.client.LifelongLearningJobs(job.Namespace) | |||
| return runtime.RetryUpdateStatus(job.Name, job.Namespace, func() error { | |||
| newJob, err := jobClient.Get(context.TODO(), job.Name, metav1.GetOptions{}) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| annotations := newJob.GetAnnotations() | |||
| if annotations[key] == value { | |||
| return nil | |||
| } | |||
| _, err = jobClient.Patch(context.TODO(), job.Name, types.MergePatchType, patchDataBytes, metav1.PatchOptions{}) | |||
| return err | |||
| }) | |||
| } | |||
| // transitJobState transit job to next state | |||
| func (c *Controller) transitJobState(job *sednav1.LifelongLearningJob) (bool, error) { | |||
| var initialType sednav1.LLJobStageConditionType | |||
| var latestCondition = sednav1.LLJobCondition{ | |||
| var latestCondition sednav1.LLJobCondition = sednav1.LLJobCondition{ | |||
| Stage: sednav1.LLJobTrain, | |||
| Type: initialType, | |||
| } | |||
| @@ -305,14 +348,16 @@ func (c *Controller) transitJobState(job *sednav1.LifelongLearningJob) (bool, er | |||
| var newConditionType sednav1.LLJobStageConditionType | |||
| var needUpdated = false | |||
| var podStatus = v1.PodUnknown | |||
| var podStatus v1.PodPhase = v1.PodUnknown | |||
| var pod *v1.Pod | |||
| jobConditions := job.Status.Conditions | |||
| if len(jobConditions) > 0 { | |||
| // get latest pod and pod status | |||
| latestCondition = (jobConditions)[len(jobConditions)-1] | |||
| klog.V(2).Infof("lifelonglearning job %v/%v latest stage %v:", job.Namespace, job.Name, | |||
| latestCondition.Stage) | |||
| pod := c.getSpecifiedPods(job, string(latestCondition.Stage)) | |||
| pod = c.getSpecifiedPods(job, string(latestCondition.Stage)) | |||
| if pod != nil { | |||
| podStatus = pod.Status.Phase | |||
| @@ -337,25 +382,30 @@ func (c *Controller) transitJobState(job *sednav1.LifelongLearningJob) (bool, er | |||
| err = c.restartInferPod(job) | |||
| if err != nil { | |||
| klog.V(2).Infof("lifelonglearning job %v/%v inference pod failed to restart, err:%s", job.Namespace, job.Name, err) | |||
| } else { | |||
| klog.V(2).Infof("lifelonglearning job %v/%v inference pod restarts successfully", job.Namespace, job.Name) | |||
| return needUpdated, err | |||
| } | |||
| } else if podStatus != v1.PodPending && podStatus != v1.PodRunning { | |||
| err = c.createPod(job, jobStage) | |||
| } | |||
| if err != nil { | |||
| return needUpdated, err | |||
| klog.V(2).Infof("lifelonglearning job %v/%v inference pod restarts successfully", job.Namespace, job.Name) | |||
| newConditionType = sednav1.LLJobStageCondCompleted | |||
| } else { | |||
| if podStatus != v1.PodPending && podStatus != v1.PodRunning { | |||
| err = c.createPod(job, jobStage) | |||
| if err != nil { | |||
| return needUpdated, err | |||
| } | |||
| } | |||
| newConditionType = sednav1.LLJobStageCondStarting | |||
| } | |||
| newConditionType = sednav1.LLJobStageCondStarting | |||
| case sednav1.LLJobStageCondStarting, sednav1.LLJobStageCondRunning: | |||
| if podStatus == v1.PodRunning { | |||
| if jobStage == sednav1.LLJobDeploy { | |||
| newConditionType = sednav1.LLJobStageCondCompleted | |||
| } else { | |||
| // watch pod status, if pod running, set type running | |||
| newConditionType = sednav1.LLJobStageCondRunning | |||
| // add nodeName to job | |||
| if err := c.setWorkerNodeNameOfJob(job, string(jobStage), pod.Spec.NodeName); err != nil { | |||
| return needUpdated, err | |||
| } | |||
| // watch pod status, if pod running, set type running | |||
| newConditionType = sednav1.LLJobStageCondRunning | |||
| } else if podStatus == v1.PodSucceeded { | |||
| // watch pod status, if pod completed, set type completed | |||
| newConditionType = sednav1.LLJobStageCondCompleted | |||
| @@ -541,7 +591,7 @@ func (c *Controller) createPod(job *sednav1.LifelongLearningJob, podtype sednav1 | |||
| originalDataURLOrIndex = dataset.Spec.URL | |||
| } | |||
| var workerParam = new(runtime.WorkerParam) | |||
| var workerParam *runtime.WorkerParam = new(runtime.WorkerParam) | |||
| if podtype == sednav1.LLJobTrain { | |||
| workerParam.WorkerType = "Train" | |||
| @@ -672,7 +722,7 @@ func (c *Controller) createInferPod(job *sednav1.LifelongLearningJob) error { | |||
| return err | |||
| } | |||
| var workerParam = new(runtime.WorkerParam) | |||
| var workerParam *runtime.WorkerParam = new(runtime.WorkerParam) | |||
| workerParam.Mounts = append(workerParam.Mounts, | |||
| runtime.WorkerMount{ | |||
| URL: &runtime.MountURL{ | |||
| @@ -0,0 +1,22 @@ | |||
| The MIT License (MIT) | |||
| Copyright (c) 2015 Aymerick JEHANNE | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| SOFTWARE. | |||
| @@ -0,0 +1,60 @@ | |||
| package css | |||
| import "fmt" | |||
// Declaration is a single parsed style property, eg: "color: red !important".
type Declaration struct {
	Property  string
	Value     string
	Important bool
}

// NewDeclaration instanciates a new, zero-valued Declaration.
func NewDeclaration() *Declaration {
	return new(Declaration)
}

// String returns the string representation of the Declaration,
// including the "!important" part when set.
func (decl *Declaration) String() string {
	return decl.StringWithImportant(true)
}

// StringWithImportant returns the string representation, appending the
// "!important" part only when option is true and the declaration carries it.
func (decl *Declaration) StringWithImportant(option bool) string {
	suffix := ";"
	if option && decl.Important {
		suffix = " !important;"
	}
	return fmt.Sprintf("%s: %s%s", decl.Property, decl.Value, suffix)
}

// Equal reports whether both declarations hold the same property, value
// and importance flag.
func (decl *Declaration) Equal(other *Declaration) bool {
	if decl.Property != other.Property {
		return false
	}
	if decl.Value != other.Value {
		return false
	}
	return decl.Important == other.Important
}

// DeclarationsByProperty is a sort.Interface implementation that orders
// style declarations by property name, ascending.
type DeclarationsByProperty []*Declaration

// Len implements sort.Interface.
func (ds DeclarationsByProperty) Len() int {
	return len(ds)
}

// Swap implements sort.Interface.
func (ds DeclarationsByProperty) Swap(i, j int) {
	ds[j], ds[i] = ds[i], ds[j]
}

// Less implements sort.Interface.
func (ds DeclarationsByProperty) Less(i, j int) bool {
	return ds[i].Property < ds[j].Property
}
| @@ -0,0 +1,230 @@ | |||
| package css | |||
| import ( | |||
| "fmt" | |||
| "strings" | |||
| ) | |||
const (
	// indentSpace is the number of spaces added per embedding level when
	// pretty-printing rules (see Rule.indent).
	indentSpace = 2
)

// RuleKind represents a Rule kind
type RuleKind int

// Rule kinds
const (
	// QualifiedRule is a standard selector-based rule, eg: "p { ... }".
	QualifiedRule RuleKind = iota
	// AtRule is an at-rule, eg: "@media ... { ... }".
	AtRule
)

// atRulesWithRulesBlock lists the At Rules that have Rules inside their
// block instead of Declarations (used by Rule.EmbedsRules).
var atRulesWithRulesBlock = []string{
	"@document", "@font-feature-values", "@keyframes", "@media", "@supports",
}
// Rule represents a parsed CSS rule: either a qualified rule
// ("selectors { declarations }") or an at-rule ("@name prelude { ... }").
type Rule struct {
	Kind RuleKind

	// At Rule name (eg: "@media")
	Name string

	// Raw prelude
	Prelude string

	// Qualified Rule selectors parsed from prelude
	Selectors []string

	// Style properties
	Declarations []*Declaration

	// At Rule embedded rules
	Rules []*Rule

	// Current rule embedding level (drives indentation in String)
	EmbedLevel int
}
| // NewRule instanciates a new Rule | |||
| func NewRule(kind RuleKind) *Rule { | |||
| return &Rule{ | |||
| Kind: kind, | |||
| } | |||
| } | |||
| // Returns string representation of rule kind | |||
| func (kind RuleKind) String() string { | |||
| switch kind { | |||
| case QualifiedRule: | |||
| return "Qualified Rule" | |||
| case AtRule: | |||
| return "At Rule" | |||
| default: | |||
| return "WAT" | |||
| } | |||
| } | |||
| // EmbedsRules returns true if this rule embeds another rules | |||
| func (rule *Rule) EmbedsRules() bool { | |||
| if rule.Kind == AtRule { | |||
| for _, atRuleName := range atRulesWithRulesBlock { | |||
| if rule.Name == atRuleName { | |||
| return true | |||
| } | |||
| } | |||
| } | |||
| return false | |||
| } | |||
| // Equal returns true if both rules are equals | |||
| func (rule *Rule) Equal(other *Rule) bool { | |||
| if (rule.Kind != other.Kind) || | |||
| (rule.Prelude != other.Prelude) || | |||
| (rule.Name != other.Name) { | |||
| return false | |||
| } | |||
| if (len(rule.Selectors) != len(other.Selectors)) || | |||
| (len(rule.Declarations) != len(other.Declarations)) || | |||
| (len(rule.Rules) != len(other.Rules)) { | |||
| return false | |||
| } | |||
| for i, sel := range rule.Selectors { | |||
| if sel != other.Selectors[i] { | |||
| return false | |||
| } | |||
| } | |||
| for i, decl := range rule.Declarations { | |||
| if !decl.Equal(other.Declarations[i]) { | |||
| return false | |||
| } | |||
| } | |||
| for i, rule := range rule.Rules { | |||
| if !rule.Equal(other.Rules[i]) { | |||
| return false | |||
| } | |||
| } | |||
| return true | |||
| } | |||
// Diff returns a human readable list of the differences between rule and
// other, one string per differing aspect ("Field: left | right"). An empty
// slice means the rules do not differ on any reported aspect (see Equal).
func (rule *Rule) Diff(other *Rule) []string {
	result := []string{}

	// Scalar fields are compared directly.
	if rule.Kind != other.Kind {
		result = append(result, fmt.Sprintf("Kind: %s | %s", rule.Kind.String(), other.Kind.String()))
	}

	if rule.Prelude != other.Prelude {
		result = append(result, fmt.Sprintf("Prelude: \"%s\" | \"%s\"", rule.Prelude, other.Prelude))
	}

	if rule.Name != other.Name {
		result = append(result, fmt.Sprintf("Name: \"%s\" | \"%s\"", rule.Name, other.Name))
	}

	// Slice fields: when lengths differ only the size mismatch is reported;
	// otherwise elements are compared pairwise.
	if len(rule.Selectors) != len(other.Selectors) {
		result = append(result, fmt.Sprintf("Selectors: %v | %v", strings.Join(rule.Selectors, ", "), strings.Join(other.Selectors, ", ")))
	} else {
		for i, sel := range rule.Selectors {
			if sel != other.Selectors[i] {
				result = append(result, fmt.Sprintf("Selector: \"%s\" | \"%s\"", sel, other.Selectors[i]))
			}
		}
	}

	if len(rule.Declarations) != len(other.Declarations) {
		result = append(result, fmt.Sprintf("Declarations Nb: %d | %d", len(rule.Declarations), len(other.Declarations)))
	} else {
		for i, decl := range rule.Declarations {
			if !decl.Equal(other.Declarations[i]) {
				result = append(result, fmt.Sprintf("Declaration: \"%s\" | \"%s\"", decl.String(), other.Declarations[i].String()))
			}
		}
	}

	if len(rule.Rules) != len(other.Rules) {
		result = append(result, fmt.Sprintf("Rules Nb: %d | %d", len(rule.Rules), len(other.Rules)))
	} else {
		// Embedded rules are compared via Equal; NOTE the loop variable
		// shadows the receiver here (kept as in the original).
		for i, rule := range rule.Rules {
			if !rule.Equal(other.Rules[i]) {
				result = append(result, fmt.Sprintf("Rule: \"%s\" | \"%s\"", rule.String(), other.Rules[i].String()))
			}
		}
	}

	return result
}
| // Returns the string representation of a rule | |||
| func (rule *Rule) String() string { | |||
| result := "" | |||
| if rule.Kind == QualifiedRule { | |||
| for i, sel := range rule.Selectors { | |||
| if i != 0 { | |||
| result += ", " | |||
| } | |||
| result += sel | |||
| } | |||
| } else { | |||
| // AtRule | |||
| result += fmt.Sprintf("%s", rule.Name) | |||
| if rule.Prelude != "" { | |||
| if result != "" { | |||
| result += " " | |||
| } | |||
| result += fmt.Sprintf("%s", rule.Prelude) | |||
| } | |||
| } | |||
| if (len(rule.Declarations) == 0) && (len(rule.Rules) == 0) { | |||
| result += ";" | |||
| } else { | |||
| result += " {\n" | |||
| if rule.EmbedsRules() { | |||
| for _, subRule := range rule.Rules { | |||
| result += fmt.Sprintf("%s%s\n", rule.indent(), subRule.String()) | |||
| } | |||
| } else { | |||
| for _, decl := range rule.Declarations { | |||
| result += fmt.Sprintf("%s%s\n", rule.indent(), decl.String()) | |||
| } | |||
| } | |||
| result += fmt.Sprintf("%s}", rule.indentEndBlock()) | |||
| } | |||
| return result | |||
| } | |||
| // Returns identation spaces for declarations and rules | |||
| func (rule *Rule) indent() string { | |||
| result := "" | |||
| for i := 0; i < ((rule.EmbedLevel + 1) * indentSpace); i++ { | |||
| result += " " | |||
| } | |||
| return result | |||
| } | |||
| // Returns identation spaces for end of block character | |||
| func (rule *Rule) indentEndBlock() string { | |||
| result := "" | |||
| for i := 0; i < (rule.EmbedLevel * indentSpace); i++ { | |||
| result += " " | |||
| } | |||
| return result | |||
| } | |||
| @@ -0,0 +1,25 @@ | |||
| package css | |||
// Stylesheet represents a parsed stylesheet: an ordered list of top-level
// rules.
type Stylesheet struct {
	Rules []*Rule
}
| // NewStylesheet instanciate a new Stylesheet | |||
| func NewStylesheet() *Stylesheet { | |||
| return &Stylesheet{} | |||
| } | |||
| // Returns string representation of the Stylesheet | |||
| func (sheet *Stylesheet) String() string { | |||
| result := "" | |||
| for _, rule := range sheet.Rules { | |||
| if result != "" { | |||
| result += "\n" | |||
| } | |||
| result += rule.String() | |||
| } | |||
| return result | |||
| } | |||
| @@ -0,0 +1,409 @@ | |||
| package parser | |||
| import ( | |||
| "errors" | |||
| "fmt" | |||
| "regexp" | |||
| "strings" | |||
| "github.com/gorilla/css/scanner" | |||
| "github.com/aymerick/douceur/css" | |||
| ) | |||
const (
	// importantSuffixRegexp matches a trailing "!important" flag (any case)
	// at the end of a declaration value, including surrounding whitespace.
	importantSuffixRegexp = `(?i)\s*!important\s*$`
)

var (
	// importantRegexp is compiled once in init() from importantSuffixRegexp.
	importantRegexp *regexp.Regexp
)
// Parser represents a CSS parser
type Parser struct {
	scan *scanner.Scanner // Tokenizer

	// Tokens parsed but not consumed yet: a lookahead buffer filled lazily
	// by nextToken and drained by shiftToken
	tokens []*scanner.Token

	// Rule embedding level; raised/lowered by ParseRules when it enters and
	// leaves a "{ ... }" block
	embedLevel int
}
func init() {
	// Compiled once at package load; used by ParseDeclaration to detect and
	// strip a trailing "!important" flag.
	importantRegexp = regexp.MustCompile(importantSuffixRegexp)
}
| // NewParser instanciates a new parser | |||
| func NewParser(txt string) *Parser { | |||
| return &Parser{ | |||
| scan: scanner.New(txt), | |||
| } | |||
| } | |||
| // Parse parses a whole stylesheet | |||
| func Parse(text string) (*css.Stylesheet, error) { | |||
| result, err := NewParser(text).ParseStylesheet() | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return result, nil | |||
| } | |||
| // ParseDeclarations parses CSS declarations | |||
| func ParseDeclarations(text string) ([]*css.Declaration, error) { | |||
| result, err := NewParser(text).ParseDeclarations() | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return result, nil | |||
| } | |||
| // ParseStylesheet parses a stylesheet | |||
| func (parser *Parser) ParseStylesheet() (*css.Stylesheet, error) { | |||
| result := css.NewStylesheet() | |||
| // Parse BOM | |||
| if _, err := parser.parseBOM(); err != nil { | |||
| return result, err | |||
| } | |||
| // Parse list of rules | |||
| rules, err := parser.ParseRules() | |||
| if err != nil { | |||
| return result, err | |||
| } | |||
| result.Rules = rules | |||
| return result, nil | |||
| } | |||
// ParseRules parses a list of rules
//
// When the next token is "{", the rules are parsed as a block: the embedding
// level is raised for the duration of the block, and the matching "}" ends
// the list. A "}" seen outside a block is an error.
func (parser *Parser) ParseRules() ([]*css.Rule, error) {
	result := []*css.Rule{}

	inBlock := false
	if parser.tokenChar("{") {
		// parsing a block of rules
		inBlock = true
		parser.embedLevel++
		parser.shiftToken()
	}

	for parser.tokenParsable() {
		if parser.tokenIgnorable() {
			parser.shiftToken()
		} else if parser.tokenChar("}") {
			if !inBlock {
				errMsg := fmt.Sprintf("Unexpected } character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}

			parser.shiftToken()
			parser.embedLevel--

			// finished
			break
		} else {
			rule, err := parser.ParseRule()
			if err != nil {
				return result, err
			}

			// Rules remember their nesting depth so String() can indent them.
			rule.EmbedLevel = parser.embedLevel
			result = append(result, rule)
		}
	}

	return result, parser.err()
}
| // ParseRule parses a rule | |||
| func (parser *Parser) ParseRule() (*css.Rule, error) { | |||
| if parser.tokenAtKeyword() { | |||
| return parser.parseAtRule() | |||
| } | |||
| return parser.parseQualifiedRule() | |||
| } | |||
| // ParseDeclarations parses a list of declarations | |||
| func (parser *Parser) ParseDeclarations() ([]*css.Declaration, error) { | |||
| result := []*css.Declaration{} | |||
| if parser.tokenChar("{") { | |||
| parser.shiftToken() | |||
| } | |||
| for parser.tokenParsable() { | |||
| if parser.tokenIgnorable() { | |||
| parser.shiftToken() | |||
| } else if parser.tokenChar("}") { | |||
| // end of block | |||
| parser.shiftToken() | |||
| break | |||
| } else { | |||
| declaration, err := parser.ParseDeclaration() | |||
| if err != nil { | |||
| return result, err | |||
| } | |||
| result = append(result, declaration) | |||
| } | |||
| } | |||
| return result, parser.err() | |||
| } | |||
// ParseDeclaration parses a single declaration of the form
// "property: value[ !important]". A trailing ";" is consumed, but a
// terminating "}" is left in the stream for the caller (ParseDeclarations).
func (parser *Parser) ParseDeclaration() (*css.Declaration, error) {
	result := css.NewDeclaration()
	curValue := ""

	for parser.tokenParsable() {
		if parser.tokenChar(":") {
			// Everything accumulated so far is the property name.
			result.Property = strings.TrimSpace(curValue)
			curValue = ""

			parser.shiftToken()
		} else if parser.tokenChar(";") || parser.tokenChar("}") {
			if result.Property == "" {
				errMsg := fmt.Sprintf("Unexpected ; character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}

			// Detect and strip a trailing "!important" flag.
			if importantRegexp.MatchString(curValue) {
				result.Important = true
				curValue = importantRegexp.ReplaceAllString(curValue, "")
			}

			result.Value = strings.TrimSpace(curValue)

			// Only ";" is consumed; "}" stays for the enclosing block parser.
			if parser.tokenChar(";") {
				parser.shiftToken()
			}

			// finished
			break
		} else {
			// Any other token is part of the property or value text.
			token := parser.shiftToken()
			curValue += token.Value
		}
	}

	// log.Printf("[parsed] Declaration: %s", result.String())

	return result, parser.err()
}
// Parse an At Rule (eg: "@media", "@import", "@font-face")
//
// Depending on the rule name, the "{ ... }" block is parsed either as nested
// rules or as a declarations block (see css.Rule.EmbedsRules). Statement
// at-rules end at ";" with no block at all.
func (parser *Parser) parseAtRule() (*css.Rule, error) {
	// parse rule name (eg: "@import")
	token := parser.shiftToken()

	result := css.NewRule(css.AtRule)
	result.Name = token.Value

	for parser.tokenParsable() {
		if parser.tokenChar(";") {
			// Statement at-rule (eg: `@import url(...);`).
			parser.shiftToken()

			// finished
			break
		} else if parser.tokenChar("{") {
			// The "{" is left in the stream: both block parsers below consume
			// it themselves.
			if result.EmbedsRules() {
				// parse rules block
				rules, err := parser.ParseRules()
				if err != nil {
					return result, err
				}

				result.Rules = rules
			} else {
				// parse declarations block
				declarations, err := parser.ParseDeclarations()
				if err != nil {
					return result, err
				}

				result.Declarations = declarations
			}

			// finished
			break
		} else {
			// parse prelude
			prelude, err := parser.parsePrelude()
			if err != nil {
				return result, err
			}

			result.Prelude = prelude
		}
	}

	// log.Printf("[parsed] Rule: %s", result.String())

	return result, parser.err()
}
// Parse a Qualified Rule (eg: "p > a { ... }")
//
// The prelude is the selector list; it is split on "," into Selectors after
// the declarations block has been read. A "{" with no preceding prelude is
// an error.
func (parser *Parser) parseQualifiedRule() (*css.Rule, error) {
	result := css.NewRule(css.QualifiedRule)

	for parser.tokenParsable() {
		if parser.tokenChar("{") {
			if result.Prelude == "" {
				errMsg := fmt.Sprintf("Unexpected { character: %s", parser.nextToken().String())
				return result, errors.New(errMsg)
			}

			// parse declarations block ("{" is consumed by ParseDeclarations)
			declarations, err := parser.ParseDeclarations()
			if err != nil {
				return result, err
			}

			result.Declarations = declarations

			// finished
			break
		} else {
			// parse prelude
			prelude, err := parser.parsePrelude()
			if err != nil {
				return result, err
			}

			result.Prelude = prelude
		}
	}

	// Split the prelude into individual, whitespace-trimmed selectors.
	result.Selectors = strings.Split(result.Prelude, ",")
	for i, sel := range result.Selectors {
		result.Selectors[i] = strings.TrimSpace(sel)
	}

	// log.Printf("[parsed] Rule: %s", result.String())

	return result, parser.err()
}
| // Parse Rule prelude | |||
| func (parser *Parser) parsePrelude() (string, error) { | |||
| result := "" | |||
| for parser.tokenParsable() && !parser.tokenEndOfPrelude() { | |||
| token := parser.shiftToken() | |||
| result += token.Value | |||
| } | |||
| result = strings.TrimSpace(result) | |||
| // log.Printf("[parsed] prelude: %s", result) | |||
| return result, parser.err() | |||
| } | |||
| // Parse BOM | |||
| func (parser *Parser) parseBOM() (bool, error) { | |||
| if parser.nextToken().Type == scanner.TokenBOM { | |||
| parser.shiftToken() | |||
| return true, nil | |||
| } | |||
| return false, parser.err() | |||
| } | |||
| // Returns next token without removing it from tokens buffer | |||
| func (parser *Parser) nextToken() *scanner.Token { | |||
| if len(parser.tokens) == 0 { | |||
| // fetch next token | |||
| nextToken := parser.scan.Next() | |||
| // log.Printf("[token] %s => %v", nextToken.Type.String(), nextToken.Value) | |||
| // queue it | |||
| parser.tokens = append(parser.tokens, nextToken) | |||
| } | |||
| return parser.tokens[0] | |||
| } | |||
| // Returns next token and remove it from the tokens buffer | |||
| func (parser *Parser) shiftToken() *scanner.Token { | |||
| var result *scanner.Token | |||
| result, parser.tokens = parser.tokens[0], parser.tokens[1:] | |||
| return result | |||
| } | |||
| // Returns tokenizer error, or nil if no error | |||
| func (parser *Parser) err() error { | |||
| if parser.tokenError() { | |||
| token := parser.nextToken() | |||
| return fmt.Errorf("Tokenizer error: %s", token.String()) | |||
| } | |||
| return nil | |||
| } | |||
| // Returns true if next token is Error | |||
| func (parser *Parser) tokenError() bool { | |||
| return parser.nextToken().Type == scanner.TokenError | |||
| } | |||
| // Returns true if next token is EOF | |||
| func (parser *Parser) tokenEOF() bool { | |||
| return parser.nextToken().Type == scanner.TokenEOF | |||
| } | |||
| // Returns true if next token is a whitespace | |||
| func (parser *Parser) tokenWS() bool { | |||
| return parser.nextToken().Type == scanner.TokenS | |||
| } | |||
| // Returns true if next token is a comment | |||
| func (parser *Parser) tokenComment() bool { | |||
| return parser.nextToken().Type == scanner.TokenComment | |||
| } | |||
| // Returns true if next token is a CDO or a CDC | |||
| func (parser *Parser) tokenCDOorCDC() bool { | |||
| switch parser.nextToken().Type { | |||
| case scanner.TokenCDO, scanner.TokenCDC: | |||
| return true | |||
| default: | |||
| return false | |||
| } | |||
| } | |||
| // Returns true if next token is ignorable | |||
| func (parser *Parser) tokenIgnorable() bool { | |||
| return parser.tokenWS() || parser.tokenComment() || parser.tokenCDOorCDC() | |||
| } | |||
| // Returns true if next token is parsable | |||
| func (parser *Parser) tokenParsable() bool { | |||
| return !parser.tokenEOF() && !parser.tokenError() | |||
| } | |||
| // Returns true if next token is an At Rule keyword | |||
| func (parser *Parser) tokenAtKeyword() bool { | |||
| return parser.nextToken().Type == scanner.TokenAtKeyword | |||
| } | |||
| // Returns true if next token is given character | |||
| func (parser *Parser) tokenChar(value string) bool { | |||
| token := parser.nextToken() | |||
| return (token.Type == scanner.TokenChar) && (token.Value == value) | |||
| } | |||
| // Returns true if next token marks the end of a prelude | |||
| func (parser *Parser) tokenEndOfPrelude() bool { | |||
| return parser.tokenChar(";") || parser.tokenChar("{") | |||
| } | |||
| @@ -0,0 +1,27 @@ | |||
| Copyright (c) 2013, Gorilla web toolkit | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without modification, | |||
| are permitted provided that the following conditions are met: | |||
| Redistributions of source code must retain the above copyright notice, this | |||
| list of conditions and the following disclaimer. | |||
| Redistributions in binary form must reproduce the above copyright notice, this | |||
| list of conditions and the following disclaimer in the documentation and/or | |||
| other materials provided with the distribution. | |||
| Neither the name of the {organization} nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | |||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |||
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| @@ -0,0 +1,33 @@ | |||
| // Copyright 2012 The Gorilla Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| /* | |||
| Package gorilla/css/scanner generates tokens for a CSS3 input. | |||
| It follows the CSS3 specification located at: | |||
| http://www.w3.org/TR/css3-syntax/ | |||
| To use it, create a new scanner for a given CSS string and call Next() until | |||
| the token returned has type TokenEOF or TokenError: | |||
| s := scanner.New(myCSS) | |||
| for { | |||
| token := s.Next() | |||
| if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError { | |||
| break | |||
| } | |||
| // Do something with the token... | |||
| } | |||
| Following the CSS3 specification, an error can only occur when the scanner | |||
| finds an unclosed quote or unclosed comment. In these cases the text becomes | |||
| "untokenizable". Everything else is tokenizable and it is up to a parser | |||
| to make sense of the token stream (or ignore nonsensical token sequences). | |||
| Note: the scanner doesn't perform lexical analysis or, in other words, it | |||
| doesn't care about the token context. It is intended to be used by a | |||
| lexer or parser. | |||
| */ | |||
| package scanner | |||
| @@ -0,0 +1,356 @@ | |||
| // Copyright 2012 The Gorilla Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| package scanner | |||
| import ( | |||
| "fmt" | |||
| "regexp" | |||
| "strings" | |||
| "unicode" | |||
| "unicode/utf8" | |||
| ) | |||
// tokenType identifies the type of lexical tokens.
type tokenType int

// String returns a string representation of the token type.
//
// Values missing from tokenNames are rendered explicitly instead of as an
// empty string, which makes scanner bugs easier to diagnose.
func (t tokenType) String() string {
	if name, ok := tokenNames[t]; ok {
		return name
	}
	return fmt.Sprintf("unknown(%d)", int(t))
}

// Token represents a token and the corresponding string.
type Token struct {
	Type   tokenType // lexical class of the token
	Value  string    // raw text matched from the input
	Line   int       // row where the token starts (scanner rows start at 1)
	Column int       // column where the token starts (scanner columns start at 1)
}

// String returns a string representation of the token. Values longer than
// ten characters are truncated to keep log output readable.
func (t *Token) String() string {
	if len(t.Value) > 10 {
		return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
			t.Type, t.Line, t.Column, t.Value)
	}
	return fmt.Sprintf("%s (line: %d, column: %d): %q",
		t.Type, t.Line, t.Column, t.Value)
}

// All tokens -----------------------------------------------------------------

// The complete list of tokens in CSS3.
const (
	// Scanner flags.
	TokenError tokenType = iota
	TokenEOF
	// From now on, only tokens from the CSS specification.
	TokenIdent
	TokenAtKeyword
	TokenString
	TokenHash
	TokenNumber
	TokenPercentage
	TokenDimension
	TokenURI
	TokenUnicodeRange
	TokenCDO
	TokenCDC
	TokenS
	TokenComment
	TokenFunction
	TokenIncludes
	TokenDashMatch
	TokenPrefixMatch
	TokenSuffixMatch
	TokenSubstringMatch
	TokenChar
	TokenBOM
)

// tokenNames maps tokenType's to their names. Used for conversion to string.
var tokenNames = map[tokenType]string{
	TokenError:          "error",
	TokenEOF:            "EOF",
	TokenIdent:          "IDENT",
	TokenAtKeyword:      "ATKEYWORD",
	TokenString:         "STRING",
	TokenHash:           "HASH",
	TokenNumber:         "NUMBER",
	TokenPercentage:     "PERCENTAGE",
	TokenDimension:      "DIMENSION",
	TokenURI:            "URI",
	TokenUnicodeRange:   "UNICODE-RANGE",
	TokenCDO:            "CDO",
	TokenCDC:            "CDC",
	TokenS:              "S",
	TokenComment:        "COMMENT",
	TokenFunction:       "FUNCTION",
	TokenIncludes:       "INCLUDES",
	TokenDashMatch:      "DASHMATCH",
	TokenPrefixMatch:    "PREFIXMATCH",
	TokenSuffixMatch:    "SUFFIXMATCH",
	TokenSubstringMatch: "SUBSTRINGMATCH",
	TokenChar:           "CHAR",
	TokenBOM:            "BOM",
}
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization

// macroRegexp matches a "{name}" macro reference inside a production pattern;
// init() expands these recursively before compiling the matchers.
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)

// macros maps macro names to patterns to be expanded.
var macros = map[string]string{
	// must be escaped: `\.+*?()|[]{}^$`
	"ident":      `-?{nmstart}{nmchar}*`,
	"name":       `{nmchar}+`,
	"nmstart":    `[a-zA-Z_]|{nonascii}|{escape}`,
	"nonascii":   "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"unicode":    `\\[0-9a-fA-F]{1,6}{wc}?`,
	"escape":     "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
	"nmchar":     `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
	"num":        `[0-9]*\.[0-9]+|[0-9]+`,
	"string":     `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
	"stringchar": `{urlchar}|[ ]|\\{nl}`,
	"nl":         `[\n\r\f]|\r\n`,
	"w":          `{wc}*`,
	"wc":         `[\t\n\f\r ]`,
	// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
	// ASCII characters range = `[\u0020-\u007e]`
	// Skip space \u0020 = `[\u0021-\u007e]`
	// Skip quotation mark \0022 = `[\u0021\u0023-\u007e]`
	// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
	// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]`
	// Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves
	"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
// productions maps the list of tokens to patterns to be expanded.
//
// Token types absent from this map (the commented-out entries) are matched
// by the first-byte shortcuts in Scanner.Next rather than by a regexp.
var productions = map[tokenType]string{
	// Unused regexps (matched using other methods) are commented out.
	TokenIdent:        `{ident}`,
	TokenAtKeyword:    `@{ident}`,
	TokenString:       `{string}`,
	TokenHash:         `#{name}`,
	TokenNumber:       `{num}`,
	TokenPercentage:   `{num}%`,
	TokenDimension:    `{num}{ident}`,
	TokenURI:          `url\({w}(?:{string}|{urlchar}*?){w}\)`,
	TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
	//TokenCDO:        `<!--`,
	TokenCDC:     `-->`,
	TokenS:       `{wc}+`,
	TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
	TokenFunction: `{ident}\(`,
	//TokenIncludes:       `~=`,
	//TokenDashMatch:      `\|=`,
	//TokenPrefixMatch:    `\^=`,
	//TokenSuffixMatch:    `\$=`,
	//TokenSubstringMatch: `\*=`,
	//TokenChar:           `[^"']`,
	//TokenBOM:            "\uFEFF",
}
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification.
var matchers = map[tokenType]*regexp.Regexp{}

// matchOrder is the order to test regexps when first-char shortcuts
// can't be used.
//
// More specific productions come before more general ones (eg: URI before
// Function before Ident, Dimension before Percentage before Number) because
// Scanner.Next returns the first match.
var matchOrder = []tokenType{
	TokenURI,
	TokenFunction,
	TokenUnicodeRange,
	TokenIdent,
	TokenDimension,
	TokenPercentage,
	TokenNumber,
	TokenCDC,
}
func init() {
	// replace macros and compile regexps for productions.
	replaceMacro := func(s string) string {
		// "{ident}" -> "(?:" + macros["ident"] + ")"; the non-capturing group
		// keeps alternations inside the macro body properly scoped.
		return "(?:" + macros[s[1:len(s)-1]] + ")"
	}
	for t, s := range productions {
		// Macros may expand to other macros, so loop until fully expanded.
		for macroRegexp.MatchString(s) {
			s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro)
		}
		// Anchor at the start: matchers are always applied to the remaining
		// input beginning at the current scan position.
		matchers[t] = regexp.MustCompile("^(?:" + s + ")")
	}
}
| // Scanner -------------------------------------------------------------------- | |||
| // New returns a new CSS scanner for the given input. | |||
| func New(input string) *Scanner { | |||
| // Normalize newlines. | |||
| input = strings.Replace(input, "\r\n", "\n", -1) | |||
| return &Scanner{ | |||
| input: input, | |||
| row: 1, | |||
| col: 1, | |||
| } | |||
| } | |||
// Scanner scans an input and emits tokens following the CSS3 specification.
type Scanner struct {
	input string // normalized input ("\r\n" collapsed to "\n" by New)
	pos   int    // byte offset of the next token in input
	row   int    // row of the next token; starts at 1
	col   int    // column of the next token; starts at 1
	err   *Token // sticky TokenEOF/TokenError; once set, Next always returns it
}
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
func (s *Scanner) Next() *Token {
	if s.err != nil {
		// Scanning already ended: EOF and error tokens are sticky.
		return s.err
	}
	if s.pos >= len(s.input) {
		s.err = &Token{TokenEOF, "", s.row, s.col}
		return s.err
	}
	if s.pos == 0 {
		// Test BOM only once, at the beginning of the file.
		if strings.HasPrefix(s.input, "\uFEFF") {
			return s.emitSimple(TokenBOM, "\uFEFF")
		}
	}
	// There's a lot we can guess based on the first byte so we'll take a
	// shortcut before testing multiple regexps.
	input := s.input[s.pos:]
	switch input[0] {
	case '\t', '\n', '\f', '\r', ' ':
		// Whitespace.
		return s.emitToken(TokenS, matchers[TokenS].FindString(input))
	case '.':
		// Dot is too common to not have a quick check.
		// We'll test if this is a Char; if it is followed by a number it is a
		// dimension/percentage/number, and this will be matched later.
		if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
			return s.emitSimple(TokenChar, ".")
		}
	case '#':
		// Another common one: Hash or Char.
		if match := matchers[TokenHash].FindString(input); match != "" {
			return s.emitToken(TokenHash, match)
		}
		return s.emitSimple(TokenChar, "#")
	case '@':
		// Another common one: AtKeyword or Char.
		if match := matchers[TokenAtKeyword].FindString(input); match != "" {
			return s.emitSimple(TokenAtKeyword, match)
		}
		return s.emitSimple(TokenChar, "@")
	case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
		// More common chars.
		return s.emitSimple(TokenChar, string(input[0]))
	case '"', '\'':
		// String or error.
		match := matchers[TokenString].FindString(input)
		if match != "" {
			return s.emitToken(TokenString, match)
		}
		s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
		return s.err
	case '/':
		// Comment, error or Char.
		if len(input) > 1 && input[1] == '*' {
			match := matchers[TokenComment].FindString(input)
			if match != "" {
				return s.emitToken(TokenComment, match)
			} else {
				s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
				return s.err
			}
		}
		return s.emitSimple(TokenChar, "/")
	case '~':
		// Includes or Char.
		return s.emitPrefixOrChar(TokenIncludes, "~=")
	case '|':
		// DashMatch or Char.
		return s.emitPrefixOrChar(TokenDashMatch, "|=")
	case '^':
		// PrefixMatch or Char.
		return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
	case '$':
		// SuffixMatch or Char.
		return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
	case '*':
		// SubstringMatch or Char.
		return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
	case '<':
		// CDO or Char.
		return s.emitPrefixOrChar(TokenCDO, "<!--")
	}
	// Test all regexps, in order.
	for _, token := range matchOrder {
		if match := matchers[token].FindString(input); match != "" {
			return s.emitToken(token, match)
		}
	}
	// We already handled unclosed quotation marks and comments,
	// so this can only be a Char.
	// NOTE(review): width is a byte count, so col advances by more than one
	// column for a multi-byte rune here — confirm this is intended.
	r, width := utf8.DecodeRuneInString(input)
	token := &Token{TokenChar, string(r), s.row, s.col}
	s.col += width
	s.pos += width
	return token
}
// updatePosition updates input coordinates based on the consumed text.
func (s *Scanner) updatePosition(text string) {
	width := utf8.RuneCountInString(text)
	lines := strings.Count(text, "\n")
	s.row += lines
	if lines == 0 {
		s.col += width
	} else {
		// The slice starts at the last "\n" itself, so the newline is counted
		// too — which yields a 1-based column for the character after it.
		s.col = utf8.RuneCountInString(text[strings.LastIndex(text, "\n"):])
	}
	s.pos += len(text) // while col is a rune index, pos is a byte index
}
| // emitToken returns a Token for the string v and updates the scanner position. | |||
| func (s *Scanner) emitToken(t tokenType, v string) *Token { | |||
| token := &Token{t, v, s.row, s.col} | |||
| s.updatePosition(v) | |||
| return token | |||
| } | |||
| // emitSimple returns a Token for the string v and updates the scanner | |||
| // position in a simplified manner. | |||
| // | |||
| // The string is known to have only ASCII characters and to not have a newline. | |||
| func (s *Scanner) emitSimple(t tokenType, v string) *Token { | |||
| token := &Token{t, v, s.row, s.col} | |||
| s.col += len(v) | |||
| s.pos += len(v) | |||
| return token | |||
| } | |||
| // emitPrefixOrChar returns a Token for type t if the current position | |||
| // matches the given prefix. Otherwise it returns a Char token using the | |||
| // first character from the prefix. | |||
| // | |||
| // The prefix is known to have only ASCII characters and to not have a newline. | |||
| func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token { | |||
| if strings.HasPrefix(s.input[s.pos:], prefix) { | |||
| return s.emitSimple(t, prefix) | |||
| } | |||
| return s.emitSimple(TokenChar, string(prefix[0])) | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| repo_token: x2wlA1x0X8CK45ybWpZRCVRB4g7vtkhaw | |||
| @@ -0,0 +1,4 @@ | |||
| root = true | |||
| [*] | |||
| end_of_line = lf | |||
| @@ -0,0 +1 @@ | |||
| * text=auto eol=lf | |||
| @@ -0,0 +1,15 @@ | |||
| # Binaries for programs and plugins | |||
| *.exe | |||
| *.exe~ | |||
| *.dll | |||
| *.so | |||
| *.dylib | |||
| # Test binary, built with `go test -c` | |||
| *.test | |||
| # Output of the go coverage tool, specifically when used with LiteIDE | |||
| *.out | |||
| # goland idea folder | |||
| *.idea | |||
| @@ -0,0 +1,26 @@ | |||
| language: go | |||
| go: | |||
| - 1.2.x | |||
| - 1.3.x | |||
| - 1.4.x | |||
| - 1.5.x | |||
| - 1.6.x | |||
| - 1.7.x | |||
| - 1.8.x | |||
| - 1.9.x | |||
| - 1.10.x | |||
| - 1.11.x | |||
| - 1.12.x | |||
| - 1.13.x | |||
| - 1.14.x | |||
| - 1.15.x | |||
| - 1.16.x | |||
| - tip | |||
| matrix: | |||
| allow_failures: | |||
| - go: tip | |||
| fast_finish: true | |||
| install: | |||
| - go get . | |||
| script: | |||
| - go test -v ./... | |||
| @@ -0,0 +1,52 @@ | |||
| # Contributing to bluemonday | |||
| Third-party patches are essential for keeping bluemonday secure and offering the features developers want. However there are a few guidelines that we need contributors to follow so that we can maintain the quality of work that developers who use bluemonday expect. | |||
| ## Getting Started | |||
| * Make sure you have a [Github account](https://github.com/signup/free) | |||
| ## Guidelines | |||
| 1. Do not vendor dependencies. As a security package, were we to vendor dependencies the projects that then vendor bluemonday may not receive the latest security updates to the dependencies. By not vendoring dependencies the project that implements bluemonday will vendor the latest version of any dependent packages. Vendoring is a project problem, not a package problem. bluemonday will be tested against the latest version of dependencies periodically and during any PR/merge. | |||
| 2. I do not care about spelling mistakes or whitespace and I do not believe that you should either. PRs therefore must be functional in their nature or be substantial and impactful if documentation or examples. | |||
| ## Submitting an Issue | |||
| * Submit a ticket for your issue, assuming one does not already exist | |||
| * Clearly describe the issue including the steps to reproduce (with sample input and output) if it is a bug | |||
| If you are reporting a security flaw, you may expect that we will provide the code to fix it for you. Otherwise you may want to submit a pull request to ensure the resolution is applied sooner rather than later: | |||
| * Fork the repository on Github | |||
| * Issue a pull request containing code to resolve the issue | |||
| ## Submitting a Pull Request | |||
| * Submit a ticket for your issue, assuming one does not already exist | |||
| * Describe the reason for the pull request and if applicable show some example inputs and outputs to demonstrate what the patch does | |||
| * Fork the repository on Github | |||
| * Before submitting the pull request you should | |||
| 1. Include tests for your patch, 1 test should encapsulate the entire patch and should refer to the Github issue | |||
| 1. If you have added new exposed/public functionality, you should ensure it is documented appropriately | |||
| 1. If you have added new exposed/public functionality, you should consider demonstrating how to use it within one of the helpers or shipped policies if appropriate or within a test if modifying a helper or policy is not appropriate | |||
| 1. Run all of the tests `go test -v ./...` or `make test` and ensure all tests pass | |||
| 1. Run gofmt `gofmt -w ./$*` or `make fmt` | |||
| 1. Run vet `go tool vet *.go` or `make vet` and resolve any issues | |||
| 1. Install golint using `go get -u github.com/golang/lint/golint` and run golint `golint *.go` or `make lint` and resolve every warning | |||
| * When submitting the pull request you should | |||
| 1. Note the issue(s) it resolves, i.e. `Closes #6` in the pull request comment to close issue #6 when the pull request is accepted | |||
| Once you have submitted a pull request, we *may* merge it without changes. If we have any comments or feedback, or need you to make changes to your pull request we will update the Github pull request or the associated issue. We expect responses from you within two weeks, and we may close the pull request if there is no activity. | |||
| ### Contributor Licence Agreement | |||
| We haven't gone for the formal "Sign a Contributor Licence Agreement" thing that projects like [puppet](https://cla.puppetlabs.com/), [Mojito](https://developer.yahoo.com/cocktails/mojito/cla/) and companies like [Google](http://code.google.com/legal/individual-cla-v1.0.html) are using. | |||
| But we do need to know that we can accept and merge your contributions, so for now the act of contributing a pull request should be considered equivalent to agreeing to a contributor licence agreement, specifically: | |||
| You accept that the act of submitting code to the bluemonday project is to grant a copyright licence to the project that is perpetual, worldwide, non-exclusive, no-charge, royalty free and irrevocable. | |||
| You accept that all who comply with the licence of the project (BSD 3-clause) are permitted to use your contributions to the project. | |||
| You accept, and by submitting code do declare, that you have the legal right to grant such a licence to the project and that each of the contributions is your own original creation. | |||
| @@ -0,0 +1,8 @@ | |||
| 1. John Graham-Cumming http://jgc.org/ | |||
| 1. Mohammad Gufran https://github.com/Gufran | |||
| 1. Steven Gutzwiller https://github.com/StevenGutzwiller | |||
| 1. Andrew Krasichkov @buglloc https://github.com/buglloc | |||
| 1. Mike Samuel mikesamuel@gmail.com | |||
| 1. Dmitri Shuralyov shurcooL@gmail.com | |||
| 1. opennota https://github.com/opennota https://gitlab.com/opennota | |||
| 1. Tom Anthony https://www.tomanthony.co.uk/ | |||
| @@ -0,0 +1,28 @@ | |||
| Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are met: | |||
| * Redistributions of source code must retain the above copyright notice, this | |||
| list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above copyright notice, | |||
| this list of conditions and the following disclaimer in the documentation | |||
| and/or other materials provided with the distribution. | |||
| * Neither the name of the organisation (Microcosm) nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| @@ -0,0 +1,48 @@ | |||
| # Targets: | |||
| # | |||
| # all: Builds the code locally after testing | |||
| # | |||
| # fmt: Formats the source files | |||
| # fmt-check: Check if the source files are formatted | |||
| # build: Builds the code locally | |||
| # vet: Vets the code | |||
| # lint: Runs lint over the code (you do not need to fix everything) | |||
| # test: Runs the tests | |||
| # cover: Gives you the URL to a nice test coverage report | |||
| # | |||
| # install: Builds, tests and installs the code locally | |||
| GOFILES_NOVENDOR = $(shell find . -type f -name '*.go' -not -path "./vendor/*" -not -path "./.git/*") | |||
| .PHONY: all fmt build vet lint test cover install | |||
| # The first target is always the default action if `make` is called without | |||
| # args we build and install into $GOPATH so that it can just be run | |||
| all: fmt vet test install | |||
| fmt: | |||
| @gofmt -s -w ${GOFILES_NOVENDOR} | |||
| fmt-check: | |||
| @([ -z "$(shell gofmt -d $(GOFILES_NOVENDOR) | head)" ]) || (echo "Source is unformatted"; exit 1) | |||
| build: | |||
| @go build | |||
| vet: | |||
| @go vet | |||
| lint: | |||
| @golint *.go | |||
| test: | |||
| @go test -v ./... | |||
| cover: COVERAGE_FILE := coverage.out | |||
| cover: | |||
| @go test -coverprofile=$(COVERAGE_FILE) && \ | |||
| cover -html=$(COVERAGE_FILE) && rm $(COVERAGE_FILE) | |||
| install: | |||
| @go install ./... | |||
| @@ -0,0 +1,418 @@ | |||
| # bluemonday [](https://travis-ci.org/microcosm-cc/bluemonday) [](https://godoc.org/github.com/microcosm-cc/bluemonday) [](https://sourcegraph.com/github.com/microcosm-cc/bluemonday?badge) | |||
| bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable. | |||
| bluemonday takes untrusted user generated content as an input, and will return HTML that has been sanitised against an allowlist of approved HTML elements and attributes so that you can safely include the content in your web page. | |||
| If you accept user generated content, and your server uses Go, you **need** bluemonday. | |||
| The default policy for user generated content (`bluemonday.UGCPolicy().Sanitize()`) turns this: | |||
| ```html | |||
| Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World | |||
| ``` | |||
| Into a harmless: | |||
| ```html | |||
| Hello World | |||
| ``` | |||
| And it turns this: | |||
| ```html | |||
| <a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a> | |||
| ``` | |||
| Into this: | |||
| ```html | |||
| XSS | |||
| ``` | |||
| Whilst still allowing this: | |||
| ```html | |||
| <a href="http://www.google.com/"> | |||
| <img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/> | |||
| </a> | |||
| ``` | |||
| To pass through mostly unaltered (it gained a rel="nofollow" which is a good thing for user generated content): | |||
| ```html | |||
| <a href="http://www.google.com/" rel="nofollow"> | |||
| <img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/> | |||
| </a> | |||
| ``` | |||
| It protects sites from [XSS](http://en.wikipedia.org/wiki/Cross-site_scripting) attacks. There are many [vectors for an XSS attack](https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet) and the best way to mitigate the risk is to sanitize user input against a known safe list of HTML elements and attributes. | |||
| You should **always** run bluemonday **after** any other processing. | |||
| If you use [blackfriday](https://github.com/russross/blackfriday) or [Pandoc](http://johnmacfarlane.net/pandoc/) then bluemonday should be run after these steps. This ensures that no insecure HTML is introduced later in your process. | |||
| bluemonday is heavily inspired by both the [OWASP Java HTML Sanitizer](https://code.google.com/p/owasp-java-html-sanitizer/) and the [HTML Purifier](http://htmlpurifier.org/). | |||
| ## Technical Summary | |||
| Allowlist based, you need to either build a policy describing the HTML elements and attributes to permit (and the `regexp` patterns of attributes), or use one of the supplied policies representing good defaults. | |||
| The policy containing the allowlist is applied using a fast non-validating, forward only, token-based parser implemented in the [Go net/html library](https://godoc.org/golang.org/x/net/html) by the core Go team. | |||
| We expect to be supplied with well-formatted HTML (closing elements for every applicable open element, nested correctly) and so we do not focus on repairing badly nested or incomplete HTML. We focus on simply ensuring that whatever elements do exist are described in the policy allowlist and that attributes and links are safe for use on your web page. [GIGO](http://en.wikipedia.org/wiki/Garbage_in,_garbage_out) does apply and if you feed it bad HTML bluemonday is not tasked with figuring out how to make it good again. | |||
| ### Supported Go Versions | |||
| bluemonday is tested on all versions since Go 1.2 including tip. | |||
| We do not support Go 1.0 as we depend on `golang.org/x/net/html` which includes a reference to `io.ErrNoProgress` which did not exist in Go 1.0. | |||
| We support Go 1.1 but Travis no longer tests against it. | |||
| ## Is it production ready? | |||
| *Yes* | |||
| We are using bluemonday in production having migrated from the widely used and heavily field tested OWASP Java HTML Sanitizer. | |||
| We are passing our extensive test suite (including AntiSamy tests as well as tests for any issues raised). Check for any [unresolved issues](https://github.com/microcosm-cc/bluemonday/issues?page=1&state=open) to see whether anything may be a blocker for you. | |||
| We invite pull requests and issues to help us ensure we are offering comprehensive protection against various attacks via user generated content. | |||
| ## Usage | |||
| Install in your `${GOPATH}` using `go get -u github.com/microcosm-cc/bluemonday` | |||
| Then call it: | |||
| ```go | |||
| package main | |||
| import ( | |||
| "fmt" | |||
| "github.com/microcosm-cc/bluemonday" | |||
| ) | |||
| func main() { | |||
| // Do this once for each unique policy, and use the policy for the life of the program | |||
| // Policy creation/editing is not safe to use in multiple goroutines | |||
| p := bluemonday.UGCPolicy() | |||
| // The policy can then be used to sanitize lots of input and it is safe to use the policy in multiple goroutines | |||
| html := p.Sanitize( | |||
| `<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, | |||
| ) | |||
| // Output: | |||
| // <a href="http://www.google.com" rel="nofollow">Google</a> | |||
| fmt.Println(html) | |||
| } | |||
| ``` | |||
| We offer three ways to call Sanitize: | |||
| ```go | |||
| p.Sanitize(string) string | |||
| p.SanitizeBytes([]byte) []byte | |||
| p.SanitizeReader(io.Reader) bytes.Buffer | |||
| ``` | |||
| If you are obsessed about performance, `p.SanitizeReader(r).Bytes()` will return a `[]byte` without performing any unnecessary casting of the inputs or outputs. Though the difference is so negligible you should never need to care. | |||
| You can build your own policies: | |||
| ```go | |||
| package main | |||
| import ( | |||
| "fmt" | |||
| "github.com/microcosm-cc/bluemonday" | |||
| ) | |||
| func main() { | |||
| p := bluemonday.NewPolicy() | |||
| // Require URLs to be parseable by net/url.Parse and either: | |||
| // mailto: http:// or https:// | |||
| p.AllowStandardURLs() | |||
| // We only allow <p> and <a href=""> | |||
| p.AllowAttrs("href").OnElements("a") | |||
| p.AllowElements("p") | |||
| html := p.Sanitize( | |||
| `<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, | |||
| ) | |||
| // Output: | |||
| // <a href="http://www.google.com">Google</a> | |||
| fmt.Println(html) | |||
| } | |||
| ``` | |||
| We ship two default policies: | |||
| 1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy. | |||
| 2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* allow iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs. | |||
| ## Policy Building | |||
| The essence of building a policy is to determine which HTML elements and attributes are considered safe for your scenario. OWASP provide an [XSS prevention cheat sheet](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) to help explain the risks, but essentially: | |||
| 1. Avoid anything other than the standard HTML elements | |||
| 1. Avoid `script`, `style`, `iframe`, `object`, `embed`, `base` elements that allow code to be executed by the client or third party content to be included that can execute code | |||
| 1. Avoid anything other than plain HTML attributes with values matched to a regexp | |||
| Basically, you should be able to describe what HTML is fine for your scenario. If you do not have confidence that you can describe your policy please consider using one of the shipped policies such as `bluemonday.UGCPolicy()`. | |||
| To create a new policy: | |||
| ```go | |||
| p := bluemonday.NewPolicy() | |||
| ``` | |||
| To add elements to a policy either add just the elements: | |||
| ```go | |||
| p.AllowElements("b", "strong") | |||
| ``` | |||
| Or using a regex: | |||
| _Note: if an element is added by name as shown above, any matching regex will be ignored_ | |||
| It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed. | |||
| ```go | |||
| p.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) | |||
| ``` | |||
| Or add elements as a virtue of adding an attribute: | |||
| ```go | |||
| // Note the recommended pattern, see the recommendation on using .Matching() below | |||
| p.AllowAttrs("nowrap").OnElements("td", "th") | |||
| ``` | |||
| Again, this also supports a regex pattern match alternative: | |||
| ```go | |||
| p.AllowAttrs("nowrap").OnElementsMatching(regexp.MustCompile(`^my-element-`)) | |||
| ``` | |||
| Attributes can either be added to all elements: | |||
| ```go | |||
| p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally() | |||
| ``` | |||
| Or attributes can be added to specific elements: | |||
| ```go | |||
| // Not the recommended pattern, see the recommendation on using .Matching() below | |||
| p.AllowAttrs("value").OnElements("li") | |||
| ``` | |||
| It is **always** recommended that an attribute be made to match a pattern. XSS in HTML attributes is very easy otherwise: | |||
| ```go | |||
| // \p{L} matches unicode letters, \p{N} matches unicode numbers | |||
| p.AllowAttrs("title").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).Globally() | |||
| ``` | |||
| You can stop at any time and call .Sanitize(): | |||
| ```go | |||
| // string htmlIn passed in from a HTTP POST | |||
| htmlOut := p.Sanitize(htmlIn) | |||
| ``` | |||
| And you can take any existing policy and extend it: | |||
| ```go | |||
| p := bluemonday.UGCPolicy() | |||
| p.AllowElements("fieldset", "select", "option") | |||
| ``` | |||
| ### Inline CSS | |||
| Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can allow the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles. | |||
| It is strongly recommended that you use `Matching` (with a suitable regular expression) | |||
| `MatchingEnum`, or `MatchingHandler` to ensure each style matches your needs, | |||
| but default handlers are supplied for most widely used styles. | |||
| Similar to attributes, you can allow specific CSS properties to be set inline: | |||
| ```go | |||
| p.AllowAttrs("style").OnElements("span", "p") | |||
| // Allow the 'color' property with valid RGB(A) hex values only (on any element allowed a 'style' attribute) | |||
| p.AllowStyles("color").Matching(regexp.MustCompile("(?i)^#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$")).Globally() | |||
| ``` | |||
| Additionally, you can allow a CSS property to be set only to an allowed value: | |||
| ```go | |||
| p.AllowAttrs("style").OnElements("span", "p") | |||
| // Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none' | |||
| // on 'span' elements only | |||
| p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span") | |||
| ``` | |||
| Or you can specify elements based on a regex pattern match: | |||
| ```go | |||
| p.AllowAttrs("style").OnElementsMatching(regexp.MustCompile(`^my-element-`)) | |||
| // Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none' | |||
| // on 'span' elements only | |||
| p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regexp.MustCompile(`^my-element-`)) | |||
| ``` | |||
| If you need more specific checking, you can create a handler that takes in a string and returns a bool to | |||
| validate the values for a given property. The string parameter has been | |||
| converted to lowercase and unicode code points have been converted. | |||
| ```go | |||
| myHandler := func(value string) bool { | |||
| // Validate your input here | |||
| return true | |||
| } | |||
| p.AllowAttrs("style").OnElements("span", "p") | |||
| // Allow the 'color' property with values validated by the handler (on any element allowed a 'style' attribute) | |||
| p.AllowStyles("color").MatchingHandler(myHandler).Globally() | |||
| ``` | |||
| ### Links | |||
| Links are difficult beasts to sanitise safely and also one of the biggest attack vectors for malicious content. | |||
| It is possible to do this: | |||
| ```go | |||
| p.AllowAttrs("href").Matching(regexp.MustCompile(`(?i)mailto|https?`)).OnElements("a") | |||
| ``` | |||
| But that will not protect you as the regular expression is insufficient in this case to have prevented a malformed value doing something unexpected. | |||
| We provide some additional global options for safely working with links. | |||
| `RequireParseableURLs` will ensure that URLs are parseable by Go's `net/url` package: | |||
| ```go | |||
| p.RequireParseableURLs(true) | |||
| ``` | |||
| If you have enabled parseable URLs then the following option will `AllowRelativeURLs`. By default this is disabled (bluemonday is an allowlist tool... you need to explicitly tell us to permit things) and when disabled it will prevent all local and scheme relative URLs (i.e. `href="localpage.html"`, `href="../home.html"` and even `href="//www.google.com"` are relative): | |||
| ```go | |||
| p.AllowRelativeURLs(true) | |||
| ``` | |||
| If you have enabled parseable URLs then you can allow the schemes (commonly called protocol when thinking of `http` and `https`) that are permitted. Bear in mind that allowing relative URLs in the above option will allow for a blank scheme: | |||
| ```go | |||
| p.AllowURLSchemes("mailto", "http", "https") | |||
| ``` | |||
| Regardless of whether you have enabled parseable URLs, you can force all URLs to have a rel="nofollow" attribute. This will be added if it does not exist, but only when the `href` is valid: | |||
| ```go | |||
| // This applies to "a" "area" "link" elements that have a "href" attribute | |||
| p.RequireNoFollowOnLinks(true) | |||
| ``` | |||
| Similarly, you can force all URLs to have "noreferrer" in their rel attribute. | |||
| ```go | |||
| // This applies to "a" "area" "link" elements that have a "href" attribute | |||
| p.RequireNoReferrerOnLinks(true) | |||
| ``` | |||
| We provide a convenience method that applies all of the above, but you will still need to allow the linkable elements for the URL rules to be applied to: | |||
| ```go | |||
| p.AllowStandardURLs() | |||
| p.AllowAttrs("cite").OnElements("blockquote", "q") | |||
| p.AllowAttrs("href").OnElements("a", "area") | |||
| p.AllowAttrs("src").OnElements("img") | |||
| ``` | |||
| An additional complexity regarding links is the data URI as defined in [RFC2397](http://tools.ietf.org/html/rfc2397). The data URI allows for images to be served inline using this format: | |||
| ```html | |||
| <img src="data:image/webp;base64,UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA="> | |||
| ``` | |||
| We have provided a helper to verify the mimetype followed by base64 content of data URIs links: | |||
| ```go | |||
| p.AllowDataURIImages() | |||
| ``` | |||
| That helper will enable GIF, JPEG, PNG and WEBP images. | |||
| It should be noted that there is a potential [security](http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/) [risk](https://capec.mitre.org/data/definitions/244.html) with the use of data URI links. You should only enable data URI links if you already trust the content. | |||
| We also have some features to help deal with user generated content: | |||
| ```go | |||
| p.AddTargetBlankToFullyQualifiedLinks(true) | |||
| ``` | |||
| This will ensure that anchor `<a href="" />` links that are fully qualified (the href destination includes a host name) will get `target="_blank"` added to them. | |||
| Additionally any link that has `target="_blank"` after the policy has been applied will also have the `rel` attribute adjusted to add `noopener`. This means a link may start like `<a href="//host/path"/>` and will end up as `<a href="//host/path" rel="noopener" target="_blank">`. It is important to note that the addition of `noopener` is a security feature and not an issue. There is an unfortunate feature to browsers that a browser window opened as a result of `target="_blank"` can still control the opener (your web page) and this protects against that. The background to this can be found here: [https://dev.to/ben/the-targetblank-vulnerability-by-example](https://dev.to/ben/the-targetblank-vulnerability-by-example) | |||
| ### Policy Building Helpers | |||
| We also bundle some helpers to simplify policy building: | |||
| ```go | |||
| // Permits the "dir", "id", "lang", "title" attributes globally | |||
| p.AllowStandardAttributes() | |||
| // Permits the "img" element and its standard attributes | |||
| p.AllowImages() | |||
| // Permits ordered and unordered lists, and also definition lists | |||
| p.AllowLists() | |||
| // Permits HTML tables and all applicable elements and non-styling attributes | |||
| p.AllowTables() | |||
| ``` | |||
| ### Invalid Instructions | |||
| The following are invalid: | |||
| ```go | |||
| // This does not say where the attributes are allowed, you need to add | |||
| // .Globally() or .OnElements(...) | |||
| // This will be ignored without error. | |||
| p.AllowAttrs("value") | |||
| // This does not say where the attributes are allowed, you need to add | |||
| // .Globally() or .OnElements(...) | |||
| // This will be ignored without error. | |||
| p.AllowAttrs( | |||
| "type", | |||
| ).Matching( | |||
| regexp.MustCompile("(?i)^(circle|disc|square|a|A|i|I|1)$"), | |||
| ) | |||
| ``` | |||
| Both examples exhibit the same issue, they declare attributes but do not then specify whether they are allowed globally or only on specific elements (and which elements). Attributes belong to one or more elements, and the policy needs to declare this. | |||
| ## Limitations | |||
| We are not yet including any tools to help allow and sanitize CSS. Which means that unless you wish to do the heavy lifting in a single regular expression (inadvisable), **you should not allow the "style" attribute anywhere**. | |||
| In the same theme, both `<script>` and `<style>` are considered harmful. These elements (and their content) will not be rendered by default, and require you to explicitly set `p.AllowUnsafe(true)`. You should be aware that allowing these elements defeats the purpose of using a HTML sanitizer as you would be explicitly allowing either JavaScript (and any plainly written XSS) and CSS (which can modify a DOM to insert JS), and additionally, limitations in this library mean it is not aware of whether HTML is validly structured and that can allow these elements to bypass some of the safety mechanisms built into the [WhatWG HTML parser standard](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect). | |||
| It is not the job of bluemonday to fix your bad HTML, it is merely the job of bluemonday to prevent malicious HTML getting through. If you have mismatched HTML elements, or non-conforming nesting of elements, those will remain. But if you have well-structured HTML bluemonday will not break it. | |||
| ## TODO | |||
| * Investigate whether devs want to blacklist elements and attributes. This would allow devs to take an existing policy (such as the `bluemonday.UGCPolicy()` ) that encapsulates 90% of what they're looking for but does more than they need, and to remove the extra things they do not want to make it 100% what they want | |||
| * Investigate whether devs want a validating HTML mode, in which the HTML elements are not just transformed into a balanced tree (every start tag has a closing tag at the correct depth) but also that elements and character data appear only in their allowed context (i.e. that a `table` element isn't a descendent of a `caption`, that `colgroup`, `thead`, `tbody`, `tfoot` and `tr` are permitted, and that character data is not permitted) | |||
| ## Development | |||
| If you have cloned this repo you will probably need the dependency: | |||
| `go get golang.org/x/net/html` | |||
| Gophers can use their familiar tools: | |||
| `go build` | |||
| `go test` | |||
| I personally use a Makefile as it spares typing the same args over and over whilst providing consistency for those of us who jump from language to language and enjoy just typing `make` in a project directory and watch magic happen. | |||
| `make` will build, vet, test and install the library. | |||
| `make clean` will remove the library from a *single* `${GOPATH}/pkg` directory tree | |||
| `make test` will run the tests | |||
| `make cover` will run the tests and *open a browser window* with the coverage report | |||
| `make lint` will run golint (install via `go get github.com/golang/lint/golint`) | |||
| ## Long term goals | |||
| 1. Open the code to adversarial peer review similar to the [Attack Review Ground Rules](https://code.google.com/p/owasp-java-html-sanitizer/wiki/AttackReviewGroundRules) | |||
| 1. Raise funds and pay for an external security review | |||
| @@ -0,0 +1,15 @@ | |||
| # Security Policy | |||
| ## Supported Versions | |||
| Latest tag and tip are supported. | |||
| Older tags remain present but changes result in new tags and are not back ported... please verify any issue against the latest tag and tip. | |||
| ## Reporting a Vulnerability | |||
| Email: <bluemonday@buro9.com> | |||
| Bluemonday is pure OSS and not maintained by a company. As such there is no bug bounty program but security issues will be taken seriously and resolved as soon as possible. | |||
| The maintainer lives in the United Kingdom and whilst the email is monitored expect a reply or ACK when the maintainer is awake. | |||
| @@ -0,0 +1,104 @@ | |||
| // Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| // | |||
| // All rights reserved. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without | |||
| // modification, are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistributions of source code must retain the above copyright notice, this | |||
| // list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistributions in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * Neither the name of the organisation (Microcosm) nor the names of its | |||
| // contributors may be used to endorse or promote products derived from | |||
| // this software without specific prior written permission. | |||
| // | |||
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* | |||
| Package bluemonday provides a way of describing an allowlist of HTML elements | |||
| and attributes as a policy, and for that policy to be applied to untrusted | |||
| strings from users that may contain markup. All elements and attributes not on | |||
| the allowlist will be stripped. | |||
| The default bluemonday.UGCPolicy().Sanitize() turns this: | |||
| Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World | |||
| Into the more harmless: | |||
| Hello World | |||
| And it turns this: | |||
| <a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a> | |||
| Into this: | |||
| XSS | |||
| Whilst still allowing this: | |||
| <a href="http://www.google.com/"> | |||
| <img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/> | |||
| </a> | |||
| To pass through mostly unaltered (it gained a rel="nofollow"): | |||
| <a href="http://www.google.com/" rel="nofollow"> | |||
| <img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/> | |||
| </a> | |||
| The primary purpose of bluemonday is to take potentially unsafe user generated | |||
| content (from things like Markdown, HTML WYSIWYG tools, etc) and make it safe | |||
| for you to put on your website. | |||
| It protects sites against XSS (http://en.wikipedia.org/wiki/Cross-site_scripting) | |||
| and other malicious content that a user interface may deliver. There are many | |||
| vectors for an XSS attack (https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet) | |||
| and the safest thing to do is to sanitize user input against a known safe list | |||
| of HTML elements and attributes. | |||
| Note: You should always run bluemonday after any other processing. | |||
| If you use blackfriday (https://github.com/russross/blackfriday) or | |||
| Pandoc (http://johnmacfarlane.net/pandoc/) then bluemonday should be run after | |||
| these steps. This ensures that no insecure HTML is introduced later in your | |||
| process. | |||
| bluemonday is heavily inspired by both the OWASP Java HTML Sanitizer | |||
| (https://code.google.com/p/owasp-java-html-sanitizer/) and the HTML Purifier | |||
| (http://htmlpurifier.org/). | |||
| We ship two default policies, one is bluemonday.StrictPolicy() and can be | |||
| thought of as equivalent to stripping all HTML elements and their attributes as | |||
| it has nothing on its allowlist. | |||
| The other is bluemonday.UGCPolicy() and allows a broad selection of HTML | |||
| elements and attributes that are safe for user generated content. Note that | |||
| this policy does not allow iframes, object, embed, styles, script, etc. | |||
| The essence of building a policy is to determine which HTML elements and | |||
| attributes are considered safe for your scenario. OWASP provide an XSS | |||
| prevention cheat sheet ( https://www.google.com/search?q=xss+prevention+cheat+sheet ) | |||
| to help explain the risks, but essentially: | |||
| 1. Avoid allowing anything other than plain HTML elements | |||
| 2. Avoid allowing `script`, `style`, `iframe`, `object`, `embed`, `base` | |||
| elements | |||
| 3. Avoid allowing anything other than plain HTML elements with simple | |||
| values that you can match to a regexp | |||
| */ | |||
| package bluemonday | |||
| @@ -0,0 +1,9 @@ | |||
| module github.com/microcosm-cc/bluemonday | |||
| go 1.16 | |||
| require ( | |||
| github.com/aymerick/douceur v0.2.0 | |||
| github.com/gorilla/css v1.0.0 // indirect | |||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e | |||
| ) | |||
| @@ -0,0 +1,11 @@ | |||
| github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= | |||
| github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= | |||
| github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= | |||
| github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= | |||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= | |||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= | |||
| golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
| golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= | |||
| golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | |||
| golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= | |||
| @@ -0,0 +1,303 @@ | |||
| // Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| // | |||
| // All rights reserved. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without | |||
| // modification, are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistributions of source code must retain the above copyright notice, this | |||
| // list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistributions in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * Neither the name of the organisation (Microcosm) nor the names of its | |||
| // contributors may be used to endorse or promote products derived from | |||
| // this software without specific prior written permission. | |||
| // | |||
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| package bluemonday | |||
| import ( | |||
| "encoding/base64" | |||
| "net/url" | |||
| "regexp" | |||
| ) | |||
// A selection of regular expressions that can be used as .Matching() rules on
// HTML attributes.
var (
	// CellAlign handles the `align` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)

	// CellVerticalAlign handles the `valign` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)

	// Direction handles the `dir` attribute
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)

	// ImageAlign handles the `align` attribute on the `image` tag
	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
	ImageAlign = regexp.MustCompile(
		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
	)

	// Integer describes whole positive integers (including 0) used in places
	// like td.colspan
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
	Integer = regexp.MustCompile(`^[0-9]+$`)

	// ISO8601 according to the W3 group is only a subset of the ISO8601
	// standard: http://www.w3.org/TR/NOTE-datetime
	//
	// Used in places like time.datetime
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
	//
	// Matches patterns:
	//  Year:
	//     YYYY (eg 1997)
	//  Year and month:
	//     YYYY-MM (eg 1997-07)
	//  Complete date:
	//     YYYY-MM-DD (eg 1997-07-16)
	//  Complete date plus hours and minutes:
	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
	//  Complete date plus hours, minutes and seconds:
	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
	//  second
	//     YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
	//
	// The decimal fraction separator is an escaped literal dot: previously the
	// unescaped `.` accepted any character (e.g. "19:20:30x45") as a
	// fraction separator.
	ISO8601 = regexp.MustCompile(
		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(\.[0-9]{1,6})` +
			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
	)

	// ListType encapsulates the common value as well as the latest spec
	// values for lists
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)

	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
	// `class` which both contain space delimited lists of data tokens
	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)

	// Number is a double value used on HTML5 meter and progress elements
	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)

	// NumberOrPercent is used predominantly as units of measurement in width
	// and height attributes
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)

	// Paragraph of text in an attribute such as *.'title', img.alt, etc
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
	// Note that we are not allowing chars that could close tags like '>'
	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)

	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
	// prefix of data URIs that contain common web image formats.
	//
	// This is not exported as it's not useful by itself, and only has value
	// within the AllowDataURIImages func
	dataURIImagePrefix = regexp.MustCompile(
		`^image/(gif|jpeg|png|webp);base64,`,
	)
)
| // AllowStandardURLs is a convenience function that will enable rel="nofollow" | |||
| // on "a", "area" and "link" (if you have allowed those elements) and will | |||
| // ensure that the URL values are parseable and either relative or belong to the | |||
| // "mailto", "http", or "https" schemes | |||
| func (p *Policy) AllowStandardURLs() { | |||
| // URLs must be parseable by net/url.Parse() | |||
| p.RequireParseableURLs(true) | |||
| // !url.IsAbs() is permitted | |||
| p.AllowRelativeURLs(true) | |||
| // Most common URL schemes only | |||
| p.AllowURLSchemes("mailto", "http", "https") | |||
| // For linking elements we will add rel="nofollow" if it does not already exist | |||
| // This applies to "a" "area" "link" | |||
| p.RequireNoFollowOnLinks(true) | |||
| } | |||
| // AllowStandardAttributes will enable "id", "title" and the language specific | |||
| // attributes "dir" and "lang" on all elements that are allowed | |||
| func (p *Policy) AllowStandardAttributes() { | |||
| // "dir" "lang" are permitted as both language attributes affect charsets | |||
| // and direction of text. | |||
| p.AllowAttrs("dir").Matching(Direction).Globally() | |||
| p.AllowAttrs( | |||
| "lang", | |||
| ).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally() | |||
| // "id" is permitted. This is pretty much as some HTML elements require this | |||
| // to work well ("dfn" is an example of a "id" being value) | |||
| // This does create a risk that JavaScript and CSS within your web page | |||
| // might identify the wrong elements. Ensure that you select things | |||
| // accurately | |||
| p.AllowAttrs("id").Matching( | |||
| regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`), | |||
| ).Globally() | |||
| // "title" is permitted as it improves accessibility. | |||
| p.AllowAttrs("title").Matching(Paragraph).Globally() | |||
| } | |||
| // AllowStyling presently enables the class attribute globally. | |||
| // | |||
| // Note: When bluemonday ships a CSS parser and we can safely sanitise that, | |||
| // this will also allow sanitized styling of elements via the style attribute. | |||
| func (p *Policy) AllowStyling() { | |||
| // "class" is permitted globally | |||
| p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally() | |||
| } | |||
| // AllowImages enables the img element and some popular attributes. It will also | |||
| // ensure that URL values are parseable. This helper does not enable data URI | |||
| // images, for that you should also use the AllowDataURIImages() helper. | |||
| func (p *Policy) AllowImages() { | |||
| // "img" is permitted | |||
| p.AllowAttrs("align").Matching(ImageAlign).OnElements("img") | |||
| p.AllowAttrs("alt").Matching(Paragraph).OnElements("img") | |||
| p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img") | |||
| // Standard URLs enabled | |||
| p.AllowStandardURLs() | |||
| p.AllowAttrs("src").OnElements("img") | |||
| } | |||
| // AllowDataURIImages permits the use of inline images defined in RFC2397 | |||
| // http://tools.ietf.org/html/rfc2397 | |||
| // http://en.wikipedia.org/wiki/Data_URI_scheme | |||
| // | |||
| // Images must have a mimetype matching: | |||
| // image/gif | |||
| // image/jpeg | |||
| // image/png | |||
| // image/webp | |||
| // | |||
| // NOTE: There is a potential security risk to allowing data URIs and you should | |||
| // only permit them on content you already trust. | |||
| // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/ | |||
| // https://capec.mitre.org/data/definitions/244.html | |||
| func (p *Policy) AllowDataURIImages() { | |||
| // URLs must be parseable by net/url.Parse() | |||
| p.RequireParseableURLs(true) | |||
| // Supply a function to validate images contained within data URI | |||
| p.AllowURLSchemeWithCustomPolicy( | |||
| "data", | |||
| func(url *url.URL) (allowUrl bool) { | |||
| if url.RawQuery != "" || url.Fragment != "" { | |||
| return false | |||
| } | |||
| matched := dataURIImagePrefix.FindString(url.Opaque) | |||
| if matched == "" { | |||
| return false | |||
| } | |||
| _, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):]) | |||
| if err != nil { | |||
| return false | |||
| } | |||
| return true | |||
| }, | |||
| ) | |||
| } | |||
| // AllowLists will enabled ordered and unordered lists, as well as definition | |||
| // lists | |||
| func (p *Policy) AllowLists() { | |||
| // "ol" "ul" are permitted | |||
| p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul") | |||
| // "li" is permitted | |||
| p.AllowAttrs("type").Matching(ListType).OnElements("li") | |||
| p.AllowAttrs("value").Matching(Integer).OnElements("li") | |||
| // "dl" "dt" "dd" are permitted | |||
| p.AllowElements("dl", "dt", "dd") | |||
| } | |||
| // AllowTables will enable a rich set of elements and attributes to describe | |||
| // HTML tables | |||
| func (p *Policy) AllowTables() { | |||
| // "table" is permitted | |||
| p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table") | |||
| p.AllowAttrs("summary").Matching(Paragraph).OnElements("table") | |||
| // "caption" is permitted | |||
| p.AllowElements("caption") | |||
| // "col" "colgroup" are permitted | |||
| p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup") | |||
| p.AllowAttrs("height", "width").Matching( | |||
| NumberOrPercent, | |||
| ).OnElements("col", "colgroup") | |||
| p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col") | |||
| p.AllowAttrs("valign").Matching( | |||
| CellVerticalAlign, | |||
| ).OnElements("col", "colgroup") | |||
| // "thead" "tr" are permitted | |||
| p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr") | |||
| p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr") | |||
| // "td" "th" are permitted | |||
| p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th") | |||
| p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th") | |||
| p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th") | |||
| p.AllowAttrs("headers").Matching( | |||
| SpaceSeparatedTokens, | |||
| ).OnElements("td", "th") | |||
| p.AllowAttrs("height", "width").Matching( | |||
| NumberOrPercent, | |||
| ).OnElements("td", "th") | |||
| p.AllowAttrs( | |||
| "scope", | |||
| ).Matching( | |||
| regexp.MustCompile(`(?i)(?:row|col)(?:group)?`), | |||
| ).OnElements("td", "th") | |||
| p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th") | |||
| p.AllowAttrs("nowrap").Matching( | |||
| regexp.MustCompile(`(?i)|nowrap`), | |||
| ).OnElements("td", "th") | |||
| // "tbody" "tfoot" | |||
| p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot") | |||
| p.AllowAttrs("valign").Matching( | |||
| CellVerticalAlign, | |||
| ).OnElements("tbody", "tfoot") | |||
| } | |||
// AllowIFrames enables the sandbox attribute on iframe elements and requires,
// via RequireSandboxOnIFrame, that the sandbox attribute be present and
// restricted to the supplied SandboxValue tokens.
func (p *Policy) AllowIFrames(vals ...SandboxValue) {
	p.AllowAttrs("sandbox").OnElements("iframe")
	p.RequireSandboxOnIFrame(vals...)
}
| @@ -0,0 +1,253 @@ | |||
| // Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| // | |||
| // All rights reserved. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without | |||
| // modification, are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistributions of source code must retain the above copyright notice, this | |||
| // list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistributions in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * Neither the name of the organisation (Microcosm) nor the names of its | |||
| // contributors may be used to endorse or promote products derived from | |||
| // this software without specific prior written permission. | |||
| // | |||
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| package bluemonday | |||
| import ( | |||
| "regexp" | |||
| ) | |||
// StrictPolicy returns an empty policy, which will effectively strip all HTML
// elements and their attributes from a document.
func StrictPolicy() *Policy {
	// A freshly constructed policy allows nothing at all.
	return NewPolicy()
}
// StripTagsPolicy returns an empty policy that strips all HTML elements and
// their attributes from a document.
//
// Deprecated: Use StrictPolicy instead.
func StripTagsPolicy() *Policy {
	return StrictPolicy()
}
// UGCPolicy returns a policy aimed at user generated content that is a result
// of HTML WYSIWYG tools and Markdown conversions.
//
// This is expected to be a fairly rich document where as much markup as
// possible should be retained. Markdown permits raw HTML so we are basically
// providing a policy to sanitise HTML5 documents safely but with the
// least intrusion on the formatting expectations of the user.
func UGCPolicy() *Policy {

	p := NewPolicy()

	///////////////////////
	// Global attributes //
	///////////////////////

	// "class" is not permitted as we are not allowing users to style their own
	// content

	p.AllowStandardAttributes()

	//////////////////////////////
	// Global URL format policy //
	//////////////////////////////

	p.AllowStandardURLs()

	////////////////////////////////
	// Declarations and structure //
	////////////////////////////////

	// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
	// expecting user generated content to be a fragment of HTML and not a full
	// document.

	//////////////////////////
	// Sectioning root tags //
	//////////////////////////

	// "article" and "aside" are permitted and take no attributes
	p.AllowElements("article", "aside")

	// "body" is not permitted as we are expecting user generated content to be a fragment
	// of HTML and not a full document.

	// "details" is permitted, including the "open" attribute which can either
	// be blank or the value "open".
	p.AllowAttrs(
		"open",
	).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")

	// "fieldset" is not permitted as we are not allowing forms to be created.

	// "figure" is permitted and takes no attributes
	p.AllowElements("figure")

	// "nav" is not permitted as it is assumed that the site (and not the user)
	// has defined navigation elements

	// "section" is permitted and takes no attributes
	p.AllowElements("section")

	// "summary" is permitted and takes no attributes
	p.AllowElements("summary")

	//////////////////////////
	// Headings and footers //
	//////////////////////////

	// "footer" is not permitted as we expect user content to be a fragment and
	// not structural to this extent

	// "h1" through "h6" are permitted and take no attributes
	p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")

	// "header" is not permitted as we expect user content to be a fragment and
	// not structural to this extent

	// "hgroup" is permitted and takes no attributes
	p.AllowElements("hgroup")

	/////////////////////////////////////
	// Content grouping and separating //
	/////////////////////////////////////

	// "blockquote" is permitted, including the "cite" attribute which must be
	// a standard URL (enforced by the URL policies above).
	p.AllowAttrs("cite").OnElements("blockquote")

	// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
	p.AllowElements("br", "div", "hr", "p", "span", "wbr")

	///////////
	// Links //
	///////////

	// "a" is permitted
	p.AllowAttrs("href").OnElements("a")

	// "area" is permitted along with the attributes that make image maps work
	p.AllowAttrs("name").Matching(
		regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
	).OnElements("map")
	p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
	p.AllowAttrs("coords").Matching(
		regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
	).OnElements("area")
	p.AllowAttrs("href").OnElements("area")
	p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
	p.AllowAttrs("shape").Matching(
		regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
	).OnElements("area")
	p.AllowAttrs("usemap").Matching(
		regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
	).OnElements("img")

	// "link" is not permitted

	/////////////////////
	// Phrase elements //
	/////////////////////

	// The following are all inline phrasing elements
	p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
		"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")

	// "q" is permitted and "cite" is a URL and handled by URL policies
	p.AllowAttrs("cite").OnElements("q")

	// "time" is permitted
	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")

	////////////////////
	// Style elements //
	////////////////////

	// block and inline elements that impart no semantic meaning but style the
	// document
	p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")

	// "style" is not permitted as we are not yet sanitising CSS and it is an
	// XSS attack vector

	//////////////////////
	// HTML5 Formatting //
	//////////////////////

	// "bdi" "bdo" are permitted
	p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")

	// "rp" "rt" "ruby" are permitted
	p.AllowElements("rp", "rt", "ruby")

	///////////////////////////
	// HTML5 Change tracking //
	///////////////////////////

	// "del" "ins" are permitted
	p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")

	///////////
	// Lists //
	///////////

	p.AllowLists()

	////////////
	// Tables //
	////////////

	p.AllowTables()

	///////////
	// Forms //
	///////////

	// By and large, forms are not permitted. However there are some form
	// elements that can be used to present data, and we do permit those
	//
	// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
	// "textarea" "optgroup" "option" are all not permitted

	// "meter" is permitted
	p.AllowAttrs(
		"value",
		"min",
		"max",
		"low",
		"high",
		"optimum",
	).Matching(Number).OnElements("meter")

	// "progress" is permitted
	p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")

	//////////////////////
	// Embedded content //
	//////////////////////

	// Vast majority not permitted
	// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
	// "video" are all not permitted

	p.AllowImages()

	return p
}
| @@ -0,0 +1,939 @@ | |||
| // Copyright (c) 2014, David Kitchen <david@buro9.com> | |||
| // | |||
| // All rights reserved. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without | |||
| // modification, are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistributions of source code must retain the above copyright notice, this | |||
| // list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistributions in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * Neither the name of the organisation (Microcosm) nor the names of its | |||
| // contributors may be used to endorse or promote products derived from | |||
| // this software without specific prior written permission. | |||
| // | |||
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |||
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| package bluemonday | |||
| //TODO sgutzwiller create map of styles to default handlers | |||
| //TODO sgutzwiller create handlers for various attributes | |||
| import ( | |||
| "net/url" | |||
| "regexp" | |||
| "strings" | |||
| "github.com/microcosm-cc/bluemonday/css" | |||
| ) | |||
// Policy encapsulates the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
// unexported fields contain maps that need to be initialized.
type Policy struct {

	// Declares whether the maps have been initialized, used as a cheap check to
	// ensure that those using Policy{} directly won't cause nil pointer
	// exceptions
	initialized bool

	// If true then we add spaces when stripping tags, specifically the closing
	// tag is replaced by a space character.
	addSpaces bool

	// When true, add rel="nofollow" to HTML a, area, and link tags
	requireNoFollow bool

	// When true, add rel="nofollow" to HTML a, area, and link tags, but only
	// when the link is fully qualified:
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoFollowFullyQualifiedLinks bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags
	requireNoReferrer bool

	// When true, add rel="noreferrer" to HTML a, area, and link tags, but only
	// when the link is fully qualified:
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	requireNoReferrerFullyQualifiedLinks bool

	// When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
	requireCrossOriginAnonymous bool

	// When non-nil, add and filter the sandbox attribute on iframe tags; the
	// map keys are the permitted sandbox token strings.
	requireSandboxOnIFrame map[string]bool

	// When true add target="_blank" to fully qualified links
	// Will add for href="http://foo"
	// Will skip for href="/foo" or href="foo"
	addTargetBlankToFullyQualifiedLinks bool

	// When true, URLs must be parseable by "net/url" url.Parse()
	requireParseableURLs bool

	// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
	allowRelativeURLs bool

	// When true, allow data attributes.
	allowDataAttributes bool

	// When true, allow comments.
	allowComments bool

	// map[htmlElementName]map[htmlAttributeName][]attrPolicy
	elsAndAttrs map[string]map[string][]attrPolicy

	// elsMatchingAndAttrs stores regex based element matches along with attributes
	elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy

	// map[htmlAttributeName][]attrPolicy
	globalAttrs map[string][]attrPolicy

	// map[htmlElementName]map[cssPropertyName][]stylePolicy
	elsAndStyles map[string]map[string][]stylePolicy

	// map[regex]map[cssPropertyName][]stylePolicy
	elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy

	// map[cssPropertyName][]stylePolicy
	globalStyles map[string][]stylePolicy

	// If urlPolicy is nil, all URLs with matching schema are allowed.
	// Otherwise, only the URLs with matching schema and urlPolicy(url)
	// returning true are allowed.
	allowURLSchemes map[string][]urlPolicy

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>. To prevent those being removed we maintain
	// a list of elements that are allowed to have no attributes and that will
	// be maintained in the output HTML.
	setOfElementsAllowedWithoutAttrs map[string]struct{}

	// If an element has had all attributes removed as a result of a policy
	// being applied, then the element would be removed from the output.
	//
	// However some elements are valid and have strong layout meaning without
	// any attributes, i.e. <table>.
	//
	// In this case, any element matching a regular expression will be accepted without
	// attributes added.
	setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

	// Elements whose inner content is dropped entirely (not just the tags).
	setOfElementsToSkipContent map[string]struct{}

	// Permits fundamentally unsafe elements.
	//
	// If false (default) then elements such as `style` and `script` will not be
	// permitted even if declared in a policy. These elements when combined with
	// untrusted input cannot be safely handled by bluemonday at this point in
	// time.
	//
	// If true then `style` and `script` would be permitted by bluemonday if a
	// policy declares them. However this is not recommended under any circumstance
	// and can lead to XSS being rendered thus defeating the purpose of using a
	// HTML sanitizer.
	allowUnsafe bool
}
// attrPolicy describes the constraint applied to a single allowed HTML
// attribute value.
type attrPolicy struct {

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the attribute is removed
	regexp *regexp.Regexp
}
// stylePolicy describes the constraint applied to a single allowed CSS
// property value; exactly one of handler, regexp or enum is consulted.
type stylePolicy struct {
	// handler to validate
	handler func(string) bool

	// optional pattern to match, when not nil the regexp needs to match
	// otherwise the property is removed
	regexp *regexp.Regexp

	// optional list of allowed property values, for properties which
	// have a defined list of allowed values; property will be removed
	// if the value is not allowed
	enum []string
}
// attrPolicyBuilder is the fluent builder returned by Policy.AllowAttrs; the
// attribute policy is only committed to the policy when Globally() or
// OnElements() is called on the builder.
type attrPolicyBuilder struct {
	// p is the policy the builder commits into
	p *Policy

	// attrNames holds the lower-cased attribute names this builder covers
	attrNames []string
	// regexp is the optional value pattern set via Matching()
	regexp *regexp.Regexp
	// allowEmpty, when true, permits the attribute to be present with an
	// empty value
	allowEmpty bool
}
// stylePolicyBuilder is the fluent builder for CSS property policies,
// analogous to attrPolicyBuilder for HTML attributes.
type stylePolicyBuilder struct {
	// p is the policy the builder commits into
	p *Policy

	// propertyNames holds the CSS property names this builder covers
	propertyNames []string
	// regexp is an optional value pattern
	regexp *regexp.Regexp
	// enum is an optional list of allowed property values
	enum []string
	// handler is an optional custom validation function
	handler func(string) bool
}
| type urlPolicy func(url *url.URL) (allowUrl bool) | |||
// SandboxValue identifies a single allow-* token that may appear in the
// sandbox attribute of an iframe element (see AllowIFrames /
// RequireSandboxOnIFrame).
type SandboxValue int64

// The supported iframe sandbox attribute tokens.
const (
	SandboxAllowDownloads SandboxValue = iota
	SandboxAllowDownloadsWithoutUserActivation
	SandboxAllowForms
	SandboxAllowModals
	SandboxAllowOrientationLock
	SandboxAllowPointerLock
	SandboxAllowPopups
	SandboxAllowPopupsToEscapeSandbox
	SandboxAllowPresentation
	SandboxAllowSameOrigin
	SandboxAllowScripts
	SandboxAllowStorageAccessByUserActivation
	SandboxAllowTopNavigation
	SandboxAllowTopNavigationByUserActivation
)
| // init initializes the maps if this has not been done already | |||
| func (p *Policy) init() { | |||
| if !p.initialized { | |||
| p.elsAndAttrs = make(map[string]map[string][]attrPolicy) | |||
| p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy) | |||
| p.globalAttrs = make(map[string][]attrPolicy) | |||
| p.elsAndStyles = make(map[string]map[string][]stylePolicy) | |||
| p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy) | |||
| p.globalStyles = make(map[string][]stylePolicy) | |||
| p.allowURLSchemes = make(map[string][]urlPolicy) | |||
| p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{}) | |||
| p.setOfElementsToSkipContent = make(map[string]struct{}) | |||
| p.initialized = true | |||
| } | |||
| } | |||
| // NewPolicy returns a blank policy with nothing allowed or permitted. This | |||
| // is the recommended way to start building a policy and you should now use | |||
| // AllowAttrs() and/or AllowElements() to construct the allowlist of HTML | |||
| // elements and attributes. | |||
| func NewPolicy() *Policy { | |||
| p := Policy{} | |||
| p.addDefaultElementsWithoutAttrs() | |||
| p.addDefaultSkipElementContent() | |||
| return &p | |||
| } | |||
| // AllowAttrs takes a range of HTML attribute names and returns an | |||
| // attribute policy builder that allows you to specify the pattern and scope of | |||
| // the allowed attribute. | |||
| // | |||
| // The attribute policy is only added to the core policy when either Globally() | |||
| // or OnElements(...) are called. | |||
| func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder { | |||
| p.init() | |||
| abp := attrPolicyBuilder{ | |||
| p: p, | |||
| allowEmpty: false, | |||
| } | |||
| for _, attrName := range attrNames { | |||
| abp.attrNames = append(abp.attrNames, strings.ToLower(attrName)) | |||
| } | |||
| return &abp | |||
| } | |||
| // AllowDataAttributes permits all data attributes. We can't specify the name | |||
| // of each attribute exactly as they are customized. | |||
| // | |||
| // NOTE: These values are not sanitized and applications that evaluate or process | |||
| // them without checking and verification of the input may be at risk if this option | |||
| // is enabled. This is a 'caveat emptor' option and the person enabling this option | |||
| // needs to fully understand the potential impact with regards to whatever application | |||
| // will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a | |||
| // data attribute and use that to automatically load some new window then you're giving | |||
| // the author of a HTML fragment the means to open a malicious destination automatically. | |||
| // Use with care! | |||
| func (p *Policy) AllowDataAttributes() { | |||
| p.allowDataAttributes = true | |||
| } | |||
| // AllowComments allows comments. | |||
| // | |||
| // Please note that only one type of comment will be allowed by this, this is the | |||
| // the standard HTML comment <!-- --> which includes the use of that to permit | |||
| // conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN | |||
| // | |||
| // What is not permitted are CDATA XML comments, as the x/net/html package we depend | |||
| // on does not handle this fully and we are not choosing to take on that work: | |||
| // https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html | |||
| // package changes this then these will be considered, otherwise if you AllowComments | |||
| // but provide a CDATA comment, then as per the documentation in x/net/html this will | |||
| // be treated as a plain HTML comment. | |||
| func (p *Policy) AllowComments() { | |||
| p.allowComments = true | |||
| } | |||
| // AllowNoAttrs says that attributes on element are optional. | |||
| // | |||
| // The attribute policy is only added to the core policy when OnElements(...) | |||
| // are called. | |||
| func (p *Policy) AllowNoAttrs() *attrPolicyBuilder { | |||
| p.init() | |||
| abp := attrPolicyBuilder{ | |||
| p: p, | |||
| allowEmpty: true, | |||
| } | |||
| return &abp | |||
| } | |||
| // AllowNoAttrs says that attributes on element are optional. | |||
| // | |||
| // The attribute policy is only added to the core policy when OnElements(...) | |||
| // are called. | |||
| func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder { | |||
| abp.allowEmpty = true | |||
| return abp | |||
| } | |||
| // Matching allows a regular expression to be applied to a nascent attribute | |||
| // policy, and returns the attribute policy. | |||
| func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder { | |||
| abp.regexp = regex | |||
| return abp | |||
| } | |||
| // OnElements will bind an attribute policy to a given range of HTML elements | |||
| // and return the updated policy | |||
| func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy { | |||
| for _, element := range elements { | |||
| element = strings.ToLower(element) | |||
| for _, attr := range abp.attrNames { | |||
| if _, ok := abp.p.elsAndAttrs[element]; !ok { | |||
| abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy) | |||
| } | |||
| ap := attrPolicy{} | |||
| if abp.regexp != nil { | |||
| ap.regexp = abp.regexp | |||
| } | |||
| abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap) | |||
| } | |||
| if abp.allowEmpty { | |||
| abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{} | |||
| if _, ok := abp.p.elsAndAttrs[element]; !ok { | |||
| abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy) | |||
| } | |||
| } | |||
| } | |||
| return abp.p | |||
| } | |||
| // OnElementsMatching will bind an attribute policy to all elements matching a given regex | |||
| // and return the updated policy | |||
| func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy { | |||
| for _, attr := range abp.attrNames { | |||
| if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { | |||
| abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy) | |||
| } | |||
| ap := attrPolicy{} | |||
| if abp.regexp != nil { | |||
| ap.regexp = abp.regexp | |||
| } | |||
| abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap) | |||
| } | |||
| if abp.allowEmpty { | |||
| abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex) | |||
| if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { | |||
| abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy) | |||
| } | |||
| } | |||
| return abp.p | |||
| } | |||
| // Globally will bind an attribute policy to all HTML elements and return the | |||
| // updated policy | |||
| func (abp *attrPolicyBuilder) Globally() *Policy { | |||
| for _, attr := range abp.attrNames { | |||
| if _, ok := abp.p.globalAttrs[attr]; !ok { | |||
| abp.p.globalAttrs[attr] = []attrPolicy{} | |||
| } | |||
| ap := attrPolicy{} | |||
| if abp.regexp != nil { | |||
| ap.regexp = abp.regexp | |||
| } | |||
| abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap) | |||
| } | |||
| return abp.p | |||
| } | |||
| // AllowStyles takes a range of CSS property names and returns a | |||
| // style policy builder that allows you to specify the pattern and scope of | |||
| // the allowed property. | |||
| // | |||
| // The style policy is only added to the core policy when either Globally() | |||
| // or OnElements(...) are called. | |||
| func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder { | |||
| p.init() | |||
| abp := stylePolicyBuilder{ | |||
| p: p, | |||
| } | |||
| for _, propertyName := range propertyNames { | |||
| abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName)) | |||
| } | |||
| return &abp | |||
| } | |||
| // Matching allows a regular expression to be applied to a nascent style | |||
| // policy, and returns the style policy. | |||
| func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder { | |||
| spb.regexp = regex | |||
| return spb | |||
| } | |||
| // MatchingEnum allows a list of allowed values to be applied to a nascent style | |||
| // policy, and returns the style policy. | |||
| func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder { | |||
| spb.enum = enum | |||
| return spb | |||
| } | |||
| // MatchingHandler allows a handler to be applied to a nascent style | |||
| // policy, and returns the style policy. | |||
| func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder { | |||
| spb.handler = handler | |||
| return spb | |||
| } | |||
| // OnElements will bind a style policy to a given range of HTML elements | |||
| // and return the updated policy | |||
| func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy { | |||
| for _, element := range elements { | |||
| element = strings.ToLower(element) | |||
| for _, attr := range spb.propertyNames { | |||
| if _, ok := spb.p.elsAndStyles[element]; !ok { | |||
| spb.p.elsAndStyles[element] = make(map[string][]stylePolicy) | |||
| } | |||
| sp := stylePolicy{} | |||
| if spb.handler != nil { | |||
| sp.handler = spb.handler | |||
| } else if len(spb.enum) > 0 { | |||
| sp.enum = spb.enum | |||
| } else if spb.regexp != nil { | |||
| sp.regexp = spb.regexp | |||
| } else { | |||
| sp.handler = css.GetDefaultHandler(attr) | |||
| } | |||
| spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp) | |||
| } | |||
| } | |||
| return spb.p | |||
| } | |||
| // OnElementsMatching will bind a style policy to any HTML elements matching the pattern | |||
| // and return the updated policy | |||
| func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy { | |||
| for _, attr := range spb.propertyNames { | |||
| if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok { | |||
| spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy) | |||
| } | |||
| sp := stylePolicy{} | |||
| if spb.handler != nil { | |||
| sp.handler = spb.handler | |||
| } else if len(spb.enum) > 0 { | |||
| sp.enum = spb.enum | |||
| } else if spb.regexp != nil { | |||
| sp.regexp = spb.regexp | |||
| } else { | |||
| sp.handler = css.GetDefaultHandler(attr) | |||
| } | |||
| spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp) | |||
| } | |||
| return spb.p | |||
| } | |||
| // Globally will bind a style policy to all HTML elements and return the | |||
| // updated policy | |||
| func (spb *stylePolicyBuilder) Globally() *Policy { | |||
| for _, attr := range spb.propertyNames { | |||
| if _, ok := spb.p.globalStyles[attr]; !ok { | |||
| spb.p.globalStyles[attr] = []stylePolicy{} | |||
| } | |||
| // Use only one strategy for validating styles, fallback to default | |||
| sp := stylePolicy{} | |||
| if spb.handler != nil { | |||
| sp.handler = spb.handler | |||
| } else if len(spb.enum) > 0 { | |||
| sp.enum = spb.enum | |||
| } else if spb.regexp != nil { | |||
| sp.regexp = spb.regexp | |||
| } else { | |||
| sp.handler = css.GetDefaultHandler(attr) | |||
| } | |||
| spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp) | |||
| } | |||
| return spb.p | |||
| } | |||
| // AllowElements will append HTML elements to the allowlist without applying an | |||
| // attribute policy to those elements (the elements are permitted | |||
| // sans-attributes) | |||
| func (p *Policy) AllowElements(names ...string) *Policy { | |||
| p.init() | |||
| for _, element := range names { | |||
| element = strings.ToLower(element) | |||
| if _, ok := p.elsAndAttrs[element]; !ok { | |||
| p.elsAndAttrs[element] = make(map[string][]attrPolicy) | |||
| } | |||
| } | |||
| return p | |||
| } | |||
| // AllowElementsMatching will append HTML elements to the allowlist if they | |||
| // match a regexp. | |||
| func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy { | |||
| p.init() | |||
| if _, ok := p.elsMatchingAndAttrs[regex]; !ok { | |||
| p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy) | |||
| } | |||
| return p | |||
| } | |||
| // RequireNoFollowOnLinks will result in all a, area, link tags having a | |||
| // rel="nofollow"added to them if one does not already exist | |||
| // | |||
| // Note: This requires p.RequireParseableURLs(true) and will enable it. | |||
| func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy { | |||
| p.requireNoFollow = require | |||
| p.requireParseableURLs = true | |||
| return p | |||
| } | |||
| // RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link | |||
| // tags that point to a non-local destination (i.e. starts with a protocol and | |||
| // has a host) having a rel="nofollow" added to them if one does not already | |||
| // exist | |||
| // | |||
| // Note: This requires p.RequireParseableURLs(true) and will enable it. | |||
| func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy { | |||
| p.requireNoFollowFullyQualifiedLinks = require | |||
| p.requireParseableURLs = true | |||
| return p | |||
| } | |||
| // RequireNoReferrerOnLinks will result in all a, area, and link tags having a | |||
| // rel="noreferrrer" added to them if one does not already exist | |||
| // | |||
| // Note: This requires p.RequireParseableURLs(true) and will enable it. | |||
| func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy { | |||
| p.requireNoReferrer = require | |||
| p.requireParseableURLs = true | |||
| return p | |||
| } | |||
| // RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link | |||
| // tags that point to a non-local destination (i.e. starts with a protocol and | |||
| // has a host) having a rel="noreferrer" added to them if one does not already | |||
| // exist | |||
| // | |||
| // Note: This requires p.RequireParseableURLs(true) and will enable it. | |||
| func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy { | |||
| p.requireNoReferrerFullyQualifiedLinks = require | |||
| p.requireParseableURLs = true | |||
| return p | |||
| } | |||
| // RequireCrossOriginAnonymous will result in all audio, img, link, script, and | |||
| // video tags having a crossorigin="anonymous" added to them if one does not | |||
| // already exist | |||
| func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy { | |||
| p.requireCrossOriginAnonymous = require | |||
| return p | |||
| } | |||
| // AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags | |||
| // that point to a non-local destination (i.e. starts with a protocol and has a | |||
| // host) having a target="_blank" added to them if one does not already exist | |||
| // | |||
| // Note: This requires p.RequireParseableURLs(true) and will enable it. | |||
| func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy { | |||
| p.addTargetBlankToFullyQualifiedLinks = require | |||
| p.requireParseableURLs = true | |||
| return p | |||
| } | |||
| // RequireParseableURLs will result in all URLs requiring that they be parseable | |||
| // by "net/url" url.Parse() | |||
| // This applies to: | |||
| // - a.href | |||
| // - area.href | |||
| // - blockquote.cite | |||
| // - img.src | |||
| // - link.href | |||
| // - script.src | |||
| func (p *Policy) RequireParseableURLs(require bool) *Policy { | |||
| p.requireParseableURLs = require | |||
| return p | |||
| } | |||
| // AllowRelativeURLs enables RequireParseableURLs and then permits URLs that | |||
| // are parseable, have no schema information and url.IsAbs() returns false | |||
| // This permits local URLs | |||
| func (p *Policy) AllowRelativeURLs(require bool) *Policy { | |||
| p.RequireParseableURLs(true) | |||
| p.allowRelativeURLs = require | |||
| return p | |||
| } | |||
| // AllowURLSchemes will append URL schemes to the allowlist | |||
| // Example: p.AllowURLSchemes("mailto", "http", "https") | |||
| func (p *Policy) AllowURLSchemes(schemes ...string) *Policy { | |||
| p.init() | |||
| p.RequireParseableURLs(true) | |||
| for _, scheme := range schemes { | |||
| scheme = strings.ToLower(scheme) | |||
| // Allow all URLs with matching scheme. | |||
| p.allowURLSchemes[scheme] = nil | |||
| } | |||
| return p | |||
| } | |||
| // AllowURLSchemeWithCustomPolicy will append URL schemes with | |||
| // a custom URL policy to the allowlist. | |||
| // Only the URLs with matching schema and urlPolicy(url) | |||
| // returning true will be allowed. | |||
| func (p *Policy) AllowURLSchemeWithCustomPolicy( | |||
| scheme string, | |||
| urlPolicy func(url *url.URL) (allowUrl bool), | |||
| ) *Policy { | |||
| p.init() | |||
| p.RequireParseableURLs(true) | |||
| scheme = strings.ToLower(scheme) | |||
| p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy) | |||
| return p | |||
| } | |||
| // RequireSandboxOnIFrame will result in all iframe tags having a sandbox="" tag | |||
| // Any sandbox values not specified here will be filtered from the generated HTML | |||
| func (p *Policy) RequireSandboxOnIFrame(vals ...SandboxValue) { | |||
| p.requireSandboxOnIFrame = make(map[string]bool) | |||
| for _, val := range vals { | |||
| switch SandboxValue(val) { | |||
| case SandboxAllowDownloads: | |||
| p.requireSandboxOnIFrame["allow-downloads"] = true | |||
| case SandboxAllowDownloadsWithoutUserActivation: | |||
| p.requireSandboxOnIFrame["allow-downloads-without-user-activation"] = true | |||
| case SandboxAllowForms: | |||
| p.requireSandboxOnIFrame["allow-forms"] = true | |||
| case SandboxAllowModals: | |||
| p.requireSandboxOnIFrame["allow-modals"] = true | |||
| case SandboxAllowOrientationLock: | |||
| p.requireSandboxOnIFrame["allow-orientation-lock"] = true | |||
| case SandboxAllowPointerLock: | |||
| p.requireSandboxOnIFrame["allow-pointer-lock"] = true | |||
| case SandboxAllowPopups: | |||
| p.requireSandboxOnIFrame["allow-popups"] = true | |||
| case SandboxAllowPopupsToEscapeSandbox: | |||
| p.requireSandboxOnIFrame["allow-popups-to-escape-sandbox"] = true | |||
| case SandboxAllowPresentation: | |||
| p.requireSandboxOnIFrame["allow-presentation"] = true | |||
| case SandboxAllowSameOrigin: | |||
| p.requireSandboxOnIFrame["allow-same-origin"] = true | |||
| case SandboxAllowScripts: | |||
| p.requireSandboxOnIFrame["allow-scripts"] = true | |||
| case SandboxAllowStorageAccessByUserActivation: | |||
| p.requireSandboxOnIFrame["allow-storage-access-by-user-activation"] = true | |||
| case SandboxAllowTopNavigation: | |||
| p.requireSandboxOnIFrame["allow-top-navigation"] = true | |||
| case SandboxAllowTopNavigationByUserActivation: | |||
| p.requireSandboxOnIFrame["allow-top-navigation-by-user-activation"] = true | |||
| } | |||
| } | |||
| } | |||
| // AddSpaceWhenStrippingTag states whether to add a single space " " when | |||
| // removing tags that are not allowed by the policy. | |||
| // | |||
| // This is useful if you expect to strip tags in dense markup and may lose the | |||
| // value of whitespace. | |||
| // | |||
| // For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld" | |||
| // with the default value of false, but you may wish to sanitize this to | |||
| // " Hello World " by setting AddSpaceWhenStrippingTag to true as this would | |||
| // retain the intent of the text. | |||
| func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy { | |||
| p.addSpaces = allow | |||
| return p | |||
| } | |||
| // SkipElementsContent adds the HTML elements whose tags is needed to be removed | |||
| // with its content. | |||
| func (p *Policy) SkipElementsContent(names ...string) *Policy { | |||
| p.init() | |||
| for _, element := range names { | |||
| element = strings.ToLower(element) | |||
| if _, ok := p.setOfElementsToSkipContent[element]; !ok { | |||
| p.setOfElementsToSkipContent[element] = struct{}{} | |||
| } | |||
| } | |||
| return p | |||
| } | |||
| // AllowElementsContent marks the HTML elements whose content should be | |||
| // retained after removing the tag. | |||
| func (p *Policy) AllowElementsContent(names ...string) *Policy { | |||
| p.init() | |||
| for _, element := range names { | |||
| delete(p.setOfElementsToSkipContent, strings.ToLower(element)) | |||
| } | |||
| return p | |||
| } | |||
| // AllowUnsafe permits fundamentally unsafe elements. | |||
| // | |||
| // If false (default) then elements such as `style` and `script` will not be | |||
| // permitted even if declared in a policy. These elements when combined with | |||
| // untrusted input cannot be safely handled by bluemonday at this point in | |||
| // time. | |||
| // | |||
| // If true then `style` and `script` would be permitted by bluemonday if a | |||
| // policy declares them. However this is not recommended under any circumstance | |||
| // and can lead to XSS being rendered thus defeating the purpose of using a | |||
| // HTML sanitizer. | |||
| func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy { | |||
| p.init() | |||
| p.allowUnsafe = allowUnsafe | |||
| return p | |||
| } | |||
| // addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid | |||
| // without any attributes to an internal map. | |||
| // i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr | |||
| // is mandatory | |||
| func (p *Policy) addDefaultElementsWithoutAttrs() { | |||
| p.init() | |||
| p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{} | |||
| p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{} | |||
| } | |||
| // addDefaultSkipElementContent adds the HTML elements that we should skip | |||
| // rendering the character content of, if the element itself is not allowed. | |||
| // This is all character data that the end user would not normally see. | |||
| // i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or | |||
| // anything else until we encounter the closing </script> tag. | |||
| func (p *Policy) addDefaultSkipElementContent() { | |||
| p.init() | |||
| p.setOfElementsToSkipContent["frame"] = struct{}{} | |||
| p.setOfElementsToSkipContent["frameset"] = struct{}{} | |||
| p.setOfElementsToSkipContent["iframe"] = struct{}{} | |||
| p.setOfElementsToSkipContent["noembed"] = struct{}{} | |||
| p.setOfElementsToSkipContent["noframes"] = struct{}{} | |||
| p.setOfElementsToSkipContent["noscript"] = struct{}{} | |||
| p.setOfElementsToSkipContent["nostyle"] = struct{}{} | |||
| p.setOfElementsToSkipContent["object"] = struct{}{} | |||
| p.setOfElementsToSkipContent["script"] = struct{}{} | |||
| p.setOfElementsToSkipContent["style"] = struct{}{} | |||
| p.setOfElementsToSkipContent["title"] = struct{}{} | |||
| } | |||
| @@ -0,0 +1,11 @@ | |||
| //go:build go1.12 | |||
| // +build go1.12 | |||
| package bluemonday | |||
| import "io" | |||
// stringWriterWriter is satisfied by writers that can write both byte slices
// and strings. On Go 1.12+ io.StringWriter exists, so the interface is
// composed entirely from the standard library.
type stringWriterWriter interface {
	io.Writer
	io.StringWriter
}
| @@ -0,0 +1,15 @@ | |||
| //go:build go1.1 && !go1.12 | |||
| // +build go1.1,!go1.12 | |||
| package bluemonday | |||
| import "io" | |||
// stringWriterWriter is satisfied by writers that can write both byte slices
// and strings. Pre-Go 1.12 there is no io.StringWriter, so a local
// StringWriter interface (below) stands in for it.
type stringWriterWriter interface {
	io.Writer
	StringWriter
}

// StringWriter mirrors io.StringWriter (added to the standard library in
// Go 1.12) for older toolchains.
type StringWriter interface {
	WriteString(s string) (n int, err error)
}
| @@ -663,6 +663,24 @@ func inHeadIM(p *parser) bool { | |||
| // Ignore the token. | |||
| return true | |||
| case a.Template: | |||
| // TODO: remove this divergence from the HTML5 spec. | |||
| // | |||
| // We don't handle all of the corner cases when mixing foreign | |||
| // content (i.e. <math> or <svg>) with <template>. Without this | |||
| // early return, we can get into an infinite loop, possibly because | |||
| // of the "TODO... further divergence" a little below. | |||
| // | |||
| // As a workaround, if we are mixing foreign content and templates, | |||
| // just ignore the rest of the HTML. Foreign content is rare and a | |||
| // relatively old HTML feature. Templates are also rare and a | |||
| // relatively new HTML feature. Their combination is very rare. | |||
| for _, e := range p.oe { | |||
| if e.Namespace != "" { | |||
| p.im = ignoreTheRemainingTokens | |||
| return true | |||
| } | |||
| } | |||
| p.addElement() | |||
| p.afe = append(p.afe, &scopeMarker) | |||
| p.framesetOK = false | |||
| @@ -683,7 +701,7 @@ func inHeadIM(p *parser) bool { | |||
| if !p.oe.contains(a.Template) { | |||
| return true | |||
| } | |||
| // TODO: remove this divergence from the HTML5 spec. | |||
| // TODO: remove this further divergence from the HTML5 spec. | |||
| // | |||
| // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 | |||
| p.generateImpliedEndTags() | |||
| @@ -2127,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool { | |||
| return true | |||
| } | |||
// ignoreTheRemainingTokens is an insertion mode that swallows every remaining
// token. It is installed when foreign content (<math>/<svg>) is mixed with
// <template>, a combination the parser deliberately bails out of (see the
// workaround note in inHeadIM).
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
// whitespaceOrNUL is the whitespace character set extended with the NUL byte.
const whitespaceOrNUL = whitespace + "\x00"
| // Section 12.2.6.5 | |||
| @@ -137,11 +137,13 @@ func trimOWS(x string) string { | |||
| // contains token amongst its comma-separated tokens, ASCII | |||
| // case-insensitively. | |||
| func headerValueContainsToken(v string, token string) bool { | |||
| v = trimOWS(v) | |||
| if comma := strings.IndexByte(v, ','); comma != -1 { | |||
| return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) | |||
| for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') { | |||
| if tokenEqual(trimOWS(v[:comma]), token) { | |||
| return true | |||
| } | |||
| v = v[comma+1:] | |||
| } | |||
| return tokenEqual(v, token) | |||
| return tokenEqual(trimOWS(v), token) | |||
| } | |||
| // lowerASCII returns the ASCII lowercase version of b. | |||
| @@ -38,7 +38,7 @@ RUN make | |||
| RUN make install | |||
| WORKDIR /root | |||
| RUN wget http://curl.haxx.se/download/curl-7.45.0.tar.gz | |||
| RUN wget https://curl.se/download/curl-7.45.0.tar.gz | |||
| RUN tar -zxvf curl-7.45.0.tar.gz | |||
| WORKDIR /root/curl-7.45.0 | |||
| RUN ./configure --with-ssl --with-nghttp2=/usr/local | |||
| @@ -0,0 +1,53 @@ | |||
| // Copyright 2021 The Go Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| package http2 | |||
| import "strings" | |||
| // The HTTP protocols are defined in terms of ASCII, not Unicode. This file | |||
| // contains helper functions which may use Unicode-aware functions which would | |||
| // otherwise be unsafe and could introduce vulnerabilities if used improperly. | |||
| // asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t | |||
| // are equal, ASCII-case-insensitively. | |||
// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
// are equal, ASCII-case-insensitively. Non-ASCII bytes are compared verbatim.
func asciiEqualFold(s, t string) bool {
	if len(s) != len(t) {
		return false
	}
	// Compare byte-by-byte after ASCII lower-casing; iteration direction
	// is irrelevant to the result.
	for i := len(s) - 1; i >= 0; i-- {
		if lower(s[i]) != lower(t[i]) {
			return false
		}
	}
	return true
}

// lower returns the ASCII lowercase version of b. Bytes outside 'A'..'Z' are
// returned unchanged.
func lower(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	return b | 0x20 // setting bit 5 maps 'A'..'Z' onto 'a'..'z'
}
| // isASCIIPrint returns whether s is ASCII and printable according to | |||
| // https://tools.ietf.org/html/rfc20#section-4.2. | |||
| func isASCIIPrint(s string) bool { | |||
| for i := 0; i < len(s); i++ { | |||
| if s[i] < ' ' || s[i] > '~' { | |||
| return false | |||
| } | |||
| } | |||
| return true | |||
| } | |||
| // asciiToLower returns the lowercase version of s if s is ASCII and printable, | |||
| // and whether or not it was. | |||
| func asciiToLower(s string) (lower string, ok bool) { | |||
| if !isASCIIPrint(s) { | |||
| return "", false | |||
| } | |||
| return strings.ToLower(s), true | |||
| } | |||
| @@ -7,7 +7,9 @@ | |||
| package http2 | |||
| import ( | |||
| "context" | |||
| "crypto/tls" | |||
| "errors" | |||
| "net/http" | |||
| "sync" | |||
| ) | |||
| @@ -78,61 +80,69 @@ func (p *clientConnPool) getClientConn(req *http.Request, addr string, dialOnMis | |||
| // It gets its own connection. | |||
| traceGetConn(req, addr) | |||
| const singleUse = true | |||
| cc, err := p.t.dialClientConn(addr, singleUse) | |||
| cc, err := p.t.dialClientConn(req.Context(), addr, singleUse) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| return cc, nil | |||
| } | |||
| p.mu.Lock() | |||
| for _, cc := range p.conns[addr] { | |||
| if st := cc.idleState(); st.canTakeNewRequest { | |||
| if p.shouldTraceGetConn(st) { | |||
| traceGetConn(req, addr) | |||
| for { | |||
| p.mu.Lock() | |||
| for _, cc := range p.conns[addr] { | |||
| if st := cc.idleState(); st.canTakeNewRequest { | |||
| if p.shouldTraceGetConn(st) { | |||
| traceGetConn(req, addr) | |||
| } | |||
| p.mu.Unlock() | |||
| return cc, nil | |||
| } | |||
| } | |||
| if !dialOnMiss { | |||
| p.mu.Unlock() | |||
| return cc, nil | |||
| return nil, ErrNoCachedConn | |||
| } | |||
| } | |||
| if !dialOnMiss { | |||
| traceGetConn(req, addr) | |||
| call := p.getStartDialLocked(req.Context(), addr) | |||
| p.mu.Unlock() | |||
| return nil, ErrNoCachedConn | |||
| <-call.done | |||
| if shouldRetryDial(call, req) { | |||
| continue | |||
| } | |||
| return call.res, call.err | |||
| } | |||
| traceGetConn(req, addr) | |||
| call := p.getStartDialLocked(addr) | |||
| p.mu.Unlock() | |||
| <-call.done | |||
| return call.res, call.err | |||
| } | |||
| // dialCall is an in-flight Transport dial call to a host. | |||
| type dialCall struct { | |||
| _ incomparable | |||
| p *clientConnPool | |||
| _ incomparable | |||
| p *clientConnPool | |||
| // the context associated with the request | |||
| // that created this dialCall | |||
| ctx context.Context | |||
| done chan struct{} // closed when done | |||
| res *ClientConn // valid after done is closed | |||
| err error // valid after done is closed | |||
| } | |||
| // requires p.mu is held. | |||
| func (p *clientConnPool) getStartDialLocked(addr string) *dialCall { | |||
| func (p *clientConnPool) getStartDialLocked(ctx context.Context, addr string) *dialCall { | |||
| if call, ok := p.dialing[addr]; ok { | |||
| // A dial is already in-flight. Don't start another. | |||
| return call | |||
| } | |||
| call := &dialCall{p: p, done: make(chan struct{})} | |||
| call := &dialCall{p: p, done: make(chan struct{}), ctx: ctx} | |||
| if p.dialing == nil { | |||
| p.dialing = make(map[string]*dialCall) | |||
| } | |||
| p.dialing[addr] = call | |||
| go call.dial(addr) | |||
| go call.dial(call.ctx, addr) | |||
| return call | |||
| } | |||
| // run in its own goroutine. | |||
| func (c *dialCall) dial(addr string) { | |||
| func (c *dialCall) dial(ctx context.Context, addr string) { | |||
| const singleUse = false // shared conn | |||
| c.res, c.err = c.p.t.dialClientConn(addr, singleUse) | |||
| c.res, c.err = c.p.t.dialClientConn(ctx, addr, singleUse) | |||
| close(c.done) | |||
| c.p.mu.Lock() | |||
| @@ -276,3 +286,28 @@ type noDialClientConnPool struct{ *clientConnPool } | |||
| func (p noDialClientConnPool) GetClientConn(req *http.Request, addr string) (*ClientConn, error) { | |||
| return p.getClientConn(req, addr, noDialOnMiss) | |||
| } | |||
| // shouldRetryDial reports whether the current request should | |||
| // retry dialing after the call finished unsuccessfully, for example | |||
| // if the dial was canceled because of a context cancellation or | |||
| // deadline expiry. | |||
| func shouldRetryDial(call *dialCall, req *http.Request) bool { | |||
| if call.err == nil { | |||
| // No error, no need to retry | |||
| return false | |||
| } | |||
| if call.ctx == req.Context() { | |||
| // If the call has the same context as the request, the dial | |||
| // should not be retried, since any cancellation will have come | |||
| // from this request. | |||
| return false | |||
| } | |||
| if !errors.Is(call.err, context.Canceled) && !errors.Is(call.err, context.DeadlineExceeded) { | |||
| // If the call error is not because of a context cancellation or a deadline expiry, | |||
| // the dial should not be retried. | |||
| return false | |||
| } | |||
| // Only retry if the error is a context cancellation error or deadline expiry | |||
| // and the context associated with the call was canceled or expired. | |||
| return call.ctx.Err() != nil | |||
| } | |||
| @@ -0,0 +1,27 @@ | |||
| // Copyright 2021 The Go Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build go1.15 | |||
| // +build go1.15 | |||
| package http2 | |||
| import ( | |||
| "context" | |||
| "crypto/tls" | |||
| ) | |||
| // dialTLSWithContext uses tls.Dialer, added in Go 1.15, to open a TLS | |||
| // connection. | |||
| func (t *Transport) dialTLSWithContext(ctx context.Context, network, addr string, cfg *tls.Config) (*tls.Conn, error) { | |||
| dialer := &tls.Dialer{ | |||
| Config: cfg, | |||
| } | |||
| cn, err := dialer.DialContext(ctx, network, addr) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| tlsCn := cn.(*tls.Conn) // DialContext comment promises this will always succeed | |||
| return tlsCn, nil | |||
| } | |||
| @@ -6,7 +6,6 @@ package http2 | |||
| import ( | |||
| "net/http" | |||
| "strings" | |||
| "sync" | |||
| ) | |||
| @@ -79,10 +78,10 @@ func buildCommonHeaderMaps() { | |||
| } | |||
| } | |||
| func lowerHeader(v string) string { | |||
| func lowerHeader(v string) (lower string, ascii bool) { | |||
| buildCommonHeaderMapsOnce() | |||
| if s, ok := commonLowerHeader[v]; ok { | |||
| return s | |||
| return s, true | |||
| } | |||
| return strings.ToLower(v) | |||
| return asciiToLower(v) | |||
| } | |||
| @@ -0,0 +1,31 @@ | |||
| // Copyright 2021 The Go Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build !go1.15 | |||
| // +build !go1.15 | |||
| package http2 | |||
| import ( | |||
| "context" | |||
| "crypto/tls" | |||
| ) | |||
| // dialTLSWithContext opens a TLS connection. | |||
| func (t *Transport) dialTLSWithContext(ctx context.Context, network, addr string, cfg *tls.Config) (*tls.Conn, error) { | |||
| cn, err := tls.Dial(network, addr, cfg) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| if err := cn.Handshake(); err != nil { | |||
| return nil, err | |||
| } | |||
| if cfg.InsecureSkipVerify { | |||
| return cn, nil | |||
| } | |||
| if err := cn.VerifyHostname(cfg.ServerName); err != nil { | |||
| return nil, err | |||
| } | |||
| return cn, nil | |||
| } | |||
| @@ -231,13 +231,12 @@ func ConfigureServer(s *http.Server, conf *Server) error { | |||
| if s.TLSConfig == nil { | |||
| s.TLSConfig = new(tls.Config) | |||
| } else if s.TLSConfig.CipherSuites != nil { | |||
| // If they already provided a CipherSuite list, return | |||
| // an error if it has a bad order or is missing | |||
| // ECDHE_RSA_WITH_AES_128_GCM_SHA256 or ECDHE_ECDSA_WITH_AES_128_GCM_SHA256. | |||
| } else if s.TLSConfig.CipherSuites != nil && s.TLSConfig.MinVersion < tls.VersionTLS13 { | |||
| // If they already provided a TLS 1.0–1.2 CipherSuite list, return an | |||
| // error if it is missing ECDHE_RSA_WITH_AES_128_GCM_SHA256 or | |||
| // ECDHE_ECDSA_WITH_AES_128_GCM_SHA256. | |||
| haveRequired := false | |||
| sawBad := false | |||
| for i, cs := range s.TLSConfig.CipherSuites { | |||
| for _, cs := range s.TLSConfig.CipherSuites { | |||
| switch cs { | |||
| case tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, | |||
| // Alternative MTI cipher to not discourage ECDSA-only servers. | |||
| @@ -245,14 +244,9 @@ func ConfigureServer(s *http.Server, conf *Server) error { | |||
| tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: | |||
| haveRequired = true | |||
| } | |||
| if isBadCipher(cs) { | |||
| sawBad = true | |||
| } else if sawBad { | |||
| return fmt.Errorf("http2: TLSConfig.CipherSuites index %d contains an HTTP/2-approved cipher suite (%#04x), but it comes after unapproved cipher suites. With this configuration, clients that don't support previous, approved cipher suites may be given an unapproved one and reject the connection.", i, cs) | |||
| } | |||
| } | |||
| if !haveRequired { | |||
| return fmt.Errorf("http2: TLSConfig.CipherSuites is missing an HTTP/2-required AES_128_GCM_SHA256 cipher (need at least one of TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 or TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256).") | |||
| return fmt.Errorf("http2: TLSConfig.CipherSuites is missing an HTTP/2-required AES_128_GCM_SHA256 cipher (need at least one of TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 or TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256)") | |||
| } | |||
| } | |||
| @@ -265,16 +259,12 @@ func ConfigureServer(s *http.Server, conf *Server) error { | |||
| s.TLSConfig.PreferServerCipherSuites = true | |||
| haveNPN := false | |||
| for _, p := range s.TLSConfig.NextProtos { | |||
| if p == NextProtoTLS { | |||
| haveNPN = true | |||
| break | |||
| } | |||
| } | |||
| if !haveNPN { | |||
| if !strSliceContains(s.TLSConfig.NextProtos, NextProtoTLS) { | |||
| s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, NextProtoTLS) | |||
| } | |||
| if !strSliceContains(s.TLSConfig.NextProtos, "http/1.1") { | |||
| s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, "http/1.1") | |||
| } | |||
| if s.TLSNextProto == nil { | |||
| s.TLSNextProto = map[string]func(*http.Server, *tls.Conn, http.Handler){} | |||
| @@ -1293,7 +1283,9 @@ func (sc *serverConn) startGracefulShutdown() { | |||
| sc.shutdownOnce.Do(func() { sc.sendServeMsg(gracefulShutdownMsg) }) | |||
| } | |||
| // After sending GOAWAY, the connection will close after goAwayTimeout. | |||
| // After sending GOAWAY with an error code (non-graceful shutdown), the | |||
| // connection will close after goAwayTimeout. | |||
| // | |||
| // If we close the connection immediately after sending GOAWAY, there may | |||
| // be unsent data in our kernel receive buffer, which will cause the kernel | |||
| // to send a TCP RST on close() instead of a FIN. This RST will abort the | |||
| @@ -1629,23 +1621,37 @@ func (sc *serverConn) processSettingInitialWindowSize(val uint32) error { | |||
| func (sc *serverConn) processData(f *DataFrame) error { | |||
| sc.serveG.check() | |||
| if sc.inGoAway && sc.goAwayCode != ErrCodeNo { | |||
| id := f.Header().StreamID | |||
| if sc.inGoAway && (sc.goAwayCode != ErrCodeNo || id > sc.maxClientStreamID) { | |||
| // Discard all DATA frames if the GOAWAY is due to an | |||
| // error, or: | |||
| // | |||
| // Section 6.8: After sending a GOAWAY frame, the sender | |||
| // can discard frames for streams initiated by the | |||
| // receiver with identifiers higher than the identified | |||
| // last stream. | |||
| return nil | |||
| } | |||
| data := f.Data() | |||
| // "If a DATA frame is received whose stream is not in "open" | |||
| // or "half closed (local)" state, the recipient MUST respond | |||
| // with a stream error (Section 5.4.2) of type STREAM_CLOSED." | |||
| id := f.Header().StreamID | |||
| data := f.Data() | |||
| state, st := sc.state(id) | |||
| if id == 0 || state == stateIdle { | |||
| // Section 6.1: "DATA frames MUST be associated with a | |||
| // stream. If a DATA frame is received whose stream | |||
| // identifier field is 0x0, the recipient MUST respond | |||
| // with a connection error (Section 5.4.1) of type | |||
| // PROTOCOL_ERROR." | |||
| // | |||
| // Section 5.1: "Receiving any frame other than HEADERS | |||
| // or PRIORITY on a stream in this state MUST be | |||
| // treated as a connection error (Section 5.4.1) of | |||
| // type PROTOCOL_ERROR." | |||
| return ConnectionError(ErrCodeProtocol) | |||
| } | |||
| // "If a DATA frame is received whose stream is not in "open" | |||
| // or "half closed (local)" state, the recipient MUST respond | |||
| // with a stream error (Section 5.4.2) of type STREAM_CLOSED." | |||
| if st == nil || state != stateOpen || st.gotTrailerHeader || st.resetQueued { | |||
| // This includes sending a RST_STREAM if the stream is | |||
| // in stateHalfClosedLocal (which currently means that | |||
| @@ -2773,8 +2779,12 @@ func (w *responseWriter) Push(target string, opts *http.PushOptions) error { | |||
| // but PUSH_PROMISE requests cannot have a body. | |||
| // http://tools.ietf.org/html/rfc7540#section-8.2 | |||
| // Also disallow Host, since the promised URL must be absolute. | |||
| switch strings.ToLower(k) { | |||
| case "content-length", "content-encoding", "trailer", "te", "expect", "host": | |||
| if asciiEqualFold(k, "content-length") || | |||
| asciiEqualFold(k, "content-encoding") || | |||
| asciiEqualFold(k, "trailer") || | |||
| asciiEqualFold(k, "te") || | |||
| asciiEqualFold(k, "expect") || | |||
| asciiEqualFold(k, "host") { | |||
| return fmt.Errorf("promised request headers cannot include %q", k) | |||
| } | |||
| } | |||
| @@ -264,9 +264,8 @@ type ClientConn struct { | |||
| peerMaxHeaderListSize uint64 | |||
| initialWindowSize uint32 | |||
| hbuf bytes.Buffer // HPACK encoder writes into this | |||
| henc *hpack.Encoder | |||
| freeBuf [][]byte | |||
| hbuf bytes.Buffer // HPACK encoder writes into this | |||
| henc *hpack.Encoder | |||
| wmu sync.Mutex // held while writing; acquire AFTER mu if holding both | |||
| werr error // first write error that has occurred | |||
| @@ -564,12 +563,12 @@ func canRetryError(err error) bool { | |||
| return false | |||
| } | |||
| func (t *Transport) dialClientConn(addr string, singleUse bool) (*ClientConn, error) { | |||
| func (t *Transport) dialClientConn(ctx context.Context, addr string, singleUse bool) (*ClientConn, error) { | |||
| host, _, err := net.SplitHostPort(addr) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| tconn, err := t.dialTLS()("tcp", addr, t.newTLSConfig(host)) | |||
| tconn, err := t.dialTLS(ctx)("tcp", addr, t.newTLSConfig(host)) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| @@ -590,34 +589,24 @@ func (t *Transport) newTLSConfig(host string) *tls.Config { | |||
| return cfg | |||
| } | |||
| func (t *Transport) dialTLS() func(string, string, *tls.Config) (net.Conn, error) { | |||
| func (t *Transport) dialTLS(ctx context.Context) func(string, string, *tls.Config) (net.Conn, error) { | |||
| if t.DialTLS != nil { | |||
| return t.DialTLS | |||
| } | |||
| return t.dialTLSDefault | |||
| } | |||
| func (t *Transport) dialTLSDefault(network, addr string, cfg *tls.Config) (net.Conn, error) { | |||
| cn, err := tls.Dial(network, addr, cfg) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| if err := cn.Handshake(); err != nil { | |||
| return nil, err | |||
| } | |||
| if !cfg.InsecureSkipVerify { | |||
| if err := cn.VerifyHostname(cfg.ServerName); err != nil { | |||
| return func(network, addr string, cfg *tls.Config) (net.Conn, error) { | |||
| tlsCn, err := t.dialTLSWithContext(ctx, network, addr, cfg) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| state := tlsCn.ConnectionState() | |||
| if p := state.NegotiatedProtocol; p != NextProtoTLS { | |||
| return nil, fmt.Errorf("http2: unexpected ALPN protocol %q; want %q", p, NextProtoTLS) | |||
| } | |||
| if !state.NegotiatedProtocolIsMutual { | |||
| return nil, errors.New("http2: could not negotiate protocol mutually") | |||
| } | |||
| return tlsCn, nil | |||
| } | |||
| state := cn.ConnectionState() | |||
| if p := state.NegotiatedProtocol; p != NextProtoTLS { | |||
| return nil, fmt.Errorf("http2: unexpected ALPN protocol %q; want %q", p, NextProtoTLS) | |||
| } | |||
| if !state.NegotiatedProtocolIsMutual { | |||
| return nil, errors.New("http2: could not negotiate protocol mutually") | |||
| } | |||
| return cn, nil | |||
| } | |||
| // disableKeepAlives reports whether connections should be closed as | |||
| @@ -923,46 +912,6 @@ func (cc *ClientConn) closeForLostPing() error { | |||
| return cc.closeForError(err) | |||
| } | |||
| const maxAllocFrameSize = 512 << 10 | |||
| // frameBuffer returns a scratch buffer suitable for writing DATA frames. | |||
| // They're capped at the min of the peer's max frame size or 512KB | |||
| // (kinda arbitrarily), but definitely capped so we don't allocate 4GB | |||
| // bufers. | |||
| func (cc *ClientConn) frameScratchBuffer() []byte { | |||
| cc.mu.Lock() | |||
| size := cc.maxFrameSize | |||
| if size > maxAllocFrameSize { | |||
| size = maxAllocFrameSize | |||
| } | |||
| for i, buf := range cc.freeBuf { | |||
| if len(buf) >= int(size) { | |||
| cc.freeBuf[i] = nil | |||
| cc.mu.Unlock() | |||
| return buf[:size] | |||
| } | |||
| } | |||
| cc.mu.Unlock() | |||
| return make([]byte, size) | |||
| } | |||
| func (cc *ClientConn) putFrameScratchBuffer(buf []byte) { | |||
| cc.mu.Lock() | |||
| defer cc.mu.Unlock() | |||
| const maxBufs = 4 // arbitrary; 4 concurrent requests per conn? investigate. | |||
| if len(cc.freeBuf) < maxBufs { | |||
| cc.freeBuf = append(cc.freeBuf, buf) | |||
| return | |||
| } | |||
| for i, old := range cc.freeBuf { | |||
| if old == nil { | |||
| cc.freeBuf[i] = buf | |||
| return | |||
| } | |||
| } | |||
| // forget about it. | |||
| } | |||
| // errRequestCanceled is a copy of net/http's errRequestCanceled because it's not | |||
| // exported. At least they'll be DeepEqual for h1-vs-h2 comparisons tests. | |||
| var errRequestCanceled = errors.New("net/http: request canceled") | |||
| @@ -1005,7 +954,7 @@ func checkConnHeaders(req *http.Request) error { | |||
| if vv := req.Header["Transfer-Encoding"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && vv[0] != "chunked") { | |||
| return fmt.Errorf("http2: invalid Transfer-Encoding request header: %q", vv) | |||
| } | |||
| if vv := req.Header["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !strings.EqualFold(vv[0], "close") && !strings.EqualFold(vv[0], "keep-alive")) { | |||
| if vv := req.Header["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !asciiEqualFold(vv[0], "close") && !asciiEqualFold(vv[0], "keep-alive")) { | |||
| return fmt.Errorf("http2: invalid Connection request header: %q", vv) | |||
| } | |||
| return nil | |||
| @@ -1305,11 +1254,35 @@ var ( | |||
| errReqBodyTooLong = errors.New("http2: request body larger than specified content length") | |||
| ) | |||
| // frameScratchBufferLen returns the length of a buffer to use for | |||
| // outgoing request bodies to read/write to/from. | |||
| // | |||
| // It returns max(1, min(peer's advertised max frame size, | |||
| // Request.ContentLength+1, 512KB)). | |||
| func (cs *clientStream) frameScratchBufferLen(maxFrameSize int) int { | |||
| const max = 512 << 10 | |||
| n := int64(maxFrameSize) | |||
| if n > max { | |||
| n = max | |||
| } | |||
| if cl := actualContentLength(cs.req); cl != -1 && cl+1 < n { | |||
| // Add an extra byte past the declared content-length to | |||
| // give the caller's Request.Body io.Reader a chance to | |||
| // give us more bytes than they declared, so we can catch it | |||
| // early. | |||
| n = cl + 1 | |||
| } | |||
| if n < 1 { | |||
| return 1 | |||
| } | |||
| return int(n) // doesn't truncate; max is 512K | |||
| } | |||
| var bufPool sync.Pool // of *[]byte | |||
| func (cs *clientStream) writeRequestBody(body io.Reader, bodyCloser io.Closer) (err error) { | |||
| cc := cs.cc | |||
| sentEnd := false // whether we sent the final DATA frame w/ END_STREAM | |||
| buf := cc.frameScratchBuffer() | |||
| defer cc.putFrameScratchBuffer(buf) | |||
| defer func() { | |||
| traceWroteRequest(cs.trace, err) | |||
| @@ -1328,9 +1301,24 @@ func (cs *clientStream) writeRequestBody(body io.Reader, bodyCloser io.Closer) ( | |||
| remainLen := actualContentLength(req) | |||
| hasContentLen := remainLen != -1 | |||
| cc.mu.Lock() | |||
| maxFrameSize := int(cc.maxFrameSize) | |||
| cc.mu.Unlock() | |||
| // Scratch buffer for reading into & writing from. | |||
| scratchLen := cs.frameScratchBufferLen(maxFrameSize) | |||
| var buf []byte | |||
| if bp, ok := bufPool.Get().(*[]byte); ok && len(*bp) >= scratchLen { | |||
| defer bufPool.Put(bp) | |||
| buf = *bp | |||
| } else { | |||
| buf = make([]byte, scratchLen) | |||
| defer bufPool.Put(&buf) | |||
| } | |||
| var sawEOF bool | |||
| for !sawEOF { | |||
| n, err := body.Read(buf[:len(buf)-1]) | |||
| n, err := body.Read(buf[:len(buf)]) | |||
| if hasContentLen { | |||
| remainLen -= int64(n) | |||
| if remainLen == 0 && err == nil { | |||
| @@ -1341,8 +1329,9 @@ func (cs *clientStream) writeRequestBody(body io.Reader, bodyCloser io.Closer) ( | |||
| // to send the END_STREAM bit early, double-check that we're actually | |||
| // at EOF. Subsequent reads should return (0, EOF) at this point. | |||
| // If either value is different, we return an error in one of two ways below. | |||
| var scratch [1]byte | |||
| var n1 int | |||
| n1, err = body.Read(buf[n:]) | |||
| n1, err = body.Read(scratch[:]) | |||
| remainLen -= int64(n1) | |||
| } | |||
| if remainLen < 0 { | |||
| @@ -1412,10 +1401,6 @@ func (cs *clientStream) writeRequestBody(body io.Reader, bodyCloser io.Closer) ( | |||
| } | |||
| } | |||
| cc.mu.Lock() | |||
| maxFrameSize := int(cc.maxFrameSize) | |||
| cc.mu.Unlock() | |||
| cc.wmu.Lock() | |||
| defer cc.wmu.Unlock() | |||
| @@ -1531,19 +1516,21 @@ func (cc *ClientConn) encodeHeaders(req *http.Request, addGzipHeader bool, trail | |||
| var didUA bool | |||
| for k, vv := range req.Header { | |||
| if strings.EqualFold(k, "host") || strings.EqualFold(k, "content-length") { | |||
| if asciiEqualFold(k, "host") || asciiEqualFold(k, "content-length") { | |||
| // Host is :authority, already sent. | |||
| // Content-Length is automatic, set below. | |||
| continue | |||
| } else if strings.EqualFold(k, "connection") || strings.EqualFold(k, "proxy-connection") || | |||
| strings.EqualFold(k, "transfer-encoding") || strings.EqualFold(k, "upgrade") || | |||
| strings.EqualFold(k, "keep-alive") { | |||
| } else if asciiEqualFold(k, "connection") || | |||
| asciiEqualFold(k, "proxy-connection") || | |||
| asciiEqualFold(k, "transfer-encoding") || | |||
| asciiEqualFold(k, "upgrade") || | |||
| asciiEqualFold(k, "keep-alive") { | |||
| // Per 8.1.2.2 Connection-Specific Header | |||
| // Fields, don't send connection-specific | |||
| // fields. We have already checked if any | |||
| // are error-worthy so just ignore the rest. | |||
| continue | |||
| } else if strings.EqualFold(k, "user-agent") { | |||
| } else if asciiEqualFold(k, "user-agent") { | |||
| // Match Go's http1 behavior: at most one | |||
| // User-Agent. If set to nil or empty string, | |||
| // then omit it. Otherwise if not mentioned, | |||
| @@ -1556,7 +1543,7 @@ func (cc *ClientConn) encodeHeaders(req *http.Request, addGzipHeader bool, trail | |||
| if vv[0] == "" { | |||
| continue | |||
| } | |||
| } else if strings.EqualFold(k, "cookie") { | |||
| } else if asciiEqualFold(k, "cookie") { | |||
| // Per 8.1.2.5 To allow for better compression efficiency, the | |||
| // Cookie header field MAY be split into separate header fields, | |||
| // each with one or more cookie-pairs. | |||
| @@ -1615,7 +1602,12 @@ func (cc *ClientConn) encodeHeaders(req *http.Request, addGzipHeader bool, trail | |||
| // Header list size is ok. Write the headers. | |||
| enumerateHeaders(func(name, value string) { | |||
| name = strings.ToLower(name) | |||
| name, ascii := asciiToLower(name) | |||
| if !ascii { | |||
| // Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header | |||
| // field names have to be ASCII characters (just as in HTTP/1.x). | |||
| return | |||
| } | |||
| cc.writeHeader(name, value) | |||
| if traceHeaders { | |||
| traceWroteHeaderField(trace, name, value) | |||
| @@ -1663,9 +1655,14 @@ func (cc *ClientConn) encodeTrailers(req *http.Request) ([]byte, error) { | |||
| } | |||
| for k, vv := range req.Trailer { | |||
| lowKey, ascii := asciiToLower(k) | |||
| if !ascii { | |||
| // Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header | |||
| // field names have to be ASCII characters (just as in HTTP/1.x). | |||
| continue | |||
| } | |||
| // Transfer-Encoding, etc.. have already been filtered at the | |||
| // start of RoundTrip | |||
| lowKey := strings.ToLower(k) | |||
| for _, v := range vv { | |||
| cc.writeHeader(lowKey, v) | |||
| } | |||
| @@ -341,7 +341,12 @@ func encodeHeaders(enc *hpack.Encoder, h http.Header, keys []string) { | |||
| } | |||
| for _, k := range keys { | |||
| vv := h[k] | |||
| k = lowerHeader(k) | |||
| k, ascii := lowerHeader(k) | |||
| if !ascii { | |||
| // Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header | |||
| // field names have to be ASCII characters (just as in HTTP/1.x). | |||
| continue | |||
| } | |||
| if !validWireHeaderFieldName(k) { | |||
| // Skip it as backup paranoia. Per | |||
| // golang.org/issue/14048, these should | |||
| @@ -67,15 +67,14 @@ func Transitional(transitional bool) Option { | |||
| // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts | |||
| // are longer than allowed by the RFC. | |||
| // | |||
| // This option corresponds to the VerifyDnsLength flag in UTS #46. | |||
| func VerifyDNSLength(verify bool) Option { | |||
| return func(o *options) { o.verifyDNSLength = verify } | |||
| } | |||
| // RemoveLeadingDots removes leading label separators. Leading runes that map to | |||
| // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. | |||
| // | |||
| // This is the behavior suggested by the UTS #46 and is adopted by some | |||
| // browsers. | |||
| func RemoveLeadingDots(remove bool) Option { | |||
| return func(o *options) { o.removeLeadingDots = remove } | |||
| } | |||
| @@ -83,6 +82,8 @@ func RemoveLeadingDots(remove bool) Option { | |||
| // ValidateLabels sets whether to check the mandatory label validation criteria | |||
| // as defined in Section 5.4 of RFC 5891. This includes testing for correct use | |||
| // of hyphens ('-'), normalization, validity of runes, and the context rules. | |||
| // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags | |||
| // in UTS #46. | |||
| func ValidateLabels(enable bool) Option { | |||
| return func(o *options) { | |||
| // Don't override existing mappings, but set one that at least checks | |||
| @@ -91,25 +92,48 @@ func ValidateLabels(enable bool) Option { | |||
| o.mapping = normalize | |||
| } | |||
| o.trie = trie | |||
| o.validateLabels = enable | |||
| o.fromPuny = validateFromPunycode | |||
| o.checkJoiners = enable | |||
| o.checkHyphens = enable | |||
| if enable { | |||
| o.fromPuny = validateFromPunycode | |||
| } else { | |||
| o.fromPuny = nil | |||
| } | |||
| } | |||
| } | |||
| // CheckHyphens sets whether to check for correct use of hyphens ('-') in | |||
| // labels. Most web browsers do not have this option set, since labels such as | |||
| // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. | |||
| // | |||
| // This option corresponds to the CheckHyphens flag in UTS #46. | |||
| func CheckHyphens(enable bool) Option { | |||
| return func(o *options) { o.checkHyphens = enable } | |||
| } | |||
| // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix | |||
| // A of RFC 5892, concerning the use of joiner runes. | |||
| // | |||
| // This option corresponds to the CheckJoiners flag in UTS #46. | |||
| func CheckJoiners(enable bool) Option { | |||
| return func(o *options) { | |||
| o.trie = trie | |||
| o.checkJoiners = enable | |||
| } | |||
| } | |||
| // StrictDomainName limits the set of permissible ASCII characters to those | |||
| // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the | |||
| // hyphen). This is set by default for MapForLookup and ValidateForRegistration. | |||
| // hyphen). This is set by default for MapForLookup and ValidateForRegistration, | |||
| // but is only useful if ValidateLabels is set. | |||
| // | |||
| // This option is useful, for instance, for browsers that allow characters | |||
| // outside this range, for example a '_' (U+005F LOW LINE). See | |||
| // http://www.rfc-editor.org/std/std3.txt for more details This option | |||
| // corresponds to the UseSTD3ASCIIRules option in UTS #46. | |||
| // http://www.rfc-editor.org/std/std3.txt for more details. | |||
| // | |||
| // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. | |||
| func StrictDomainName(use bool) Option { | |||
| return func(o *options) { | |||
| o.trie = trie | |||
| o.useSTD3Rules = use | |||
| o.fromPuny = validateFromPunycode | |||
| } | |||
| return func(o *options) { o.useSTD3Rules = use } | |||
| } | |||
| // NOTE: the following options pull in tables. The tables should not be linked | |||
| @@ -117,6 +141,8 @@ func StrictDomainName(use bool) Option { | |||
| // BidiRule enables the Bidi rule as defined in RFC 5893. Any application | |||
| // that relies on proper validation of labels should include this rule. | |||
| // | |||
| // This option corresponds to the CheckBidi flag in UTS #46. | |||
| func BidiRule() Option { | |||
| return func(o *options) { o.bidirule = bidirule.ValidString } | |||
| } | |||
| @@ -152,7 +178,8 @@ func MapForLookup() Option { | |||
| type options struct { | |||
| transitional bool | |||
| useSTD3Rules bool | |||
| validateLabels bool | |||
| checkHyphens bool | |||
| checkJoiners bool | |||
| verifyDNSLength bool | |||
| removeLeadingDots bool | |||
| @@ -225,8 +252,11 @@ func (p *Profile) String() string { | |||
| if p.useSTD3Rules { | |||
| s += ":UseSTD3Rules" | |||
| } | |||
| if p.validateLabels { | |||
| s += ":ValidateLabels" | |||
| if p.checkHyphens { | |||
| s += ":CheckHyphens" | |||
| } | |||
| if p.checkJoiners { | |||
| s += ":CheckJoiners" | |||
| } | |||
| if p.verifyDNSLength { | |||
| s += ":VerifyDNSLength" | |||
| @@ -254,26 +284,29 @@ var ( | |||
| punycode = &Profile{} | |||
| lookup = &Profile{options{ | |||
| transitional: true, | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| bidirule: bidirule.ValidString, | |||
| transitional: true, | |||
| useSTD3Rules: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| bidirule: bidirule.ValidString, | |||
| }} | |||
| display = &Profile{options{ | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| bidirule: bidirule.ValidString, | |||
| useSTD3Rules: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| bidirule: bidirule.ValidString, | |||
| }} | |||
| registration = &Profile{options{ | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| verifyDNSLength: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateRegistration, | |||
| @@ -340,7 +373,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { | |||
| } | |||
| isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight | |||
| labels.set(u) | |||
| if err == nil && p.validateLabels { | |||
| if err == nil && p.fromPuny != nil { | |||
| err = p.fromPuny(p, u) | |||
| } | |||
| if err == nil { | |||
| @@ -681,16 +714,18 @@ func (p *Profile) validateLabel(s string) (err error) { | |||
| } | |||
| return nil | |||
| } | |||
| if !p.validateLabels { | |||
| return nil | |||
| } | |||
| trie := p.trie // p.validateLabels is only set if trie is set. | |||
| if len(s) > 4 && s[2] == '-' && s[3] == '-' { | |||
| return &labelError{s, "V2"} | |||
| if p.checkHyphens { | |||
| if len(s) > 4 && s[2] == '-' && s[3] == '-' { | |||
| return &labelError{s, "V2"} | |||
| } | |||
| if s[0] == '-' || s[len(s)-1] == '-' { | |||
| return &labelError{s, "V3"} | |||
| } | |||
| } | |||
| if s[0] == '-' || s[len(s)-1] == '-' { | |||
| return &labelError{s, "V3"} | |||
| if !p.checkJoiners { | |||
| return nil | |||
| } | |||
| trie := p.trie // p.checkJoiners is only set if trie is set. | |||
| // TODO: merge the use of this in the trie. | |||
| v, sz := trie.lookupString(s) | |||
| x := info(v) | |||
| @@ -66,15 +66,14 @@ func Transitional(transitional bool) Option { | |||
| // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts | |||
| // are longer than allowed by the RFC. | |||
| // | |||
| // This option corresponds to the VerifyDnsLength flag in UTS #46. | |||
| func VerifyDNSLength(verify bool) Option { | |||
| return func(o *options) { o.verifyDNSLength = verify } | |||
| } | |||
| // RemoveLeadingDots removes leading label separators. Leading runes that map to | |||
| // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. | |||
| // | |||
| // This is the behavior suggested by the UTS #46 and is adopted by some | |||
| // browsers. | |||
| func RemoveLeadingDots(remove bool) Option { | |||
| return func(o *options) { o.removeLeadingDots = remove } | |||
| } | |||
| @@ -82,6 +81,8 @@ func RemoveLeadingDots(remove bool) Option { | |||
| // ValidateLabels sets whether to check the mandatory label validation criteria | |||
| // as defined in Section 5.4 of RFC 5891. This includes testing for correct use | |||
| // of hyphens ('-'), normalization, validity of runes, and the context rules. | |||
| // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags | |||
| // in UTS #46. | |||
| func ValidateLabels(enable bool) Option { | |||
| return func(o *options) { | |||
| // Don't override existing mappings, but set one that at least checks | |||
| @@ -90,25 +91,48 @@ func ValidateLabels(enable bool) Option { | |||
| o.mapping = normalize | |||
| } | |||
| o.trie = trie | |||
| o.validateLabels = enable | |||
| o.fromPuny = validateFromPunycode | |||
| o.checkJoiners = enable | |||
| o.checkHyphens = enable | |||
| if enable { | |||
| o.fromPuny = validateFromPunycode | |||
| } else { | |||
| o.fromPuny = nil | |||
| } | |||
| } | |||
| } | |||
| // CheckHyphens sets whether to check for correct use of hyphens ('-') in | |||
| // labels. Most web browsers do not have this option set, since labels such as | |||
| // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. | |||
| // | |||
| // This option corresponds to the CheckHyphens flag in UTS #46. | |||
| func CheckHyphens(enable bool) Option { | |||
| return func(o *options) { o.checkHyphens = enable } | |||
| } | |||
| // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix | |||
| // A of RFC 5892, concerning the use of joiner runes. | |||
| // | |||
| // This option corresponds to the CheckJoiners flag in UTS #46. | |||
| func CheckJoiners(enable bool) Option { | |||
| return func(o *options) { | |||
| o.trie = trie | |||
| o.checkJoiners = enable | |||
| } | |||
| } | |||
| // StrictDomainName limits the set of permissable ASCII characters to those | |||
| // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the | |||
| // hyphen). This is set by default for MapForLookup and ValidateForRegistration. | |||
| // hyphen). This is set by default for MapForLookup and ValidateForRegistration, | |||
| // but is only useful if ValidateLabels is set. | |||
| // | |||
| // This option is useful, for instance, for browsers that allow characters | |||
| // outside this range, for example a '_' (U+005F LOW LINE). See | |||
| // http://www.rfc-editor.org/std/std3.txt for more details This option | |||
| // corresponds to the UseSTD3ASCIIRules option in UTS #46. | |||
| // http://www.rfc-editor.org/std/std3.txt for more details. | |||
| // | |||
| // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. | |||
| func StrictDomainName(use bool) Option { | |||
| return func(o *options) { | |||
| o.trie = trie | |||
| o.useSTD3Rules = use | |||
| o.fromPuny = validateFromPunycode | |||
| } | |||
| return func(o *options) { o.useSTD3Rules = use } | |||
| } | |||
| // NOTE: the following options pull in tables. The tables should not be linked | |||
| @@ -116,6 +140,8 @@ func StrictDomainName(use bool) Option { | |||
| // BidiRule enables the Bidi rule as defined in RFC 5893. Any application | |||
| // that relies on proper validation of labels should include this rule. | |||
| // | |||
| // This option corresponds to the CheckBidi flag in UTS #46. | |||
| func BidiRule() Option { | |||
| return func(o *options) { o.bidirule = bidirule.ValidString } | |||
| } | |||
| @@ -152,7 +178,8 @@ func MapForLookup() Option { | |||
| type options struct { | |||
| transitional bool | |||
| useSTD3Rules bool | |||
| validateLabels bool | |||
| checkHyphens bool | |||
| checkJoiners bool | |||
| verifyDNSLength bool | |||
| removeLeadingDots bool | |||
| @@ -225,8 +252,11 @@ func (p *Profile) String() string { | |||
| if p.useSTD3Rules { | |||
| s += ":UseSTD3Rules" | |||
| } | |||
| if p.validateLabels { | |||
| s += ":ValidateLabels" | |||
| if p.checkHyphens { | |||
| s += ":CheckHyphens" | |||
| } | |||
| if p.checkJoiners { | |||
| s += ":CheckJoiners" | |||
| } | |||
| if p.verifyDNSLength { | |||
| s += ":VerifyDNSLength" | |||
| @@ -255,9 +285,10 @@ var ( | |||
| punycode = &Profile{} | |||
| lookup = &Profile{options{ | |||
| transitional: true, | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| removeLeadingDots: true, | |||
| useSTD3Rules: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| @@ -265,8 +296,9 @@ var ( | |||
| }} | |||
| display = &Profile{options{ | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| removeLeadingDots: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateAndMap, | |||
| @@ -274,8 +306,9 @@ var ( | |||
| }} | |||
| registration = &Profile{options{ | |||
| useSTD3Rules: true, | |||
| validateLabels: true, | |||
| verifyDNSLength: true, | |||
| checkHyphens: true, | |||
| checkJoiners: true, | |||
| trie: trie, | |||
| fromPuny: validateFromPunycode, | |||
| mapping: validateRegistration, | |||
| @@ -339,7 +372,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { | |||
| continue | |||
| } | |||
| labels.set(u) | |||
| if err == nil && p.validateLabels { | |||
| if err == nil && p.fromPuny != nil { | |||
| err = p.fromPuny(p, u) | |||
| } | |||
| if err == nil { | |||
| @@ -629,16 +662,18 @@ func (p *Profile) validateLabel(s string) error { | |||
| if p.bidirule != nil && !p.bidirule(s) { | |||
| return &labelError{s, "B"} | |||
| } | |||
| if !p.validateLabels { | |||
| return nil | |||
| } | |||
| trie := p.trie // p.validateLabels is only set if trie is set. | |||
| if len(s) > 4 && s[2] == '-' && s[3] == '-' { | |||
| return &labelError{s, "V2"} | |||
| if p.checkHyphens { | |||
| if len(s) > 4 && s[2] == '-' && s[3] == '-' { | |||
| return &labelError{s, "V2"} | |||
| } | |||
| if s[0] == '-' || s[len(s)-1] == '-' { | |||
| return &labelError{s, "V3"} | |||
| } | |||
| } | |||
| if s[0] == '-' || s[len(s)-1] == '-' { | |||
| return &labelError{s, "V3"} | |||
| if !p.checkJoiners { | |||
| return nil | |||
| } | |||
| trie := p.trie // p.checkJoiners is only set if trie is set. | |||
| // TODO: merge the use of this in the trie. | |||
| v, sz := trie.lookupString(s) | |||
| x := info(v) | |||
| @@ -57,7 +57,7 @@ loop: | |||
| err = transform.ErrShortSrc | |||
| break loop | |||
| } | |||
| r = utf8.RuneError | |||
| r, size = utf8.RuneError, 1 | |||
| goto write | |||
| } | |||
| size = 2 | |||
| @@ -303,9 +303,17 @@ func (t Tag) Extensions() []string { | |||
| // are of the allowed values defined for the Unicode locale extension ('u') in | |||
| // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | |||
| // TypeForKey will traverse the inheritance chain to get the correct value. | |||
| // | |||
| // If there are multiple types associated with a key, only the first will be | |||
| // returned. If there is no type associated with a key, it returns the empty | |||
| // string. | |||
| func (t Tag) TypeForKey(key string) string { | |||
| if start, end, _ := t.findTypeForKey(key); end != start { | |||
| return t.str[start:end] | |||
| if _, start, end, _ := t.findTypeForKey(key); end != start { | |||
| s := t.str[start:end] | |||
| if p := strings.IndexByte(s, '-'); p >= 0 { | |||
| s = s[:p] | |||
| } | |||
| return s | |||
| } | |||
| return "" | |||
| } | |||
| @@ -329,13 +337,13 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { | |||
| // Remove the setting if value is "". | |||
| if value == "" { | |||
| start, end, _ := t.findTypeForKey(key) | |||
| if start != end { | |||
| // Remove key tag and leading '-'. | |||
| start -= 4 | |||
| start, sep, end, _ := t.findTypeForKey(key) | |||
| if start != sep { | |||
| // Remove a possible empty extension. | |||
| if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' { | |||
| switch { | |||
| case t.str[start-2] != '-': // has previous elements. | |||
| case end == len(t.str), // end of string | |||
| end+2 < len(t.str) && t.str[end+2] == '-': // end of extension | |||
| start -= 2 | |||
| } | |||
| if start == int(t.pVariant) && end == len(t.str) { | |||
| @@ -381,14 +389,14 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { | |||
| t.str = string(buf[:uStart+len(b)]) | |||
| } else { | |||
| s := t.str | |||
| start, end, hasExt := t.findTypeForKey(key) | |||
| if start == end { | |||
| start, sep, end, hasExt := t.findTypeForKey(key) | |||
| if start == sep { | |||
| if hasExt { | |||
| b = b[2:] | |||
| } | |||
| t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:]) | |||
| t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:]) | |||
| } else { | |||
| t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:]) | |||
| t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:]) | |||
| } | |||
| } | |||
| return t, nil | |||
| @@ -399,10 +407,10 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) { | |||
| // wasn't found. The hasExt return value reports whether an -u extension was present. | |||
| // Note: the extensions are typically very small and are likely to contain | |||
| // only one key-type pair. | |||
| func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { | |||
| func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) { | |||
| p := int(t.pExt) | |||
| if len(key) != 2 || p == len(t.str) || p == 0 { | |||
| return p, p, false | |||
| return p, p, p, false | |||
| } | |||
| s := t.str | |||
| @@ -410,10 +418,10 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { | |||
| for p++; s[p] != 'u'; p++ { | |||
| if s[p] > 'u' { | |||
| p-- | |||
| return p, p, false | |||
| return p, p, p, false | |||
| } | |||
| if p = nextExtension(s, p); p == len(s) { | |||
| return len(s), len(s), false | |||
| return len(s), len(s), len(s), false | |||
| } | |||
| } | |||
| // Proceed to the hyphen following the extension name. | |||
| @@ -424,40 +432,28 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) { | |||
| // Iterate over keys until we get the end of a section. | |||
| for { | |||
| // p points to the hyphen preceding the current token. | |||
| if p3 := p + 3; s[p3] == '-' { | |||
| // Found a key. | |||
| // Check whether we just processed the key that was requested. | |||
| if curKey == key { | |||
| return start, p, true | |||
| } | |||
| // Set to the next key and continue scanning type tokens. | |||
| curKey = s[p+1 : p3] | |||
| if curKey > key { | |||
| return p, p, true | |||
| } | |||
| // Start of the type token sequence. | |||
| start = p + 4 | |||
| // A type is at least 3 characters long. | |||
| p += 7 // 4 + 3 | |||
| } else { | |||
| // Attribute or type, which is at least 3 characters long. | |||
| p += 4 | |||
| } | |||
| // p points past the third character of a type or attribute. | |||
| max := p + 5 // maximum length of token plus hyphen. | |||
| if len(s) < max { | |||
| max = len(s) | |||
| end = p | |||
| for p++; p < len(s) && s[p] != '-'; p++ { | |||
| } | |||
| for ; p < max && s[p] != '-'; p++ { | |||
| n := p - end - 1 | |||
| if n <= 2 && curKey == key { | |||
| if sep < end { | |||
| sep++ | |||
| } | |||
| return start, sep, end, true | |||
| } | |||
| // Bail if we have exhausted all tokens or if the next token starts | |||
| // a new extension. | |||
| if p == len(s) || s[p+2] == '-' { | |||
| if curKey == key { | |||
| return start, p, true | |||
| switch n { | |||
| case 0, // invalid string | |||
| 1: // next extension | |||
| return end, end, end, true | |||
| case 2: | |||
| // next key | |||
| curKey = s[end+1 : p] | |||
| if curKey > key { | |||
| return end, end, end, true | |||
| } | |||
| return p, p, true | |||
| start = end | |||
| sep = p | |||
| } | |||
| } | |||
| } | |||
| @@ -133,14 +133,15 @@ func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { | |||
| s.start = oldStart | |||
| if end := oldStart + newSize; end != oldEnd { | |||
| diff := end - oldEnd | |||
| if end < cap(s.b) { | |||
| b := make([]byte, len(s.b)+diff) | |||
| var b []byte | |||
| if n := len(s.b) + diff; n > cap(s.b) { | |||
| b = make([]byte, n) | |||
| copy(b, s.b[:oldStart]) | |||
| copy(b[end:], s.b[oldEnd:]) | |||
| s.b = b | |||
| } else { | |||
| s.b = append(s.b[end:], s.b[oldEnd:]...) | |||
| b = s.b[:n] | |||
| } | |||
| copy(b[end:], s.b[oldEnd:]) | |||
| s.b = b | |||
| s.next = end + (s.next - s.end) | |||
| s.end = end | |||
| } | |||
| @@ -482,7 +483,7 @@ func parseExtensions(scan *scanner) int { | |||
| func parseExtension(scan *scanner) int { | |||
| start, end := scan.start, scan.end | |||
| switch scan.token[0] { | |||
| case 'u': | |||
| case 'u': // https://www.ietf.org/rfc/rfc6067.txt | |||
| attrStart := end | |||
| scan.scan() | |||
| for last := []byte{}; len(scan.token) > 2; scan.scan() { | |||
| @@ -502,27 +503,29 @@ func parseExtension(scan *scanner) int { | |||
| last = scan.token | |||
| end = scan.end | |||
| } | |||
| // Scan key-type sequences. A key is of length 2 and may be followed | |||
| // by 0 or more "type" subtags from 3 to the maximum of 8 letters. | |||
| var last, key []byte | |||
| for attrEnd := end; len(scan.token) == 2; last = key { | |||
| key = scan.token | |||
| keyEnd := scan.end | |||
| end = scan.acceptMinSize(3) | |||
| end = scan.end | |||
| for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { | |||
| end = scan.end | |||
| } | |||
| // TODO: check key value validity | |||
| if keyEnd == end || bytes.Compare(key, last) != 1 { | |||
| if bytes.Compare(key, last) != 1 || scan.err != nil { | |||
| // We have an invalid key or the keys are not sorted. | |||
| // Start scanning keys from scratch and reorder. | |||
| p := attrEnd + 1 | |||
| scan.next = p | |||
| keys := [][]byte{} | |||
| for scan.scan(); len(scan.token) == 2; { | |||
| keyStart, keyEnd := scan.start, scan.end | |||
| end = scan.acceptMinSize(3) | |||
| if keyEnd != end { | |||
| keys = append(keys, scan.b[keyStart:end]) | |||
| } else { | |||
| scan.setError(ErrSyntax) | |||
| end = keyStart | |||
| keyStart := scan.start | |||
| end = scan.end | |||
| for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() { | |||
| end = scan.end | |||
| } | |||
| keys = append(keys, scan.b[keyStart:end]) | |||
| } | |||
| sort.Stable(bytesSort{keys, 2}) | |||
| if n := len(keys); n > 0 { | |||
| @@ -546,7 +549,7 @@ func parseExtension(scan *scanner) int { | |||
| break | |||
| } | |||
| } | |||
| case 't': | |||
| case 't': // https://www.ietf.org/rfc/rfc6497.txt | |||
| scan.scan() | |||
| if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { | |||
| _, end = parseTag(scan) | |||
| @@ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build !go1.2 | |||
| // +build !go1.2 | |||
| package language | |||
| @@ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build go1.2 | |||
| // +build go1.2 | |||
| package language | |||
| @@ -412,6 +412,10 @@ func (t Tag) Extensions() []Extension { | |||
| // are of the allowed values defined for the Unicode locale extension ('u') in | |||
| // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | |||
| // TypeForKey will traverse the inheritance chain to get the correct value. | |||
| // | |||
| // If there are multiple types associated with a key, only the first will be | |||
| // returned. If there is no type associated with a key, it returns the empty | |||
| // string. | |||
| func (t Tag) TypeForKey(key string) string { | |||
| if !compact.Tag(t).MayHaveExtensions() { | |||
| if key != "rg" && key != "va" { | |||
| @@ -47,7 +47,7 @@ const ( | |||
| _Zzzz = 251 | |||
| ) | |||
| var regionToGroups = []uint8{ // 357 elements | |||
| var regionToGroups = []uint8{ // 358 elements | |||
| // Entry 0 - 3F | |||
| 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04, | |||
| 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00, | |||
| @@ -98,8 +98,8 @@ var regionToGroups = []uint8{ // 357 elements | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| } // Size: 381 bytes | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| } // Size: 382 bytes | |||
| var paradigmLocales = [][3]uint16{ // 3 elements | |||
| 0: [3]uint16{0x139, 0x0, 0x7b}, | |||
| @@ -295,4 +295,4 @@ var matchRegion = []regionIntelligibility{ // 15 elements | |||
| 14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5}, | |||
| } // Size: 114 bytes | |||
| // Total table size 1471 bytes (1KiB); checksum: 4CB1CD46 | |||
| // Total table size 1472 bytes (1KiB); checksum: F86C669 | |||
| @@ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build go1.10 | |||
| // +build go1.10 | |||
| package bidirule | |||
| @@ -2,6 +2,7 @@ | |||
| // Use of this source code is governed by a BSD-style | |||
| // license that can be found in the LICENSE file. | |||
| //go:build !go1.10 | |||
| // +build !go1.10 | |||
| package bidirule | |||
| @@ -12,15 +12,14 @@ | |||
| // and without notice. | |||
| package bidi // import "golang.org/x/text/unicode/bidi" | |||
| // TODO: | |||
| // The following functionality would not be hard to implement, but hinges on | |||
| // the definition of a Segmenter interface. For now this is up to the user. | |||
| // - Iterate over paragraphs | |||
| // - Segmenter to iterate over runs directly from a given text. | |||
| // Also: | |||
| // TODO | |||
| // - Transformer for reordering? | |||
| // - Transformer (validator, really) for Bidi Rule. | |||
| import ( | |||
| "bytes" | |||
| ) | |||
| // This API tries to avoid dealing with embedding levels for now. Under the hood | |||
| // these will be computed, but the question is to which extent the user should | |||
| // know they exist. We should at some point allow the user to specify an | |||
| @@ -49,7 +48,9 @@ const ( | |||
| Neutral | |||
| ) | |||
| type options struct{} | |||
| type options struct { | |||
| defaultDirection Direction | |||
| } | |||
| // An Option is an option for Bidi processing. | |||
| type Option func(*options) | |||
| @@ -66,12 +67,62 @@ type Option func(*options) | |||
| // DefaultDirection sets the default direction for a Paragraph. The direction is | |||
| // overridden if the text contains directional characters. | |||
| func DefaultDirection(d Direction) Option { | |||
| panic("unimplemented") | |||
| return func(opts *options) { | |||
| opts.defaultDirection = d | |||
| } | |||
| } | |||
| // A Paragraph holds a single Paragraph for Bidi processing. | |||
| type Paragraph struct { | |||
| // buffers | |||
| p []byte | |||
| o Ordering | |||
| opts []Option | |||
| types []Class | |||
| pairTypes []bracketType | |||
| pairValues []rune | |||
| runes []rune | |||
| options options | |||
| } | |||
| // Initialize the p.pairTypes, p.pairValues and p.types from the input previously | |||
| // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph | |||
| // separator (bidi class B). | |||
| // | |||
| // The function p.Order() needs these values to be set, so this preparation could be postponed. | |||
| // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph | |||
| // separator, the whole input needs to be processed anyway and should not be done twice. | |||
| // | |||
| // The function has the same return values as SetBytes() / SetString() | |||
| func (p *Paragraph) prepareInput() (n int, err error) { | |||
| p.runes = bytes.Runes(p.p) | |||
| bytecount := 0 | |||
| // clear slices from previous SetString or SetBytes | |||
| p.pairTypes = nil | |||
| p.pairValues = nil | |||
| p.types = nil | |||
| for _, r := range p.runes { | |||
| props, i := LookupRune(r) | |||
| bytecount += i | |||
| cls := props.Class() | |||
| if cls == B { | |||
| return bytecount, nil | |||
| } | |||
| p.types = append(p.types, cls) | |||
| if props.IsOpeningBracket() { | |||
| p.pairTypes = append(p.pairTypes, bpOpen) | |||
| p.pairValues = append(p.pairValues, r) | |||
| } else if props.IsBracket() { | |||
| // this must be a closing bracket, | |||
| // since IsOpeningBracket is not true | |||
| p.pairTypes = append(p.pairTypes, bpClose) | |||
| p.pairValues = append(p.pairValues, r) | |||
| } else { | |||
| p.pairTypes = append(p.pairTypes, bpNone) | |||
| p.pairValues = append(p.pairValues, 0) | |||
| } | |||
| } | |||
| return bytecount, nil | |||
| } | |||
| // SetBytes configures p for the given paragraph text. It replaces text | |||
| @@ -80,70 +131,150 @@ type Paragraph struct { | |||
| // consumed from b including this separator. Error may be non-nil if options are | |||
| // given. | |||
| func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { | |||
| panic("unimplemented") | |||
| p.p = b | |||
| p.opts = opts | |||
| return p.prepareInput() | |||
| } | |||
| // SetString configures p for the given paragraph text. It replaces text | |||
| // previously set by SetBytes or SetString. If b contains a paragraph separator | |||
| // SetString configures s for the given paragraph text. It replaces text | |||
| // previously set by SetBytes or SetString. If s contains a paragraph separator | |||
| // it will only process the first paragraph and report the number of bytes | |||
| // consumed from b including this separator. Error may be non-nil if options are | |||
| // consumed from s including this separator. Error may be non-nil if options are | |||
| // given. | |||
| func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { | |||
| panic("unimplemented") | |||
| p.p = []byte(s) | |||
| p.opts = opts | |||
| return p.prepareInput() | |||
| } | |||
| // IsLeftToRight reports whether the principle direction of rendering for this | |||
| // paragraphs is left-to-right. If this returns false, the principle direction | |||
| // of rendering is right-to-left. | |||
| func (p *Paragraph) IsLeftToRight() bool { | |||
| panic("unimplemented") | |||
| return p.Direction() == LeftToRight | |||
| } | |||
| // Direction returns the direction of the text of this paragraph. | |||
| // | |||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | |||
| func (p *Paragraph) Direction() Direction { | |||
| panic("unimplemented") | |||
| return p.o.Direction() | |||
| } | |||
| // TODO: what happens if the position is > len(input)? This should return an error. | |||
| // RunAt reports the Run at the given position of the input text. | |||
| // | |||
| // This method can be used for computing line breaks on paragraphs. | |||
| func (p *Paragraph) RunAt(pos int) Run { | |||
| panic("unimplemented") | |||
| c := 0 | |||
| runNumber := 0 | |||
| for i, r := range p.o.runes { | |||
| c += len(r) | |||
| if pos < c { | |||
| runNumber = i | |||
| } | |||
| } | |||
| return p.o.Run(runNumber) | |||
| } | |||
| func calculateOrdering(levels []level, runes []rune) Ordering { | |||
| var curDir Direction | |||
| prevDir := Neutral | |||
| prevI := 0 | |||
| o := Ordering{} | |||
| // lvl = 0,2,4,...: left to right | |||
| // lvl = 1,3,5,...: right to left | |||
| for i, lvl := range levels { | |||
| if lvl%2 == 0 { | |||
| curDir = LeftToRight | |||
| } else { | |||
| curDir = RightToLeft | |||
| } | |||
| if curDir != prevDir { | |||
| if i > 0 { | |||
| o.runes = append(o.runes, runes[prevI:i]) | |||
| o.directions = append(o.directions, prevDir) | |||
| o.startpos = append(o.startpos, prevI) | |||
| } | |||
| prevI = i | |||
| prevDir = curDir | |||
| } | |||
| } | |||
| o.runes = append(o.runes, runes[prevI:]) | |||
| o.directions = append(o.directions, prevDir) | |||
| o.startpos = append(o.startpos, prevI) | |||
| return o | |||
| } | |||
| // Order computes the visual ordering of all the runs in a Paragraph. | |||
| func (p *Paragraph) Order() (Ordering, error) { | |||
| panic("unimplemented") | |||
| if len(p.types) == 0 { | |||
| return Ordering{}, nil | |||
| } | |||
| for _, fn := range p.opts { | |||
| fn(&p.options) | |||
| } | |||
| lvl := level(-1) | |||
| if p.options.defaultDirection == RightToLeft { | |||
| lvl = 1 | |||
| } | |||
| para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) | |||
| if err != nil { | |||
| return Ordering{}, err | |||
| } | |||
| levels := para.getLevels([]int{len(p.types)}) | |||
| p.o = calculateOrdering(levels, p.runes) | |||
| return p.o, nil | |||
| } | |||
| // Line computes the visual ordering of runs for a single line starting and | |||
| // ending at the given positions in the original text. | |||
| func (p *Paragraph) Line(start, end int) (Ordering, error) { | |||
| panic("unimplemented") | |||
| lineTypes := p.types[start:end] | |||
| para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) | |||
| if err != nil { | |||
| return Ordering{}, err | |||
| } | |||
| levels := para.getLevels([]int{len(lineTypes)}) | |||
| o := calculateOrdering(levels, p.runes[start:end]) | |||
| return o, nil | |||
| } | |||
| // An Ordering holds the computed visual order of runs of a Paragraph. Calling | |||
| // SetBytes or SetString on the originating Paragraph invalidates an Ordering. | |||
| // The methods of an Ordering should only be called by one goroutine at a time. | |||
| type Ordering struct{} | |||
| type Ordering struct { | |||
| runes [][]rune | |||
| directions []Direction | |||
| startpos []int | |||
| } | |||
| // Direction reports the directionality of the runs. | |||
| // | |||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | |||
| func (o *Ordering) Direction() Direction { | |||
| panic("unimplemented") | |||
| return o.directions[0] | |||
| } | |||
| // NumRuns returns the number of runs. | |||
| func (o *Ordering) NumRuns() int { | |||
| panic("unimplemented") | |||
| return len(o.runes) | |||
| } | |||
| // Run returns the ith run within the ordering. | |||
| func (o *Ordering) Run(i int) Run { | |||
| panic("unimplemented") | |||
| r := Run{ | |||
| runes: o.runes[i], | |||
| direction: o.directions[i], | |||
| startpos: o.startpos[i], | |||
| } | |||
| return r | |||
| } | |||
| // TODO: perhaps with options. | |||
| @@ -155,16 +286,19 @@ func (o *Ordering) Run(i int) Run { | |||
| // A Run is a continuous sequence of characters of a single direction. | |||
| type Run struct { | |||
| runes []rune | |||
| direction Direction | |||
| startpos int | |||
| } | |||
| // String returns the text of the run in its original order. | |||
| func (r *Run) String() string { | |||
| panic("unimplemented") | |||
| return string(r.runes) | |||
| } | |||
| // Bytes returns the text of the run in its original order. | |||
| func (r *Run) Bytes() []byte { | |||
| panic("unimplemented") | |||
| return []byte(r.String()) | |||
| } | |||
| // TODO: methods for | |||
| @@ -174,25 +308,52 @@ func (r *Run) Bytes() []byte { | |||
| // Direction reports the direction of the run. | |||
| func (r *Run) Direction() Direction { | |||
| panic("unimplemented") | |||
| return r.direction | |||
| } | |||
| // Position of the Run within the text passed to SetBytes or SetString of the | |||
| // Pos returns the position of the Run within the text passed to SetBytes or SetString of the | |||
| // originating Paragraph value. | |||
| func (r *Run) Pos() (start, end int) { | |||
| panic("unimplemented") | |||
| return r.startpos, r.startpos + len(r.runes) - 1 | |||
| } | |||
| // AppendReverse reverses the order of characters of in, appends them to out, | |||
| // and returns the result. Modifiers will still follow the runes they modify. | |||
| // Brackets are replaced with their counterparts. | |||
| func AppendReverse(out, in []byte) []byte { | |||
| panic("unimplemented") | |||
| ret := make([]byte, len(in)+len(out)) | |||
| copy(ret, out) | |||
| inRunes := bytes.Runes(in) | |||
| for i, r := range inRunes { | |||
| prop, _ := LookupRune(r) | |||
| if prop.IsBracket() { | |||
| inRunes[i] = prop.reverseBracket(r) | |||
| } | |||
| } | |||
| for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { | |||
| inRunes[i], inRunes[j] = inRunes[j], inRunes[i] | |||
| } | |||
| copy(ret[len(out):], string(inRunes)) | |||
| return ret | |||
| } | |||
| // ReverseString reverses the order of characters in s and returns a new string. | |||
| // Modifiers will still follow the runes they modify. Brackets are replaced with | |||
| // their counterparts. | |||
| func ReverseString(s string) string { | |||
| panic("unimplemented") | |||
| input := []rune(s) | |||
| li := len(input) | |||
| ret := make([]rune, li) | |||
| for i, r := range input { | |||
| prop, _ := LookupRune(r) | |||
| if prop.IsBracket() { | |||
| ret[li-i-1] = prop.reverseBracket(r) | |||
| } else { | |||
| ret[li-i-1] = r | |||
| } | |||
| } | |||
| return string(ret) | |||
| } | |||
| @@ -4,7 +4,10 @@ | |||
| package bidi | |||
| import "log" | |||
| import ( | |||
| "fmt" | |||
| "log" | |||
| ) | |||
| // This implementation is a port based on the reference implementation found at: | |||
| // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/ | |||
| @@ -97,13 +100,20 @@ type paragraph struct { | |||
| // rune (suggested is the rune of the open bracket for opening and matching | |||
| // close brackets, after normalization). The embedding levels are optional, but | |||
| // may be supplied to encode embedding levels of styled text. | |||
| // | |||
| // TODO: return an error. | |||
| func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { | |||
| validateTypes(types) | |||
| validatePbTypes(pairTypes) | |||
| validatePbValues(pairValues, pairTypes) | |||
| validateParagraphEmbeddingLevel(levels) | |||
| func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) { | |||
| var err error | |||
| if err = validateTypes(types); err != nil { | |||
| return nil, err | |||
| } | |||
| if err = validatePbTypes(pairTypes); err != nil { | |||
| return nil, err | |||
| } | |||
| if err = validatePbValues(pairValues, pairTypes); err != nil { | |||
| return nil, err | |||
| } | |||
| if err = validateParagraphEmbeddingLevel(levels); err != nil { | |||
| return nil, err | |||
| } | |||
| p := ¶graph{ | |||
| initialTypes: append([]Class(nil), types...), | |||
| @@ -115,7 +125,7 @@ func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, lev | |||
| resultTypes: append([]Class(nil), types...), | |||
| } | |||
| p.run() | |||
| return p | |||
| return p, nil | |||
| } | |||
| func (p *paragraph) Len() int { return len(p.initialTypes) } | |||
| @@ -1001,58 +1011,61 @@ func typeForLevel(level level) Class { | |||
| return R | |||
| } | |||
| // TODO: change validation to not panic | |||
| func validateTypes(types []Class) { | |||
| func validateTypes(types []Class) error { | |||
| if len(types) == 0 { | |||
| log.Panic("types is null") | |||
| return fmt.Errorf("types is null") | |||
| } | |||
| for i, t := range types[:len(types)-1] { | |||
| if t == B { | |||
| log.Panicf("B type before end of paragraph at index: %d", i) | |||
| return fmt.Errorf("B type before end of paragraph at index: %d", i) | |||
| } | |||
| } | |||
| return nil | |||
| } | |||
| func validateParagraphEmbeddingLevel(embeddingLevel level) { | |||
| func validateParagraphEmbeddingLevel(embeddingLevel level) error { | |||
| if embeddingLevel != implicitLevel && | |||
| embeddingLevel != 0 && | |||
| embeddingLevel != 1 { | |||
| log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) | |||
| return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel) | |||
| } | |||
| return nil | |||
| } | |||
| func validateLineBreaks(linebreaks []int, textLength int) { | |||
| func validateLineBreaks(linebreaks []int, textLength int) error { | |||
| prev := 0 | |||
| for i, next := range linebreaks { | |||
| if next <= prev { | |||
| log.Panicf("bad linebreak: %d at index: %d", next, i) | |||
| return fmt.Errorf("bad linebreak: %d at index: %d", next, i) | |||
| } | |||
| prev = next | |||
| } | |||
| if prev != textLength { | |||
| log.Panicf("last linebreak was %d, want %d", prev, textLength) | |||
| return fmt.Errorf("last linebreak was %d, want %d", prev, textLength) | |||
| } | |||
| return nil | |||
| } | |||
| func validatePbTypes(pairTypes []bracketType) { | |||
| func validatePbTypes(pairTypes []bracketType) error { | |||
| if len(pairTypes) == 0 { | |||
| log.Panic("pairTypes is null") | |||
| return fmt.Errorf("pairTypes is null") | |||
| } | |||
| for i, pt := range pairTypes { | |||
| switch pt { | |||
| case bpNone, bpOpen, bpClose: | |||
| default: | |||
| log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) | |||
| return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i]) | |||
| } | |||
| } | |||
| return nil | |||
| } | |||
| func validatePbValues(pairValues []rune, pairTypes []bracketType) { | |||
| func validatePbValues(pairValues []rune, pairTypes []bracketType) error { | |||
| if pairValues == nil { | |||
| log.Panic("pairValues is null") | |||
| return fmt.Errorf("pairValues is null") | |||
| } | |||
| if len(pairTypes) != len(pairValues) { | |||
| log.Panic("pairTypes is different length from pairValues") | |||
| return fmt.Errorf("pairTypes is different length from pairValues") | |||
| } | |||
| return nil | |||
| } | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.10 && !go1.13 | |||
| // +build go1.10,!go1.13 | |||
| package bidi | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.13 && !go1.14 | |||
| // +build go1.13,!go1.14 | |||
| package bidi | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.14 && !go1.16 | |||
| // +build go1.14,!go1.16 | |||
| package bidi | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.16 | |||
| // +build go1.16 | |||
| package bidi | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build !go1.10 | |||
| // +build !go1.10 | |||
| package bidi | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.10 && !go1.13 | |||
| // +build go1.10,!go1.13 | |||
| package norm | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.13 && !go1.14 | |||
| // +build go1.13,!go1.14 | |||
| package norm | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.14 && !go1.16 | |||
| // +build go1.14,!go1.16 | |||
| package norm | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.16 | |||
| // +build go1.16 | |||
| package norm | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build !go1.10 | |||
| // +build !go1.10 | |||
| package norm | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.10 && !go1.13 | |||
| // +build go1.10,!go1.13 | |||
| package width | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.13 && !go1.14 | |||
| // +build go1.13,!go1.14 | |||
| package width | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.14 && !go1.16 | |||
| // +build go1.14,!go1.16 | |||
| package width | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build go1.16 | |||
| // +build go1.16 | |||
| package width | |||
| @@ -1,5 +1,6 @@ | |||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | |||
| //go:build !go1.10 | |||
| // +build !go1.10 | |||
| package width | |||
| @@ -5,6 +5,9 @@ github.com/Azure/go-ansiterm/winterm | |||
| github.com/PuerkitoBio/purell | |||
| # github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 | |||
| github.com/PuerkitoBio/urlesc | |||
| # github.com/aymerick/douceur v0.2.0 | |||
| github.com/aymerick/douceur/css | |||
| github.com/aymerick/douceur/parser | |||
| # github.com/davecgh/go-spew v1.1.1 | |||
| github.com/davecgh/go-spew/spew | |||
| # github.com/docker/distribution v2.7.1+incompatible | |||
| @@ -54,6 +57,8 @@ github.com/google/uuid | |||
| github.com/googleapis/gnostic/compiler | |||
| github.com/googleapis/gnostic/extensions | |||
| github.com/googleapis/gnostic/openapiv2 | |||
| # github.com/gorilla/css v1.0.0 | |||
| github.com/gorilla/css/scanner | |||
| # github.com/gorilla/websocket v1.4.2 | |||
| ## explicit | |||
| github.com/gorilla/websocket | |||
| @@ -84,6 +89,10 @@ github.com/mailru/easyjson/jlexer | |||
| github.com/mailru/easyjson/jwriter | |||
| # github.com/mattn/go-sqlite3 v1.14.5 | |||
| github.com/mattn/go-sqlite3 | |||
| # github.com/microcosm-cc/bluemonday v1.0.18 | |||
| ## explicit | |||
| github.com/microcosm-cc/bluemonday | |||
| github.com/microcosm-cc/bluemonday/css | |||
| # github.com/minio/md5-simd v1.1.0 | |||
| github.com/minio/md5-simd | |||
| # github.com/minio/minio-go/v7 v7.0.10 | |||
| @@ -173,7 +182,7 @@ golang.org/x/crypto/blake2b | |||
| # golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449 | |||
| golang.org/x/mod/module | |||
| golang.org/x/mod/semver | |||
| # golang.org/x/net v0.0.0-20210224082022-3d97a244fca7 | |||
| # golang.org/x/net v0.0.0-20210614182718-04defd469f4e | |||
| golang.org/x/net/context | |||
| golang.org/x/net/context/ctxhttp | |||
| golang.org/x/net/html | |||
| @@ -196,7 +205,7 @@ golang.org/x/sys/unix | |||
| golang.org/x/sys/windows | |||
| # golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d | |||
| golang.org/x/term | |||
| # golang.org/x/text v0.3.4 | |||
| # golang.org/x/text v0.3.6 | |||
| golang.org/x/text/encoding | |||
| golang.org/x/text/encoding/charmap | |||
| golang.org/x/text/encoding/htmlindex | |||