Skip to content

Commit

Permalink
Merge pull request kubernetes#11038 from lavalamp/doclinks
Browse files Browse the repository at this point in the history
Link verification
  • Loading branch information
rjnagal committed Jul 10, 2015
2 parents affba42 + 59dca5b commit 8df6c5c
Show file tree
Hide file tree
Showing 51 changed files with 284 additions and 133 deletions.
143 changes: 143 additions & 0 deletions cmd/mungedocs/links.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"fmt"
"net/url"
"os"
"path"
"regexp"
"strings"
)

var (
// linkRE finds markdown links of the form [foo](bar "alt-text").
// Submatch 1 is the text between the square brackets (the visible text) and
// submatch 2 is everything between the parentheses (the link target plus
// any alt-text). The character classes stop at the first ']' and ')', so
// link text or targets that themselves contain those characters are not
// matched as a unit.
linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)
// altTextRE splits the parenthesized part of a link into the link target
// (submatch 1) and the alt-text (submatch 2). Submatch 2 keeps its leading
// space and the surrounding double quotes, so it can be appended back onto
// the target verbatim. It only matches when a quoted alt-text is present.
altTextRE = regexp.MustCompile(`(.*)( ".*")`)
)

// checkLinks assumes fileBytes has links in markdown syntax, and verifies that
// any relative links actually point to files that exist.
//
// Every markdown link found by linkRE is handled as follows:
//   - links whose target cannot be parsed as a URL are reported and left
//     untouched;
//   - links with a host (i.e. non-relative links) are left untouched;
//   - every other link is re-emitted as [visible](target alt-text) with
//     surrounding whitespace removed from the target. If the target has a path
//     (and does not start with "TODO:"), the path is resolved via checkPath
//     and replaced by the path checkPath returns; a missing target is
//     reported. Visible text that looks like a file name is replaced by the
//     corrected path.
//
// filePath is the path of the document being processed; it is used to resolve
// relative targets and to prefix error messages. fileBytes is not modified.
//
// The returned slice is the rewritten document. The returned error is non-nil
// if any link was unparsable or pointed at a missing target; it contains one
// line per problem. The rewritten document is returned even in that case.
func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
	dir := path.Dir(filePath)
	var errs []string

	output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) []byte {
		match := linkRE.FindSubmatch(in)
		// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
		visibleText := string(match[1])
		linkText := string(match[2])
		altText := ""
		if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
			linkText = parts[1]
			altText = parts[2]
		}

		// Clean up stray spaces and newlines found around link targets in
		// the docs. The three-step order is kept as-is on purpose.
		linkText = strings.Trim(linkText, " ")
		linkText = strings.Trim(linkText, "\n")
		linkText = strings.Trim(linkText, " ")

		u, err := url.Parse(linkText)
		if err != nil {
			errs = append(
				errs,
				fmt.Sprintf("%v, link %q is unparsable: %v", filePath, linkText, err),
			)
			return in
		}

		if u.Host != "" {
			// We only care about relative links.
			return in
		}

		suggestedVisibleText := visibleText
		if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
			newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
			if !targetExists {
				// No trailing newline: messages are joined with "\n" below.
				errs = append(
					errs,
					fmt.Sprintf("%v, %q: target not found", filePath, linkText),
				)
			}
			u.Path = newPath
			// Make the visible text show the absolute path if it's
			// not nested in or beneath the current directory.
			if strings.HasPrefix(u.Path, "..") {
				suggestedVisibleText = makeRepoRelative(path.Join(dir, u.Path))
			} else {
				suggestedVisibleText = u.Path
			}

			// Remove %28 type stuff, be nice to humans.
			// And don't fight with the toc generator.
			//
			// Start from the escaped form so that a failed unescape can
			// never blank the link, and only switch to the unescaped form
			// when unescaping succeeded. QueryUnescape would also turn a
			// literal '+' into a space, which would change the target, so
			// '+' is protected as %2B before unescaping.
			linkText = u.String()
			protected := strings.Replace(linkText, "+", "%2B", -1)
			if unescaped, err := url.QueryUnescape(protected); err == nil {
				linkText = unescaped
			}
		}
		// If the current visible text is trying to be a file name, use
		// the correct file name.
		looksLikeFileName := strings.Contains(visibleText, ".md") || strings.Contains(visibleText, "/")
		if looksLikeFileName && !strings.ContainsAny(visibleText, ` '"`+"`") {
			visibleText = suggestedVisibleText
		}

		return []byte(fmt.Sprintf("[%s](%s)", visibleText, linkText+altText))
	})

	if len(errs) != 0 {
		return output, fmt.Errorf("%s", strings.Join(errs, "\n"))
	}
	return output, nil
}

// makeRepoRelative strips everything up to and including the first
// "github.com/GoogleCloudPlatform/kubernetes/" from path, so that the result
// no longer contains anything specific to the local filesystem. If path does
// not contain that marker it is returned unchanged.
//
// Only the first occurrence of the marker is treated as the repository root;
// any later occurrence is part of the repo-relative path and is preserved.
func makeRepoRelative(path string) string {
	// SplitN with a limit of 2 keeps the whole remainder after the first
	// marker instead of cutting it off at a second occurrence.
	parts := strings.SplitN(path, "github.com/GoogleCloudPlatform/kubernetes/", 2)
	if len(parts) > 1 {
		// Take out anything that is specific to the local filesystem.
		return parts[1]
	}
	return path
}

// checkPath looks for the target of linkPath on disk, relative to the
// directory containing filePath.
//
// A leading "/" is dropped from linkPath unless the path starts with
// "/GoogleCloudPlatform". The (possibly adjusted) path is then tried as-is
// and, if it does not exist, with one to four "../" segments prepended.
//
// On success it returns the first candidate that exists (with a trailing "/"
// when the target is a directory) and true. If no candidate exists it returns
// the adjusted linkPath and false.
func checkPath(filePath, linkPath string) (newPath string, ok bool) {
	baseDir := path.Dir(filePath)

	// Any absolute paths that aren't relative to github.com are wrong.
	// Try to fix them by treating them as relative.
	if strings.HasPrefix(linkPath, "/") && !strings.HasPrefix(linkPath, "/GoogleCloudPlatform") {
		linkPath = strings.TrimPrefix(linkPath, "/")
	}

	candidate := linkPath
	for attempt := 0; attempt < 5; attempt++ {
		info, statErr := os.Stat(path.Join(baseDir, candidate))
		if statErr != nil {
			// Not here; look one directory further up.
			candidate = path.Join("..", candidate)
			continue
		}
		if info.IsDir() {
			return candidate + "/", true
		}
		return candidate, true
	}
	return linkPath, false
}
78 changes: 44 additions & 34 deletions cmd/mungedocs/mungedocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,52 +33,62 @@ var (
rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")

ErrChangesNeeded = errors.New("mungedocs: changes required")

// TODO: allow selection from command line. (e.g., just check links in the examples directory.)
mungesToMake = munges{
munger(updateTOC),
munger(checkLinks),
}
)

func visitAndVerify(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, false)
}
// Munger processes a document, returning an updated document xor an error.
// Munger is NOT allowed to mutate 'before', if changes are needed it must copy
// data into a new byte array.
type munger func(filePath string, before []byte) (after []byte, err error)

type munges []munger

func visitAndChange(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, true)
type fileProcessor struct {
// Which munge functions should we call?
munges munges

// Are we allowed to make changes?
verifyOnly bool
}

// Either change a file or verify that it needs no changes (according to modify argument)
func visitAndChangeOrVerify(path string, i os.FileInfo, e error, modify bool) error {
func (f fileProcessor) visit(path string, i os.FileInfo, e error) error {
if !strings.HasSuffix(path, ".md") {
return nil
}
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()

before, err := ioutil.ReadAll(file)
fileBytes, err := ioutil.ReadFile(path)
if err != nil {
return err
}

after, err := updateTOC(before)
if err != nil {
return err
}
if modify {
// Write out new file with any changes.
if !bytes.Equal(after, before) {
file.Close()
ioutil.WriteFile(path, after, 0644)
modificationsMade := false
for _, munge := range f.munges {
after, err := munge(path, fileBytes)
if err != nil {
return err
}
} else {
// Just verify that there are no changes.
if !bytes.Equal(after, before) {
return ErrChangesNeeded
if !modificationsMade {
if !bytes.Equal(after, fileBytes) {
modificationsMade = true
if f.verifyOnly {
// We're not allowed to make changes.
return ErrChangesNeeded
}
}
}
fileBytes = after
}

// TODO(erictune): more types of passes, such as:
// Linkify terms
// Verify links point to files.
// Write out new file with any changes.
if modificationsMade {
ioutil.WriteFile(path, fileBytes, 0644)
}

return nil
}
Expand All @@ -91,19 +101,19 @@ func main() {
os.Exit(1)
}

fp := fileProcessor{
munges: mungesToMake,
verifyOnly: *verify,
}

// For each markdown file under source docs root, process the doc.
// If any error occurs, will exit with failure.
// If verify is true, then status is 0 for no changes needed, 1 for changes needed
// and >1 for an error during processing.
// If verify is false, then status is 0 if changes successfully made or no changes needed,
// 1 if changes were needed but require human intervention, and >1 for an unexpected
// error during processing.
var err error
if *verify {
err = filepath.Walk(*rootDir, visitAndVerify)
} else {
err = filepath.Walk(*rootDir, visitAndChange)
}
err := filepath.Walk(*rootDir, fp.visit)
if err != nil {
if err == ErrChangesNeeded {
if *verify {
Expand Down
2 changes: 1 addition & 1 deletion cmd/mungedocs/toc.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
// the ToC, thereby updating any previously inserted ToC.
//
// TODO(erictune): put this in own package with tests
func updateTOC(markdown []byte) ([]byte, error) {
func updateTOC(filePath string, markdown []byte) ([]byte, error) {
toc, err := buildTOC(markdown)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion cmd/mungedocs/toc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func Test_updateTOC(t *testing.T) {
"# Title\nLorem ipsum \n**table of contents**\n<!-- BEGIN GENERATED TOC -->\n- [Title](#title)\n - [Section Heading](#section-heading)\n\n<!-- END GENERATED TOC -->\n## Section Heading\ndolor sit amet\n"},
}
for _, c := range cases {
actual, err := updateTOC([]byte(c.in))
actual, err := updateTOC("filename.md", []byte(c.in))
assert.NoError(t, err)
if c.out != string(actual) {
t.Errorf("Expected TOC '%v' but got '%v'", c.out, string(actual))
Expand Down
4 changes: 2 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ certainly want the docs that go with that version.</h1>
* The [API object documentation](http://kubernetes.io/third_party/swagger-ui/)
is a detailed description of all fields found in core API objects.

* An overview of the [Design of Kubernetes](design)
* An overview of the [Design of Kubernetes](design/)

* There are example files and walkthroughs in the [examples](../examples)
* There are example files and walkthroughs in the [examples](../examples/)
folder.


Expand Down
2 changes: 1 addition & 1 deletion docs/accessing-the-cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ kubernetes CLI, `kubectl`.

To access a cluster, you need to know the location of the cluster and have credentials
to access it. Typically, this is automatically set-up when you work through
though a [Getting started guide](../docs/getting-started-guide/README.md),
though a [Getting started guide](../docs/getting-started-guides/README.md),
or someone else setup the cluster and provided you with credentials and a location.

Check the location and credentials that kubectl knows about with this command:
Expand Down
4 changes: 2 additions & 2 deletions docs/accessing_the_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ By default the Kubernetes APIserver serves HTTP on 2 ports:
- default is port 6443, change with `--secure-port` flag.
- default IP is first non-localhost network interface, change with `--bind-address` flag.
- serves HTTPS. Set cert with `--tls-cert-file` and key with `--tls-private-key-file` flag.
- uses token-file or client-certificate based [authentication](./authentication.md).
- uses policy-based [authorization](./authorization.md).
- uses token-file or client-certificate based [authentication](authentication.md).
- uses policy-based [authorization](authorization.md).
3. Removed: ReadOnly Port
- For security reasons, this had to be removed. Use the service account feature instead.

Expand Down
6 changes: 3 additions & 3 deletions docs/admission_controllers.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ commands in those containers, we strongly encourage enabling this plug-in.

### ServiceAccount

This plug-in implements automation for [serviceAccounts]( service_accounts.md).
This plug-in implements automation for [serviceAccounts](service_accounts.md).
We strongly recommend using this plug-in if you intend to make use of Kubernetes ```ServiceAccount``` objects.

### SecurityContextDeny
Expand All @@ -59,7 +59,7 @@ This plug-in will observe the incoming request and ensure that it does not viola
enumerated in the ```ResourceQuota``` object in a ```Namespace```. If you are using ```ResourceQuota```
objects in your Kubernetes deployment, you MUST use this plug-in to enforce quota constraints.

See the [resourceQuota design doc]( design/admission_control_resource_quota.md).
See the [resourceQuota design doc](design/admission_control_resource_quota.md).

It is strongly encouraged that this plug-in is configured last in the sequence of admission control plug-ins. This is
so that quota is not prematurely incremented only for the request to be rejected later in admission control.
Expand All @@ -70,7 +70,7 @@ This plug-in will observe the incoming request and ensure that it does not viola
enumerated in the ```LimitRange``` object in a ```Namespace```. If you are using ```LimitRange``` objects in
your Kubernetes deployment, you MUST use this plug-in to enforce those constraints.

See the [limitRange design doc]( design/admission_control_limit_range.md).
See the [limitRange design doc](design/admission_control_limit_range.md).

### NamespaceExists

Expand Down
2 changes: 1 addition & 1 deletion docs/api-conventions.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ In order to preserve extensibility, in the future, we intend to explicitly conve

Note that historical information status (e.g., last transition time, failure counts) is only provided at best effort, and is not guaranteed to not be lost.

Status information that may be large (especially unbounded in size, such as lists of references to other objects -- see below) and/or rapidly changing, such as [resource usage](./design/resources.md#usage-data), should be put into separate objects, with possibly a reference from the original object. This helps to ensure that GETs and watch remain reasonably efficient for the majority of clients, which may not need that data.
Status information that may be large (especially unbounded in size, such as lists of references to other objects -- see below) and/or rapidly changing, such as [resource usage](design/resources.md#usage-data), should be put into separate objects, with possibly a reference from the original object. This helps to ensure that GETs and watch remain reasonably efficient for the majority of clients, which may not need that data.

#### References to related objects

Expand Down
2 changes: 1 addition & 1 deletion docs/authorization.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


In Kubernetes, authorization happens as a separate step from authentication.
See the [authentication documentation](./authentication.md) for an
See the [authentication documentation](authentication.md) for an
overview of authentication.

Authorization applies to all HTTP accesses on the main apiserver port. (The
Expand Down
2 changes: 1 addition & 1 deletion docs/availability.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ then you need `R + U` clusters. If it is not (e.g you want to ensure low latenc
cluster failure), then you need to have `R * U` clusters (`U` in each of `R` regions). In any case, try to put each cluster in a different zone.

Finally, if any of your clusters would need more than the maximum recommended number of nodes for a Kubernetes cluster, then
you may need even more clusters. Our [roadmap](./roadmap.md)
you may need even more clusters. Our [roadmap](roadmap.md)
calls for maximum 100 node clusters at v1.0 and maximum 1000 node clusters in the middle of 2015.

## Working with multiple clusters
Expand Down
8 changes: 4 additions & 4 deletions docs/cluster-admin-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ project.](salt.md).
Describes the environment for Kubelet managed containers on a Kubernetes
node.

* **Securing access to the API Server** [accessing the api]( accessing_the_api.md)
* **Securing access to the API Server** [accessing the api](accessing_the_api.md)

* **Authentication** [authentication]( authentication.md)
* **Authentication** [authentication](authentication.md)

* **Authorization** [authorization]( authorization.md)
* **Authorization** [authorization](authorization.md)

* **Admission Controllers** [admission_controllers]( admission_controllers.md)
* **Admission Controllers** [admission_controllers](admission_controllers.md)



Expand Down
Loading

0 comments on commit 8df6c5c

Please sign in to comment.