Merge pull request kubernetes#11038 from lavalamp/doclinks

Link verification
Telmo · Jul 10, 2015 · 8df6c5c · 8df6c5c
2 parents affba42 + 59dca5b
commit 8df6c5c
Show file tree

Hide file tree

Showing 51 changed files with 284 additions and 133 deletions.
diff --git a/cmd/mungedocs/links.go b/cmd/mungedocs/links.go
@@ -0,0 +1,143 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"fmt"
+	"net/url"
+	"os"
+	"path"
+	"regexp"
+	"strings"
+)
+
+var (
+	// linkRE matches markdown links of the form [foo](bar "alt-text"); group 1 is foo, group 2 is the rest.
+	linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)
+	// altTextRE splits the link target into the target proper and the quoted alt-text (with its leading space).
+	altTextRE = regexp.MustCompile(`(.*)( ".*")`)
+)
+
+// checkLinks scans fileBytes for markdown links and verifies that every
+// relative link points at a file or directory that exists on disk.
+//
+// filePath is the path of the document being checked; link targets are
+// resolved against its directory. The returned bytes are a rewritten copy of
+// the document in which link targets, and visible text that looks like a
+// file name, are normalized. The error is non-nil if any link is unparsable
+// or its target is missing; it lists every problem found, one per line.
+func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
+	dir := path.Dir(filePath)
+	errors := []string{}
+
+	output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) (out []byte) {
+		match := linkRE.FindSubmatch(in)
+		// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
+		visibleText := string(match[1])
+		linkText := string(match[2])
+		altText := ""
+		if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
+			linkText = parts[1]
+			altText = parts[2]
+		}
+
+		// Strip the stray spaces and newlines found around some link targets in our docs.
+		linkText = strings.Trim(linkText, " ")
+		linkText = strings.Trim(linkText, "\n")
+		linkText = strings.Trim(linkText, " ")
+
+		u, err := url.Parse(linkText)
+		if err != nil {
+			errors = append(errors, fmt.Sprintf("%v, link %q is unparsable: %v", filePath, linkText, err))
+			return in
+		}
+
+		if u.Host != "" {
+			// We only care about relative links.
+			return in
+		}
+
+		suggestedVisibleText := visibleText
+		if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
+			newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
+			if !targetExists {
+				errors = append(errors, fmt.Sprintf("%v, %q: target not found\n", filePath, linkText))
+			}
+			u.Path = newPath
+			// Make the visible text show the absolute path if it's
+			// not nested in or beneath the current directory.
+			if strings.HasPrefix(u.Path, "..") {
+				suggestedVisibleText = makeRepoRelative(path.Join(dir, u.Path))
+			} else {
+				suggestedVisibleText = u.Path
+			}
+			// Remove %28 type stuff, be nice to humans, and don't fight with
+			// the toc generator. Keep the escaped form when unescaping fails
+			// or would put whitespace (from %20 or '+') into the link target.
+			linkText = u.String()
+			if unescaped, err := url.QueryUnescape(linkText); err == nil && !strings.ContainsAny(unescaped, " \t\r\n") {
+				linkText = unescaped
+			}
+		}
+		// If the current visible text is trying to be a file name, use
+		// the correct file name.
+		if (strings.Contains(visibleText, ".md") || strings.Contains(visibleText, "/")) && !strings.ContainsAny(visibleText, ` '"`+"`") {
+			visibleText = suggestedVisibleText
+		}
+
+		return []byte(fmt.Sprintf("[%s](%s)", visibleText, linkText+altText))
+	})
+	var err error
+	if len(errors) != 0 {
+		err = fmt.Errorf("%s", strings.Join(errors, "\n"))
+	}
+	return output, err
+}
+
+// makeRepoRelative returns what follows the repo root in path, or path itself if the root is absent.
+func makeRepoRelative(path string) string {
+	const repoRoot = "github.com/GoogleCloudPlatform/kubernetes/"
+	if pieces := strings.Split(path, repoRoot); len(pieces) >= 2 {
+		return pieces[1]
+	}
+	return path
+}
+
+// checkPath looks for linkPath relative to filePath's directory, retrying with up to
+// four leading ".." segments. It returns the path that worked and whether one did.
+func checkPath(filePath, linkPath string) (newPath string, ok bool) {
+	// Any absolute paths that aren't relative to github.com are wrong; try to fix.
+	if strings.HasPrefix(linkPath, "/") && !strings.HasPrefix(linkPath, "/GoogleCloudPlatform") {
+		linkPath = strings.TrimPrefix(linkPath, "/")
+	}
+	base := path.Dir(filePath)
+	candidate := linkPath
+	for attempt := 0; attempt < 5; attempt++ {
+		info, err := os.Stat(path.Join(base, candidate))
+		switch {
+		case err != nil:
+			// Nothing at this location; look one directory higher next time.
+			candidate = path.Join("..", candidate)
+		case info.IsDir():
+			// Directories are reported with a trailing slash.
+			return candidate + "/", true
+		default:
+			return candidate, true
+		}
+	}
+	return linkPath, false
+}
diff --git a/cmd/mungedocs/mungedocs.go b/cmd/mungedocs/mungedocs.go
@@ -33,52 +33,62 @@ var (
 	rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")
 
 	ErrChangesNeeded = errors.New("mungedocs: changes required")
+
+	// TODO: allow selection from command line. (e.g., just check links in the examples directory.)
+	mungesToMake = munges{
+		munger(updateTOC),
+		munger(checkLinks),
+	}
 )
 
-func visitAndVerify(path string, i os.FileInfo, e error) error {
-	return visitAndChangeOrVerify(path, i, e, false)
-}
+// munger processes a document, returning an updated document xor an error.
+// A munger is NOT allowed to mutate 'before'; if changes are needed it must
+// copy the data into a new byte array.
+type munger func(filePath string, before []byte) (after []byte, err error)
+
+type munges []munger
 
-func visitAndChange(path string, i os.FileInfo, e error) error {
-	return visitAndChangeOrVerify(path, i, e, true)
+type fileProcessor struct {
+	// The munge functions to call on each file, in order.
+	munges munges
+
+	// If true, report ErrChangesNeeded instead of writing changes to disk.
+	verifyOnly bool
 }
 
 // Either change a file or verify that it needs no changes (according to modify argument)
-func visitAndChangeOrVerify(path string, i os.FileInfo, e error, modify bool) error {
+func (f fileProcessor) visit(path string, i os.FileInfo, e error) error {
 	if !strings.HasSuffix(path, ".md") {
 		return nil
 	}
-	file, err := os.Open(path)
-	if err != nil {
-		return err
-	}
-	defer file.Close()
 
-	before, err := ioutil.ReadAll(file)
+	fileBytes, err := ioutil.ReadFile(path)
 	if err != nil {
 		return err
 	}
 
-	after, err := updateTOC(before)
-	if err != nil {
-		return err
-	}
-	if modify {
-		// Write out new file with any changes.
-		if !bytes.Equal(after, before) {
-			file.Close()
-			ioutil.WriteFile(path, after, 0644)
+	modificationsMade := false
+	for _, munge := range f.munges {
+		after, err := munge(path, fileBytes)
+		if err != nil {
+			return err
 		}
-	} else {
-		// Just verify that there are no changes.
-		if !bytes.Equal(after, before) {
-			return ErrChangesNeeded
+		if !modificationsMade {
+			if !bytes.Equal(after, fileBytes) {
+				modificationsMade = true
+				if f.verifyOnly {
+					// We're not allowed to make changes.
+					return ErrChangesNeeded
+				}
+			}
 		}
+		fileBytes = after
 	}
 
-	// TODO(erictune): more types of passes, such as:
-	// Linkify terms
-	// Verify links point to files.
+	// Write out new file with any changes.
+	if modificationsMade {
+		ioutil.WriteFile(path, fileBytes, 0644)
+	}
 
 	return nil
 }
@@ -91,19 +101,19 @@ func main() {
 		os.Exit(1)
 	}
 
+	fp := fileProcessor{
+		munges:     mungesToMake,
+		verifyOnly: *verify,
+	}
+
 	// For each markdown file under source docs root, process the doc.
 	// If any error occurs, will exit with failure.
 	// If verify is true, then status is 0 for no changes needed, 1 for changes needed
 	// and >1 for an error during processing.
 	// If verify is false, then status is 0 if changes successfully made or no changes needed,
 	// 1 if changes were needed but require human intervention, and >1 for an unexpected
 	// error during processing.
-	var err error
-	if *verify {
-		err = filepath.Walk(*rootDir, visitAndVerify)
-	} else {
-		err = filepath.Walk(*rootDir, visitAndChange)
-	}
+	err := filepath.Walk(*rootDir, fp.visit)
 	if err != nil {
 		if err == ErrChangesNeeded {
 			if *verify {

diff --git a/cmd/mungedocs/toc.go b/cmd/mungedocs/toc.go
@@ -30,7 +30,7 @@ import (
 // the ToC, thereby updating any previously inserted ToC.
 //
 // TODO(erictune): put this in own package with tests
-func updateTOC(markdown []byte) ([]byte, error) {
+func updateTOC(filePath string, markdown []byte) ([]byte, error) {
 	toc, err := buildTOC(markdown)
 	if err != nil {
 		return nil, err

diff --git a/cmd/mungedocs/toc_test.go b/cmd/mungedocs/toc_test.go
@@ -92,7 +92,7 @@ func Test_updateTOC(t *testing.T) {
 			"# Title\nLorem ipsum \n**table of contents**\n<!-- BEGIN GENERATED TOC -->\n- [Title](#title)\n  - [Section Heading](#section-heading)\n\n<!-- END GENERATED TOC -->\n## Section Heading\ndolor sit amet\n"},
 	}
 	for _, c := range cases {
-		actual, err := updateTOC([]byte(c.in))
+		actual, err := updateTOC("filename.md", []byte(c.in))
 		assert.NoError(t, err)
 		if c.out != string(actual) {
 			t.Errorf("Expected TOC '%v' but got '%v'", c.out, string(actual))

diff --git a/docs/README.md b/docs/README.md
@@ -29,9 +29,9 @@ certainly want the docs that go with that version.</h1>
 * The [API object documentation](http://kubernetes.io/third_party/swagger-ui/)
   is a detailed description of all fields found in core API objects.
 
-* An overview of the [Design of Kubernetes](design)
+* An overview of the [Design of Kubernetes](design/)
 
-* There are example files and walkthroughs in the [examples](../examples)
+* There are example files and walkthroughs in the [examples](../examples/)
   folder.
 
 

diff --git a/docs/accessing-the-cluster.md b/docs/accessing-the-cluster.md
@@ -10,7 +10,7 @@ kubernetes CLI, `kubectl`.
 
 To access a cluster, you need to know the location of the cluster and have credentials
 to access it.  Typically, this is automatically set-up when you work through
-though a [Getting started guide](../docs/getting-started-guide/README.md),
+a [Getting started guide](../docs/getting-started-guides/README.md),
 or someone else setup the cluster and provided you with credentials and a location.
 
 Check the location and credentials that kubectl knows about with this command:

diff --git a/docs/accessing_the_api.md b/docs/accessing_the_api.md
@@ -24,8 +24,8 @@ By default the Kubernetes APIserver serves HTTP on 2 ports:
     - default is port 6443, change with `--secure-port` flag.
     - default IP is first non-localhost network interface, change with `--bind-address` flag.
     - serves HTTPS.  Set cert with `--tls-cert-file` and key with `--tls-private-key-file` flag.
-    - uses token-file or client-certificate based [authentication](./authentication.md).
-    - uses policy-based [authorization](./authorization.md).
+    - uses token-file or client-certificate based [authentication](authentication.md).
+    - uses policy-based [authorization](authorization.md).
   3. Removed: ReadOnly Port
     - For security reasons, this had to be removed. Use the service account feature instead.
 

diff --git a/docs/admission_controllers.md b/docs/admission_controllers.md
@@ -46,7 +46,7 @@ commands in those containers, we strongly encourage enabling this plug-in.
 
 ### ServiceAccount
 
-This plug-in implements automation for [serviceAccounts]( service_accounts.md).
+This plug-in implements automation for [serviceAccounts](service_accounts.md).
 We strongly recommend using this plug-in if you intend to make use of Kubernetes ```ServiceAccount``` objects.
 
 ### SecurityContextDeny
@@ -59,7 +59,7 @@ This plug-in will observe the incoming request and ensure that it does not viola
 enumerated in the ```ResourceQuota``` object in a ```Namespace```.  If you are using ```ResourceQuota```
 objects in your Kubernetes deployment, you MUST use this plug-in to enforce quota constraints.
 
-See the [resourceQuota design doc]( design/admission_control_resource_quota.md).
+See the [resourceQuota design doc](design/admission_control_resource_quota.md).
 
 It is strongly encouraged that this plug-in is configured last in the sequence of admission control plug-ins.  This is
 so that quota is not prematurely incremented only for the request to be rejected later in admission control.
@@ -70,7 +70,7 @@ This plug-in will observe the incoming request and ensure that it does not viola
 enumerated in the ```LimitRange``` object in a ```Namespace```.  If you are using ```LimitRange``` objects in
 your Kubernetes deployment, you MUST use this plug-in to enforce those constraints.
 
-See the [limitRange design doc]( design/admission_control_limit_range.md).
+See the [limitRange design doc](design/admission_control_limit_range.md).
 
 ### NamespaceExists
 

diff --git a/docs/api-conventions.md b/docs/api-conventions.md
@@ -118,7 +118,7 @@ In order to preserve extensibility, in the future, we intend to explicitly conve
 
 Note that historical information status (e.g., last transition time, failure counts) is only provided at best effort, and is not guaranteed to not be lost.
 
-Status information that may be large (especially unbounded in size, such as lists of references to other objects -- see below) and/or rapidly changing, such as [resource usage](./design/resources.md#usage-data), should be put into separate objects, with possibly a reference from the original object. This helps to ensure that GETs and watch remain reasonably efficient for the majority of clients, which may not need that data.
+Status information that may be large (especially unbounded in size, such as lists of references to other objects -- see below) and/or rapidly changing, such as [resource usage](design/resources.md#usage-data), should be put into separate objects, with possibly a reference from the original object. This helps to ensure that GETs and watch remain reasonably efficient for the majority of clients, which may not need that data.
 
 #### References to related objects
 

diff --git a/docs/authorization.md b/docs/authorization.md
@@ -2,7 +2,7 @@
 
 
 In Kubernetes, authorization happens as a separate step from authentication.
-See the [authentication documentation](./authentication.md) for an 
+See the [authentication documentation](authentication.md) for an 
 overview of authentication.
 
 Authorization applies to all HTTP accesses on the main apiserver port. (The

diff --git a/docs/availability.md b/docs/availability.md
@@ -120,7 +120,7 @@ then you need `R + U` clusters.  If it is not (e.g you want to ensure low latenc
 cluster failure), then you need to have `R * U` clusters (`U` in each of `R` regions).  In any case, try to put each cluster in a different zone.
 
 Finally, if any of your clusters would need more than the maximum recommended number of nodes for a Kubernetes cluster, then
-you may need even more clusters.  Our [roadmap](./roadmap.md)
+you may need even more clusters.  Our [roadmap](roadmap.md)
 calls for maximum 100 node clusters at v1.0 and maximum 1000 node clusters in the middle of 2015.
 
 ## Working with multiple clusters

diff --git a/docs/cluster-admin-guide.md b/docs/cluster-admin-guide.md
@@ -64,13 +64,13 @@ project.](salt.md).
   Describes the environment for Kubelet managed containers on a Kubernetes
   node.
 
-* **Securing access to the API Server** [accessing the api]( accessing_the_api.md)
+* **Securing access to the API Server** [accessing the api](accessing_the_api.md)
 
-* **Authentication**  [authentication]( authentication.md)
+* **Authentication**  [authentication](authentication.md)
 
-* **Authorization** [authorization]( authorization.md)
+* **Authorization** [authorization](authorization.md)
 
-* **Admission Controllers** [admission_controllers]( admission_controllers.md)
+* **Admission Controllers** [admission_controllers](admission_controllers.md)