# Generates an index page for cataloging different versions of the Docs
Param (

# Name the recurring paths once so the rest of the script can refer to them by variable.
Write-Verbose "Name Reccuring paths with variable names"
$DocFxTool = "${RepoRoot}/docfx/docfx.exe"
$DocOutDir = "${RepoRoot}/docfx_project"

# Let docfx generate its default project skeleton in the output directory.
Write-Verbose "Initializing Default DocFx Site..."
& "${DocFxTool}" init -q -o "${DocOutDir}"

# Overlay the templates and docfx.json kept under $DocGenDir onto the generated project.
Write-Verbose "Copying template and configuration..."
New-Item -Path "${DocOutDir}" -Name "templates" -ItemType "directory"
Copy-Item "${DocGenDir}/templates/*" -Destination "${DocOutDir}/templates" -Force -Recurse
Copy-Item "${DocGenDir}/docfx.json" -Destination "${DocOutDir}/" -Force

Write-Verbose "Creating Index using service directory and package names from repo..."
# The service mapper maps directory names to service names to produce a friendlier
# index. If a directory isn't in the mapper, the default is just the directory name
# in all caps.
$serviceMapHash = Get-Content -Path "${DocGenDir}/service-mapper.json" | ConvertFrom-Json -AsHashtable

# There are some artifacts that show up, due to the way we do discovery, that are never shipped.
# Keep a list of those here; anything we don't want to ship can be added to it, which will
# cause it to be skipped when generating the DocIndex.
$ArtifactsToSkip = (

# The list of services is constructed from the directory list under the sdk folder
# which, right now, only contains client/data directories. When management is moved
# under sdk it'll automatically get picked up.
$ServiceListData = Get-ChildItem "${RepoRoot}/sdk" -Directory
$YmlPath = "${DocOutDir}/api"
# Create the api toc file; -Force replaces one that already exists.
New-Item -Path $YmlPath -Name "toc.yml" -Force
foreach ($Dir in $ServiceListData)
# Display name used for this service directory in the toc.
$mappedDir = ""
if ($serviceMapHash.ContainsKey($Dir.Name))
$mappedDir = $serviceMapHash[$Dir.Name]
# NOTE(review): presumably the fallback for directories missing from the mapper; the
# braces/else separating it from the assignment above are not visible here - confirm.
$mappedDir = $Dir.Name.ToUpper()
# Dot-source the shared helpers so their definitions are loaded into this script's scope.
. "${PSScriptRoot}\..\common\scripts\common.ps1"
# Name of the language-specific index function, built from $Language
# (for example "Get-java-GithubIoDocIndex" when $Language is "java").
$GetGithubIoDocIndexFn = "Get-${Language}-GithubIoDocIndex"

# Downloads the package metadata CSV and parses it into objects.
#   MetadataUri - URL of the CSV file to fetch.
# Returns the rows produced by ConvertFrom-Csv from the response body.
# The GET request is retried up to 3 times, waiting 10 seconds between attempts.
function Get-CSVMetadata ([string]$MetadataUri) {
  $metadataResponse = Invoke-RestMethod -Uri $MetadataUri -method "GET" -MaximumRetryCount 3 -RetryIntervalSec 10 | ConvertFrom-Csv
  return $metadataResponse
}

# Lists the artifact names found under a blob storage listing, following pagination.
#   blobStorageUrl           - listing URL for the first page. Later pages are requested by
#                              appending "&marker=<token>", so the URL is presumably expected
#                              to already carry a query string - confirm with callers.
#   blobDirectoryRegex       - regex matched against each BlobPrefix name.
#   blobArtifactsReplacement - replacement used with that regex; the result is the artifact name.
# Returns an array holding one artifact name per BlobPrefix element across all pages.
function Get-BlobStorage-Artifacts($blobStorageUrl, $blobDirectoryRegex, $blobArtifactsReplacement) {
  LogDebug "Reading artifact from storage blob ..."
  $returnedArtifacts = @()
  $pageToken = ""
  Do {
    $resp = ""
    if (!$pageToken) {
      # First page call.
      $resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrl
    }
    else {
      # Next page call
      $blobStorageUrlPageToken = $blobStorageUrl + "&marker=$pageToken"
      $resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrlPageToken
    }
    # Convert to an xml document; the BOM has to go first or the [xml] cast would see it.
    $xmlDoc = [xml](removeBomFromString $resp)
    foreach ($elem in $xmlDoc.EnumerationResults.Blobs.BlobPrefix) {
      # The service returns names like "dotnet/Azure.AI.Anomalydetector/"; we need "Azure.AI.Anomalydetector".
      $artifact = $elem.Name -replace $blobDirectoryRegex, $blobArtifactsReplacement
      $returnedArtifacts += $artifact
    }
    # Fetch page token; an empty NextMarker ends the loop.
    $pageToken = $xmlDoc.EnumerationResults.NextMarker
  } while ($pageToken)
  return $returnedArtifacts
}

# The byte sequence of a BOM differs between encodings.
# This helper only strips the UTF-8 BOM, since that is what the blob storage list api uses.
# Returns the original string when it does not start with the UTF-8 BOM sequence.
function RemoveBomFromString([string]$bomAwareString) {
# Strings of three characters or fewer are returned unchanged.
if ($bomAwareString.length -le 3) {
return $bomAwareString

# NOTE(review): the statements below that use $clientArr, $mgmtArr, $Dir, $PkgList and the
# pom.xml handling do not touch $bomAwareString; they look like they belong to the
# per-service-directory loop of the top-level script rather than to this function - confirm.
# Store the list of artifacts into the arrays and write them into the .md file
# after processing the list of subdirectories. This will allow the correct
# division of the artifacts under the Client or Management headings
$clientArr = @()
$mgmtArr = @()

$PkgList = Get-ChildItem $Dir.FullName -Directory -Exclude changelog, faq, .github, build
if (($PkgList | Measure-Object).count -eq 0)
# The three bytes of the UTF-8 BOM.
$bomPatternByteArray = [byte[]] (0xef, 0xbb, 0xbf)
# The default encoding for powershell is ISO-8859-1, so converting bytes with the encoding.
$bomAwareBytes = [Text.Encoding]::GetEncoding(28591).GetBytes($bomAwareString.Substring(0, 3))
# No differences reported by Compare-Object means the first three characters are the BOM.
if (@(Compare-Object $bomPatternByteArray $bomAwareBytes -SyncWindow 0).Length -eq 0) {
return $bomAwareString.Substring(3)
foreach ($Pkg in $PkgList)
# Load the pom file to pull the artifact name and grab the
# parent's relative path to see which parent pom is being
# used to determine whether or not the artifact is client
# or management.
$PomPath = Join-Path -Path $Pkg -ChildPath "pom.xml"

# no pom file = nothing to process
if (Test-Path -path $PomPath)
$xml = New-Object xml

# Get the artifactId from the POM
$artifactId = $xml.project.artifactId

$parent = $xml.project.parent.relativePath

# If this is an artifact that isn't shipping then just
# move on to the next one
if ($ArtifactsToSkip -contains $artifactId)
Write-Output "skipping $artifactId"

# If the parent is null or empty then the pom isn't directly including
# one of the pom.[client|data|management].xml and needs to be specially
# handled
if (("" -eq $parent) -or ($null -eq $parent))
# Cosmos has a root pom which includes pom.client.xml that won't
# be detected by this logic. It's easier to deal with specially
# than it is to try and climb the pom chain here.
if ($Dir.BaseName -eq 'cosmos')
$clientArr += $artifactId
Write-Host "*snowflake* Pom $PomPath, has a null or empty relative path."
# A parent path naming the client or data sdk parent marks the artifact as a client library.
if (($parent.IndexOf('azure-client-sdk-parent') -ne -1) -or ($parent.IndexOf('azure-data-sdk-parent') -ne -1))
$clientArr += $artifactId
$mgmtArr += $artifactId
# No BOM found: hand the input back untouched.
return $bomAwareString

# Builds the artifact-name -> service-name mapping used to lay out the toc.
#   metadata  - package rows; the Package, Hide and ServiceName properties are read.
#   artifacts - artifact names to look up in the metadata.
# Returns a hashtable keyed by artifact name whose values are service names.
function Get-TocMapping {
Param (
[Parameter(Mandatory = $true)] [Object[]] $metadata,
[Parameter(Mandatory = $true)] [String[]] $artifacts
# Used for sorting the toc display order
$orderServiceMapping = @{}

foreach ($artifact in $artifacts) {
# All metadata rows whose Package equals this artifact ("?" is the Where-Object alias).
$packageInfo = $metadata | ? {$_.Package -eq $artifact}

if ($packageInfo -and $packageInfo[0].Hide -eq 'true') {
LogDebug "The artifact $artifact set 'Hide' to 'true'."
$serviceName = ""
if (!$packageInfo -or !$packageInfo[0].ServiceName) {
LogWarning "There is no service name for artifact $artifact. Please check csv of Azure/azure-sdk/_data/release/latest repo if this is intended. "
# If no service name retrieved, print out warning message, and put it into Other page.
$serviceName = "Other"
else {
# More than one matching row: warn, then use the first row's service name.
if ($packageInfo.Length -gt 1) {
LogWarning "There are more than 1 packages fetched out for artifact $artifact. Please check csv of Azure/azure-sdk/_data/release/latest repo if this is intended. "
$serviceName = $packageInfo[0].ServiceName.Trim()
$orderServiceMapping[$artifact] = $serviceName
# NOTE(review): the statements below use $clientArr, $YmlPath, $Dir and $mappedDir, none of
# which this function defines; they look like part of the top-level per-directory
# index writer rather than of this function - confirm.
# Only create this if there's something to create
#if (($clientArr.Count -gt 0) -or ($mgmtArr.Count -gt 0))
if ($clientArr.Count -gt 0)
New-Item -Path $YmlPath -Name "$($Dir.Name).md" -Force
Add-Content -Path "$($YmlPath)/toc.yml" -Value "- name: $($mappedDir)`r`n href: $($Dir.Name).md"
# loop through the arrays and add the appropriate artifacts under the appropriate headings
if ($clientArr.Count -gt 0)
Add-Content -Path "$($YmlPath)/$($Dir.Name).md" -Value "# Client Libraries"
foreach($lib in $clientArr)
Write-Host "Write $($lib) to $($Dir.Name).md"
Add-Content -Path "$($YmlPath)/$($Dir.Name).md" -Value "#### $lib"
return $orderServiceMapping

# Writes the docfx toc/markdown files for the given mapping and builds the docfx site.
#   tocContent - hashtable of artifact name (key) to service name (value).
#   lang       - language name inserted into the site title.
# Reads $RepoRoot, $DocGenDir and $DocFx from the enclosing scope.
function GenerateDocfxTocContent([Hashtable]$tocContent, [String]$lang) {
LogDebug "Start generating the docfx toc and build docfx site..."
$DocOutDir = "${RepoRoot}/docfx_project"

LogDebug "Initializing Default DocFx Site..."
& $($DocFx) init -q -o "${DocOutDir}"
# The line below is used for local testing
# docfx init -q -o "${DocOutDir}"
LogDebug "Copying template and configuration..."
New-Item -Path "${DocOutDir}" -Name "templates" -ItemType "directory" -Force
Copy-Item "${DocGenDir}/templates/*" -Destination "${DocOutDir}/templates" -Force -Recurse
Copy-Item "${DocGenDir}/docfx.json" -Destination "${DocOutDir}/" -Force
$YmlPath = "${DocOutDir}/api"
New-Item -Path $YmlPath -Name "toc.yml" -Force
# Service names that already have a toc entry and markdown page.
$visitedService = @{}
# Sort and display toc service name by alphabetical order.
foreach ($serviceMapping in $tocContent.getEnumerator() | Sort Value) {
$artifact = $serviceMapping.Key
$serviceName = $serviceMapping.Value
# Page file name: the service name with whitespace removed, lower-cased.
$fileName = ($serviceName -replace '\s', '').ToLower().Trim()
# Service already seen: append the artifact heading to its existing page.
if ($visitedService.ContainsKey($serviceName)) {
Add-Content -Path "$($YmlPath)/${fileName}.md" -Value "#### $artifact"
else {
# First artifact of this service: add the toc entry, create the page, then record the service.
Add-Content -Path "$($YmlPath)/toc.yml" -Value "- name: ${serviceName}`r`n href: ${fileName}.md"
New-Item -Path $YmlPath -Name "${fileName}.md" -Force
Add-Content -Path "$($YmlPath)/${fileName}.md" -Value "#### $artifact"
$visitedService[$serviceName] = $true
# For the moment there are no management docs and with the way some of the libraries
# in management are versioned is a bit wonky. They aren't versioned by releasing a new
# version with the same groupId/artifactId, they're versioned with the same artifactId
# and version with a different groupId and the groupId happens to include the date. For
# example, the artifact/version of azure-mgmt-storage:1.0.0-beta has several different
# groupIds.,,
# etc.
#if ($mgmtArr.Count -gt 0)
# Add-Content -Path "$($YmlPath)/$($Dir.Name).md" -Value "# Management Libraries"
# foreach($lib in $mgmtArr)
# {
# Write-Output "Write $($lib) to $($Dir.Name).md"
# Add-Content -Path "$($YmlPath)/$($Dir.Name).md" -Value "#### $lib"
# }

# NOTE(review): from here on most steps appear twice, once logged with Write-Verbose and
# using $DocFxTool / a fixed "Java" title, once logged with LogDebug and using $DocFx / $lang.
# This looks like two versions of the same step sequence side by side - confirm which is intended.
Write-Verbose "Creating Site Title and Navigation..."
New-Item -Path "${DocOutDir}" -Name "toc.yml" -Force
Add-Content -Path "${DocOutDir}/toc.yml" -Value "- name: Azure SDK for Java APIs`r`n href: api/`r`n homepage: api/"
# Generate toc homepage.
LogDebug "Creating Site Title and Navigation..."
New-Item -Path "${DocOutDir}" -Name "toc.yml" -Force
Add-Content -Path "${DocOutDir}/toc.yml" -Value "- name: Azure SDK for $lang APIs`r`n href: api/`r`n homepage: api/"

Write-Verbose "Copying root markdowns"
Copy-Item "$($RepoRoot)/" -Destination "${DocOutDir}/api/" -Force
LogDebug "Copying root markdowns"
# NOTE(review): both copies below use the repo root itself as the source; presumably each
# originally named a specific markdown file - confirm.
Copy-Item "$($RepoRoot)/" -Destination "${DocOutDir}/api/" -Force
Copy-Item "$($RepoRoot)/" -Destination "${DocOutDir}/api/" -Force

Write-Verbose "Building site..."
& "${DocFxTool}" build "${DocOutDir}/docfx.json"
LogDebug "Building site..."
& $($DocFx) build "${DocOutDir}/docfx.json"
# The line below is used for local testing
# docfx build "${DocOutDir}/docfx.json"
# Place the logo in the generated _site folder.
Copy-Item "${DocGenDir}/assets/logo.svg" -Destination "${DocOutDir}/_site/" -Force

Copy-Item "${DocGenDir}/assets/logo.svg" -Destination "${DocOutDir}/_site/" -Force
# Check whether a function named $GetGithubIoDocIndexFn is currently defined.
if ((Get-ChildItem -Path Function: | ? { $_.Name -eq $GetGithubIoDocIndexFn }).Count -gt 0)
# NOTE(review): neither a call to that function nor an else branch is visible between the
# check and this warning, so as written the warning follows a successful lookup - confirm
# the intended structure.
LogWarning "The function '$GetGithubIoDocIndexFn' was not found."
21 changes: 12 additions & 9 deletions eng/pipelines/docindex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,24 @@ jobs:
versionSpec: '3.6'

- template: /eng/common/pipelines/templates/steps/replace-relative-links.yml
TargetFolder: .
RootFolder: .
BuildSHA: $(Build.SourceVersion)

- pwsh: |
Invoke-WebRequest -Uri "" `
-OutFile "" | Wait-Process; Expand-Archive -Path "" -DestinationPath "./docfx/"
-OutFile "" | Wait-Process; Expand-Archive -Path "" -DestinationPath ./docfx
echo "##vso[task.setvariable variable=docfxPath;isOutput=true]$(Build.SourcesDirectory)/docfx/docfx.exe"
workingDirectory: $(Build.SourcesDirectory)
displayName: Download and Extract DocFX
name: setupDocfxTool
- pwsh: |
$(Build.SourcesDirectory)/eng/docgeneration/Generate-DocIndex.ps1 -RepoRoot $(Build.SourcesDirectory) -DocGenDir "$(Build.SourcesDirectory)/eng/docgeneration" -verbose
- task: PowerShell@2
displayName: 'Generate Doc Index'
pwsh: true
filePath: $(Build.SourcesDirectory)/eng/docgeneration/Generate-DocIndex.ps1
arguments: >
-Docfx $(setupDocfxTool.docfxPath)
-RepoRoot $(Build.SourcesDirectory)
-DocGenDir "$(Build.SourcesDirectory)/eng/docgeneration"
- task: UsePythonVersion@0
displayName: 'Use Python 3.6'
Expand Down
18 changes: 17 additions & 1 deletion eng/scripts/Language-Settings.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ $Language = "java"
$PackageRepository = "Maven"
$packagePattern = "*.pom"
$MetadataUri = ""
$BlobStorageUrl = ""

function Get-java-PackageInfoFromRepo ($pkgPath, $serviceDirectory, $pkgName)
Expand Down Expand Up @@ -152,4 +153,19 @@ function Publish-java-GithubIODocs ($DocLocation, $PublicArtifactLocation)

# Generates the GitHub IO doc index for Java: reads the package metadata csv, lists the
# published artifacts from blob storage, maps each artifact to a service name and then
# writes the toc files and builds the site.
# Reads $MetadataUri and $BlobStorageUrl from the enclosing scope; takes no parameters.
function Get-java-GithubIoDocIndex() {
  # Fetch out all package metadata from csv file.
  $metadata = Get-CSVMetadata -MetadataUri $MetadataUri
  # Leave the track 2 packages if multiple packages fetched out.
  # NOTE(review): both GroupId comparisons use an empty string literal here; confirm this
  # is the group id that is meant to identify the track 2 (client) packages.
  # @() keeps each result an array even for zero or one match, so the "+" below always
  # concatenates instead of failing on a single object.
  $clientPackages = @($metadata | Where-Object { $_.GroupId -eq '' })
  # Collect the client package names once. A case-sensitive containment test on this
  # array works for zero, one or many names; calling .Contains() on the member-enumerated
  # value would throw on $null and do substring matching on a single string.
  $clientPackageNames = @($clientPackages | ForEach-Object { $_.Package })
  $nonClientPackages = @($metadata | Where-Object { $_.GroupId -ne '' -and $clientPackageNames -cnotcontains $_.Package })
  $uniquePackages = $clientPackages + $nonClientPackages
  # Get the artifacts name from blob storage
  $artifacts = Get-BlobStorage-Artifacts -blobStorageUrl $BlobStorageUrl -blobDirectoryRegex "^java/(.*)/$" -blobArtifactsReplacement '$1'
  # Build up the artifact to service name mapping for GithubIo toc.
  $tocContent = Get-TocMapping -metadata $uniquePackages -artifacts $artifacts
  # Generate yml/md toc files and build site.
  GenerateDocfxTocContent -tocContent $tocContent -lang "Java"
}

