Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions eng/pipelines/scripts/Get-Test-Logs.ps1
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
<#
.SYNOPSIS
Captures any test.log files in the build directory and moves them to a staging directory for artifact publishing.
Captures any test.log files, JVM crash logs, surefire dumpstream files, and jstack dumps in the build directory
and moves them to a staging directory for artifact publishing.
.DESCRIPTION
This script is used to capture any test.log files in the build directory and move them to a staging directory for
artifact publishing. It also sets a pipeline variable to indicate whether any test.log files were found.
This script is used to capture diagnostic files from the build directory and move them to a staging directory for
artifact publishing. It also sets a pipeline variable to indicate whether any diagnostic files were found.
Collected files include:
- *test.log (test logs)
- hs_err_pid*.log (JVM crash reports)
- *.dumpstream (Surefire forked JVM crash/corruption reports)
- jstack-dumps.log (periodic jstack thread dumps from the Java process monitor)
.PARAMETER StagingDirectory
The directory where the test.log files will be moved to.
The directory where the diagnostic files will be moved to.
.PARAMETER TestLogsArtifactName
The name of the artifact to be created.
Expand All @@ -22,11 +28,21 @@ param(
)

$testLogs = Get-ChildItem -Path . -Recurse -Filter *test.log -File -Depth 4
$jvmCrashLogs = Get-ChildItem -Path . -Recurse -Filter hs_err_pid*.log -File -Depth 6
$dumpstreamFiles = Get-ChildItem -Path . -Recurse -Filter *.dumpstream -File -Depth 6
$jstackDumps = Get-ChildItem -Path "$StagingDirectory/troubleshooting" -Filter jstack-dumps.log -File -ErrorAction SilentlyContinue

if ($testLogs.Count -gt 0) {
$allFiles = @()
if ($testLogs) { $allFiles += $testLogs }
if ($jvmCrashLogs) { $allFiles += $jvmCrashLogs }
if ($dumpstreamFiles) { $allFiles += $dumpstreamFiles }
if ($jstackDumps) { $allFiles += $jstackDumps }

if ($allFiles.Count -gt 0) {
if (-not (Test-Path "$StagingDirectory/troubleshooting")) {
New-Item -ItemType Directory -Path "$StagingDirectory/troubleshooting" | Out-Null
}
Write-Host "##vso[task.setvariable variable=HAS_TROUBLESHOOTING]true"
Compress-Archive -Path $testLogs -DestinationPath "$StagingDirectory/troubleshooting/$TestLogsArtifactName.zip"
Write-Host "Found $($testLogs.Count) test log(s), $($jvmCrashLogs.Count) JVM crash log(s), $($dumpstreamFiles.Count) dumpstream file(s), $($jstackDumps.Count) jstack dump(s)"
Compress-Archive -Path $allFiles -DestinationPath "$StagingDirectory/troubleshooting/$TestLogsArtifactName.zip"
}
105 changes: 105 additions & 0 deletions eng/pipelines/scripts/Monitor-Java-Processes.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<#
.SYNOPSIS
Monitors Java processes by taking periodic jstack thread dumps.

.DESCRIPTION
This script is meant to run in the background. Every IntervalSeconds it appends a snapshot to
<StagingDirectory>/troubleshooting/jstack-dumps.log containing:
  - the list of Java processes ('ps aux' on Linux/macOS, Get-Process elsewhere),
  - the output of 'jps -l' for comparison,
  - on Linux/macOS only, a 'jstack' thread dump of every process whose command name contains 'java'.
This is useful for diagnosing CI pipeline hangs caused by deadlocked or stuck Java processes.
Every step is best-effort: a failure is written to the log file and the monitor keeps running.

.PARAMETER StagingDirectory
The directory under which the 'troubleshooting' folder and the jstack dump file will be written.

.PARAMETER IntervalSeconds
The interval in seconds between captures. Default is 120 (2 minutes).

.PARAMETER DurationMinutes
The maximum duration in minutes to run the monitor. Default is 55 minutes.
#>

param(
    [Parameter(Mandatory = $true)]
    [string]$StagingDirectory,

    [Parameter(Mandatory = $false)]
    [int]$IntervalSeconds = 120,

    [Parameter(Mandatory = $false)]
    [int]$DurationMinutes = 55
)

$troubleshootingDir = "$StagingDirectory/troubleshooting"
if (-not (Test-Path $troubleshootingDir)) {
    New-Item -ItemType Directory -Path $troubleshootingDir | Out-Null
}

$outputFile = "$troubleshootingDir/jstack-dumps.log"
$endTime = (Get-Date).AddMinutes($DurationMinutes)

# Resolve the JDK tools once; the environment of this process does not change while the monitor runs.
# Fall back to whatever is on PATH when JAVA_HOME is not set.
$javaHome = $env:JAVA_HOME
$jpsPath = if ($javaHome) { "$javaHome/bin/jps" } else { "jps" }
$jstackPath = if ($javaHome) { "$javaHome/bin/jstack" } else { "jstack" }

Add-Content -Path $outputFile -Value "Monitor started at $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
Add-Content -Path $outputFile -Value "JAVA_HOME=$($env:JAVA_HOME)"

while ((Get-Date) -lt $endTime) {
    # Sleep first: the first snapshot is taken one interval after start-up.
    Start-Sleep -Seconds $IntervalSeconds

    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    Add-Content -Path $outputFile -Value "`n========== Snapshot at $timestamp =========="

    # Use 'ps' to find Java processes (more reliable than jps on CI agents)
    try {
        if ($IsLinux -or $IsMacOS) {
            # '[j]ava' keeps the grep process itself out of the listing.
            $psOutput = bash -c "ps aux | grep '[j]ava'" 2>&1
        } else {
            $psOutput = Get-Process -Name java -ErrorAction SilentlyContinue |
                Format-Table Id, CPU, WorkingSet64, CommandLine -AutoSize |
                Out-String
        }
        Add-Content -Path $outputFile -Value "`n--- Java processes (ps) ---"
        if ($psOutput) {
            Add-Content -Path $outputFile -Value $psOutput
        } else {
            Add-Content -Path $outputFile -Value "(no Java processes found)"
        }
    } catch {
        Add-Content -Path $outputFile -Value "Error listing processes: $_"
    }

    # Also try jps for comparison
    try {
        $jpsOutput = & $jpsPath -l 2>&1
        Add-Content -Path $outputFile -Value "`n--- Java processes (jps -l) ---"
        Add-Content -Path $outputFile -Value $jpsOutput
    } catch {
        Add-Content -Path $outputFile -Value "Error running jps: $_"
    }

    # Extract PIDs from ps output and take jstack dumps
    if ($IsLinux -or $IsMacOS) {
        try {
            # The awk field reference must be escaped with a backtick. Backslash is not an escape
            # character in a double-quoted PowerShell string, so "\$1" would expand the (empty)
            # PowerShell variable $1 and hand awk the invalid program '{print \}'.
            $pidListing = bash -c "ps -eo pid,comm | grep '[j]ava' | awk '{print `$1}'" 2>&1

            # Keep only purely numeric tokens so that any stderr text captured by 2>&1 is never
            # passed to jstack as if it were a PID.
            $javaPids = @($pidListing) |
                ForEach-Object { "$_".Trim() } |
                Where-Object { $_ -match '^\d+$' }

            # Do not name the loop variable $pid: PowerShell variable names are case-insensitive and
            # $PID is a read-only automatic variable, so assigning to it throws.
            foreach ($javaPid in $javaPids) {
                Add-Content -Path $outputFile -Value "`n--- jstack for PID $javaPid ---"
                try {
                    $stackTrace = & $jstackPath $javaPid 2>&1
                    Add-Content -Path $outputFile -Value $stackTrace
                } catch {
                    Add-Content -Path $outputFile -Value "Failed to get jstack for PID $javaPid : $_"
                }
            }
        } catch {
            Add-Content -Path $outputFile -Value "Error extracting PIDs: $_"
        }
    }
}

Add-Content -Path $outputFile -Value "`nMonitor finished at $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
# Mark that we have troubleshooting artifacts.
# NOTE(review): this logging command only reaches the pipeline agent if this script's stdout is
# attached to the step output; when the script is launched with stdout discarded it has no effect.
if (Test-Path $outputFile) {
    Write-Host "##vso[task.setvariable variable=HAS_TROUBLESHOOTING]true"
}
2 changes: 2 additions & 0 deletions sdk/parents/azure-client-sdk-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,7 @@
<org.slf4j.simpleLogger.log.com.azure>debug</org.slf4j.simpleLogger.log.com.azure>
</systemPropertyVariables>
<forkCount>1</forkCount>
<forkedProcessTimeoutInSeconds>1800</forkedProcessTimeoutInSeconds>
<testFailureIgnore>false</testFailureIgnore>
<argLine>
${defaultSurefireArgLine}
Expand Down Expand Up @@ -944,6 +945,7 @@
<org.slf4j.simpleLogger.log.com.azure>debug</org.slf4j.simpleLogger.log.com.azure>
</systemPropertyVariables>
<forkCount>1</forkCount>
<forkedProcessTimeoutInSeconds>1800</forkedProcessTimeoutInSeconds>
<testFailureIgnore>false</testFailureIgnore>
<argLine>
${defaultFailsafeArgLine}
Expand Down
11 changes: 11 additions & 0 deletions sdk/spring/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,17 @@ extends:
template: ../../eng/pipelines/templates/stages/archetype-sdk-client.yml
parameters:
ServiceDirectory: spring
PreBuildSteps:
- bash: |
nohup pwsh -File "$(Build.SourcesDirectory)/eng/pipelines/scripts/Monitor-Java-Processes.ps1" \
-StagingDirectory "$(System.DefaultWorkingDirectory)" \
-IntervalSeconds 180 \
-DurationMinutes 55 \
> /dev/null 2>&1 &
echo "Java process monitor started in background (PID: $!)"
displayName: 'Start Java process monitor (background)'
continueOnError: true
condition: always()
Artifacts:
- name: azure-spring-data-cosmos
groupId: com.azure
Expand Down
1 change: 1 addition & 0 deletions sdk/spring/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
<module>azure-spring-data-cosmos</module>
</modules>
</profile>

<profile>
<id>monitor</id>
<modules>
Expand Down
Loading