Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func runCmd(cmd *cobra.Command, args []string) error {
// prepareTarget prepares the target for configuration changes
// almost all set scripts require the msr kernel module to be loaded and
// use wrmsr and rdmsr, so we do that here so that the goroutines for the
// set scripts can run in parallel without conflicts
// set scripts can run concurrently without conflicts
func prepareTarget(myTarget target.Target, localTempDir string) (err error) {
prepareScript := script.ScriptDefinition{
Name: "prepare-target",
Expand Down
12 changes: 12 additions & 0 deletions cmd/flamegraph/flamegraph_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ func callStackFrequencyTableValues(outputs map[string]script.ScriptOutput) []tab
}

func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
slog.Warn("collapsed call stack output is empty")
return ""
}
sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
if len(sections) == 0 {
slog.Warn("no sections in collapsed call stack output")
Expand Down Expand Up @@ -79,6 +83,10 @@ func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
}

func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
slog.Warn("collapsed call stack output is empty")
return ""
}
sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
if len(sections) == 0 {
slog.Warn("no sections in collapsed call stack output")
Expand All @@ -104,6 +112,10 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
}

func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string {
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
slog.Warn("collapsed call stack output is empty")
return ""
}
sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
if len(sections) == 0 {
slog.Warn("no sections in collapsed call stack output")
Expand Down
6 changes: 3 additions & 3 deletions cmd/metrics/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func (c *X86MetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS
if err != nil {
return Metadata{}, fmt.Errorf("failed to get number of general purpose counters: %v", err)
}
// the rest of the metadata is retrieved by running scripts in parallel
// the rest of the metadata is retrieved by running scripts concurrently
metadataScripts, err := getMetadataScripts(noRoot, noSystemSummary, metadata.NumGeneralPurposeCounters)
if err != nil {
return Metadata{}, fmt.Errorf("failed to get metadata scripts: %v", err)
Expand Down Expand Up @@ -336,7 +336,7 @@ func (c *ARMMetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS
if err != nil {
return Metadata{}, fmt.Errorf("failed to get number of general purpose counters: %v", err)
}
// the rest of the metadata is retrieved by running scripts in parallel and then parsing the output
// the rest of the metadata is retrieved by running scripts concurrently and then parsing the output
metadataScripts, err := getMetadataScripts(noRoot, noSystemSummary, metadata.NumGeneralPurposeCounters)
if err != nil {
return Metadata{}, fmt.Errorf("failed to get metadata scripts: %v", err)
Expand Down Expand Up @@ -393,7 +393,7 @@ func (c *ARMMetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS
}

func getMetadataScripts(noRoot bool, noSystemSummary bool, numGPCounters int) (metadataScripts []script.ScriptDefinition, err error) {
// reduce startup time by running the metadata scripts in parallel
// reduce startup time by running the metadata scripts concurrently
metadataScriptDefs := []script.ScriptDefinition{
{
Name: "get architecture",
Expand Down
6 changes: 5 additions & 1 deletion cmd/telemetry/telemetry_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -609,9 +609,13 @@ func instructionTelemetryTableValues(outputs map[string]script.ScriptOutput) []t
// first two lines are not part of the CSV output, they are the start time and interval
var startTime time.Time
var interval int
if len(outputs[script.InstructionTelemetryScriptName].Stdout) == 0 {
slog.Warn("instruction mix output is empty")
return []table.Field{}
}
lines := strings.Split(outputs[script.InstructionTelemetryScriptName].Stdout, "\n")
if len(lines) < 4 {
slog.Warn("no data found in instruction mix output")
slog.Warn("no data lines found in instruction mix output")
return []table.Field{}
}
// TIME
Expand Down
76 changes: 49 additions & 27 deletions internal/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,16 +288,31 @@ func (rc *ReportingCommand) Run() error {
return nil
}

func signalProcessOnTarget(t target.Target, pidStr string, sigStr string) error {
var cmd *exec.Cmd
// prepend "-" to the signal string if not already present
if !strings.HasPrefix(sigStr, "-") {
sigStr = "-" + sigStr
}
if !t.IsSuperUser() && t.CanElevatePrivileges() {
cmd = exec.Command("sudo", "kill", sigStr, pidStr)
} else {
cmd = exec.Command("kill", sigStr, pidStr)
}
_, _, _, err := t.RunCommandEx(cmd, 5, false, true) // #nosec G204
return err
}

// configureSignalHandler sets up a signal handler to catch SIGINT and SIGTERM
//
// When perfspect receives ctrl-c while in the shell, the shell propagates the
// signal to all our children. But when perfspect is run in the background or disowned and
// then receives SIGINT, e.g., from a script, we need to send the signal to our children
//
// Also, when running scripts in parallel using the parallel_master.sh script, we need to
// send the signal to the parallel_master.sh script on each target so that it can clean up
// its child processes. This is because the parallel_master.sh script is run in its own process group
// and does not receive the signal when perfspect receives it.
// When running scripts using the controller.sh script, we need to send the signal to the
// controller.sh script on each target so that it can clean up its child processes. This is
// because the controller.sh script is run in its own process group and does not receive the
// signal when perfspect receives it.
//
// Parameters:
// - myTargets: The list of targets to send the signal to.
Expand All @@ -308,48 +323,38 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi
go func() {
sig := <-sigChannel
slog.Debug("received signal", slog.String("signal", sig.String()))
// Scripts that are run in parallel using the parallel_master.sh script and a few other sequential scripts need to be handled specially
// because they are run in their own process group, we need to send the signal directly to the PID of the script.
// For every target, look for the primary_collection_script PID file and send SIGINT to it.
// The controller.sh script is run in its own process group, so we need to send the signal
// directly to the PID of the controller. For every target, look for the primary_collection_script
// PID file and send SIGINT to it.
// The controller script is run in its own process group, so we need to send the signal
// directly to the PID of the controller. For every target, look for the controller
// PID file and send SIGINT to it.
for _, t := range myTargets {
if statusFunc != nil {
_ = statusFunc(t.GetName(), "Signal received, cleaning up...")
}
pidFilePath := filepath.Join(t.GetTempDirectory(), "primary_collection_script.pid")
pidFilePath := filepath.Join(t.GetTempDirectory(), script.ControllerPIDFileName)
stdout, _, exitcode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204
if err != nil {
slog.Error("error retrieving target primary_collection_script PID", slog.String("target", t.GetName()), slog.String("error", err.Error()))
slog.Error("error retrieving target controller PID", slog.String("target", t.GetName()), slog.String("error", err.Error()))
}
if exitcode == 0 {
pidStr := strings.TrimSpace(stdout)
_, _, _, err := t.RunCommandEx(exec.Command("sudo", "kill", "-SIGINT", pidStr), 5, false, true) // #nosec G204
err = signalProcessOnTarget(t, pidStr, "SIGINT")
if err != nil {
slog.Error("error sending signal to target primary_collection_script", slog.String("target", t.GetName()), slog.String("error", err.Error()))
slog.Error("error sending SIGINT signal to target controller", slog.String("target", t.GetName()), slog.String("error", err.Error()))
}
}
}
// now wait until all primary collection scripts have exited
slog.Debug("waiting for primary_collection_script scripts to exit")
// now wait until all controller scripts have exited
slog.Debug("waiting for controller scripts to exit")
for _, t := range myTargets {
// create a per-target timeout context
targetTimeout := 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), targetTimeout)
timedOut := false
pidFilePath := filepath.Join(t.GetTempDirectory(), "primary_collection_script.pid")
pidFilePath := filepath.Join(t.GetTempDirectory(), script.ControllerPIDFileName)
for {
// check for timeout
select {
case <-ctx.Done():
if statusFunc != nil {
_ = statusFunc(t.GetName(), "cleanup timeout exceeded")
}
slog.Warn("signal handler cleanup timeout exceeded for target", slog.String("target", t.GetName()))
timedOut = true
default:
}
if timedOut {
break
}
// read the pid file
stdout, _, exitcode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204
if err != nil || exitcode != 0 {
Expand All @@ -362,6 +367,23 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi
if err != nil || exitcode != 0 {
break // process no longer exists, script has exited
}
// check for timeout
select {
case <-ctx.Done():
timedOut = true
default:
}
if timedOut {
if statusFunc != nil {
_ = statusFunc(t.GetName(), "cleanup timeout exceeded, sending kill signal")
}
slog.Warn("signal handler cleanup timeout exceeded for target, sending SIGKILL", slog.String("target", t.GetName()))
err = signalProcessOnTarget(t, pidStr, "SIGKILL")
if err != nil {
slog.Error("error sending SIGKILL signal to target controller", slog.String("target", t.GetName()), slog.String("error", err.Error()))
}
break
}
// sleep for a short time before checking again
time.Sleep(500 * time.Millisecond)
}
Expand Down
28 changes: 13 additions & 15 deletions internal/common/turbostat.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,18 @@ func parseTurbostatOutput(output string) ([]map[string]string, error) {
// The "platform" rows are those where Package, Die, Core, and CPU are all "-".
// The first column is the sample time, and the rest are the values for the specified fields.
func TurbostatPlatformRows(turboStatScriptOutput string, fieldNames []string) ([][]string, error) {
if turboStatScriptOutput == "" {
return nil, fmt.Errorf("turbostat output is empty")
}
if len(fieldNames) == 0 {
err := fmt.Errorf("no field names provided")
slog.Error(err.Error())
return nil, err
return nil, fmt.Errorf("no field names provided")
}
rows, err := parseTurbostatOutput(turboStatScriptOutput)
if err != nil {
err := fmt.Errorf("unable to parse turbostat output: %w", err)
return nil, err
return nil, fmt.Errorf("unable to parse turbostat output: %w", err)
}
if len(rows) == 0 {
slog.Warn("no platform rows found in turbostat output")
return nil, nil
return nil, fmt.Errorf("no platform rows found in turbostat output")
}
// filter the rows to the summary rows only
var fieldValues [][]string
Expand Down Expand Up @@ -145,18 +144,18 @@ func isPlatformRow(row map[string]string) bool {
// for each package, for the specified field names.
// The first column is the sample time, and the rest are the values for the specified fields.
func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([][][]string, error) {
if turboStatScriptOutput == "" {
return nil, fmt.Errorf("turbostat output is empty")
}
if len(fieldNames) == 0 {
err := fmt.Errorf("no field names provided")
return nil, err
return nil, fmt.Errorf("no field names provided")
}
rows, err := parseTurbostatOutput(turboStatScriptOutput)
if err != nil {
err := fmt.Errorf("unable to parse turbostat output: %w", err)
return nil, err
return nil, fmt.Errorf("unable to parse turbostat output: %w", err)
}
if len(rows) == 0 {
slog.Warn("no package rows found in turbostat output")
return nil, nil
return nil, fmt.Errorf("no package rows found in turbostat output")
}
var packageRows [][][]string
for _, row := range rows {
Expand Down Expand Up @@ -195,8 +194,7 @@ func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([]
}
}
if len(packageRows) == 0 {
err := fmt.Errorf("no data found in turbostat output for fields: %s", fieldNames)
return nil, err
return nil, fmt.Errorf("no data found in turbostat output for fields: %s", fieldNames)
}
return packageRows, nil
}
Expand Down
10 changes: 5 additions & 5 deletions internal/common/turbostat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,23 +98,23 @@ func TestTurbostatPlatformRows(t *testing.T) {
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: false,
expectErr: true,
},
{
name: "No output",
turbostatOutput: "",
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: false,
expectErr: true,
},
{
name: "Only time and interval, no turbostat data",
turbostatOutput: strings.Join(strings.Split(turbostatOutput, "\n")[0:2], "\n"), // Only header and no data
fieldNames: []string{"Avg_MHz", "Busy%"},
wantFirst: nil,
wantLen: 0,
expectErr: false,
expectErr: true,
},
}

Expand Down Expand Up @@ -547,7 +547,7 @@ X 0 0 1000 10
turbostatOutput: "",
fieldNames: []string{"Avg_MHz"},
want: nil,
wantErr: false,
wantErr: true,
},
{
name: "Only headers, no data",
Expand All @@ -558,7 +558,7 @@ Package Core CPU Avg_MHz Busy%
`,
fieldNames: []string{"Avg_MHz"},
want: nil,
wantErr: false,
wantErr: true,
},
}

Expand Down
Loading