Skip to content

cope with process 1 terminated with signal SIGSEGV #5

@LeviD536

Description

@LeviD536

def init_process(rank, world_size, args):
    """Join the RPC group as the master (rank 0) or as a worker, then shut down.

    Rank 0 registers under the name ``"master"`` and runs ``main(args)``.
    Every other rank registers as ``"{gpu}-{i}"``, where ``gpu`` is the entry
    of ``args.gpus`` this rank maps to and ``i`` is its slot on that GPU.
    All ranks call ``rpc.shutdown()`` before returning.

    Args:
        rank: Index of this process in the RPC group; 0 is the master.
        world_size: Total number of processes in the RPC group.
        args: Options object providing ``master_addr``, ``master_port``,
            ``process_per_gpu`` and ``gpus``.
    """
    os.environ['MASTER_ADDR'] = args.master_addr
    # os.environ only accepts strings; coerce so an integer port (e.g. parsed
    # with type=int) does not raise TypeError. A str value is left unchanged.
    os.environ['MASTER_PORT'] = str(args.master_port)

    # Built once and shared by both roles so master and workers cannot drift
    # apart in their backend configuration.
    # NOTE(review): `_transports` / `_channels` are private keyword arguments;
    # presumably pinned to shared memory + basic channel for single-host use —
    # confirm they are still accepted by the installed torch version.
    backend_options = rpc.TensorPipeRpcBackendOptions(
        num_worker_threads=args.process_per_gpu * world_size + 1,
        rpc_timeout=30.,
        _transports=["shm"],
        _channels=["basic"],
    )

    is_master = rank == 0
    if is_master:
        name = "master"
    else:
        # Ranks 1..N are laid out GPU by GPU, process_per_gpu ranks per GPU.
        gpu = args.gpus[(rank - 1) // args.process_per_gpu]
        i = (rank - 1) % args.process_per_gpu
        name = f"{gpu}-{i}"

    rpc.init_rpc(
        name,
        rank=rank,
        world_size=world_size,
        rpc_backend_options=backend_options,
    )
    if is_master:
        main(args)
    rpc.shutdown()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions