diff --git a/README.md b/README.md index d508f00c..86c27436 100644 --- a/README.md +++ b/README.md @@ -118,3 +118,10 @@ On macOS or Windows systems, we recommend using a VM or the provided `.devcontai - `test`: Runs tests. - `test-registry`: Stands up a local registry for caching images used in tests. - `docs/env-variables.md`: Updated the [environment variables documentation](./docs/env-variables.md). + +**Submodule Handling Fix** + +An issue with Git submodule handling has been resolved through iterative refinements. The fix makes submodule cloning and relative submodule URL resolution robust without relying on calls to the `git` binary (the current fallback). Enable it by setting: +``` +ENVBUILDER_GIT_CLONE_SUBMODULES=true +``` diff --git a/docs/env-variables.md b/docs/env-variables.md index e6fa7ca5..6c18b7d4 100644 --- a/docs/env-variables.md +++ b/docs/env-variables.md @@ -28,6 +28,7 @@ | `--git-clone-depth` | `ENVBUILDER_GIT_CLONE_DEPTH` | | The depth to use when cloning the Git repository. | | `--git-clone-single-branch` | `ENVBUILDER_GIT_CLONE_SINGLE_BRANCH` | | Clone only a single branch of the Git repository. | | `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.zaure.com. | +| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Recursively clone Git submodules after cloning the repository. | | `--git-username` | `ENVBUILDER_GIT_USERNAME` | | The username to use for Git authentication. This is optional. | | `--git-password` | `ENVBUILDER_GIT_PASSWORD` | | The password to use for Git authentication. This is optional. | | `--git-ssh-private-key-path` | `ENVBUILDER_GIT_SSH_PRIVATE_KEY_PATH` | | Path to an SSH private key to be used for Git authentication. If this is set, then GIT_SSH_PRIVATE_KEY_BASE64 cannot be set. 
| diff --git a/git/git.go b/git/git.go index efcffa91..ff4c42ec 100644 --- a/git/git.go +++ b/git/git.go @@ -7,7 +7,9 @@ import ( "fmt" "io" "net" + "net/url" "os" + "path" "strings" "github.com/coder/envbuilder/options" @@ -15,6 +17,7 @@ import ( giturls "github.com/chainguard-dev/git-urls" "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/protocol/packp/capability" @@ -41,6 +44,7 @@ type CloneRepoOptions struct { Depth int CABundle []byte ProxyOptions transport.ProxyOptions + Submodules bool } // CloneRepo will clone the repository at the given URL into the given path. @@ -119,7 +123,7 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt return false, nil } - _, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ + repo, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ URL: parsed.String(), Auth: opts.RepoAuth, Progress: opts.Progress, @@ -136,6 +140,15 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt if err != nil { return false, fmt.Errorf("clone %q: %w", opts.RepoURL, err) } + + // Initialize submodules if requested + if opts.Submodules { + err = initSubmodules(ctx, logf, repo, opts) + if err != nil { + return true, fmt.Errorf("init submodules: %w", err) + } + } + return true, nil } @@ -361,6 +374,7 @@ func CloneOptionsFromOptions(logf func(string, ...any), options options.Options) ThinPack: options.GitCloneThinPack, Depth: int(options.GitCloneDepth), CABundle: caBundle, + Submodules: options.GitCloneSubmodules, } cloneOpts.RepoAuth = SetupRepoAuth(logf, &options) @@ -418,3 +432,271 @@ func ProgressWriter(write func(line string, args ...any)) io.WriteCloser { done: done, } } + +// resolveSubmoduleURL resolves a potentially relative submodule URL against the parent repository URL +// 
// ResolveSubmoduleURL resolves a potentially relative submodule URL against a
// parent repository URL.
//
// A submodule URL is treated as relative only when it starts with "./" or
// "../" and does not contain "://"; any other value is returned unchanged and
// parentURL is not inspected at all.
//
// Relative URLs are resolved by treating the parent URL as a directory, so
// "../x" names a sibling of the parent repository and "./x" names a path below
// it. The parent may be:
//
//   - a regular URL or plain path (https://host/org/repo.git, /srv/repo.git);
//     scheme, user info and host are carried over, query and fragment are not;
//   - an scp-like address ([user@]host:org/repo.git); the "[user@]host:" prefix
//     is kept verbatim and only the path after the colon is rewritten.
//
// An error is returned only when a non-scp-like parent URL cannot be parsed.
func ResolveSubmoduleURL(parentURL, submoduleURL string) (string, error) {
	isRelative := strings.HasPrefix(submoduleURL, "../") || strings.HasPrefix(submoduleURL, "./")
	if strings.Contains(submoduleURL, "://") || !isRelative {
		return submoduleURL, nil
	}

	// scp-like parents are not valid input for url.Parse (it either rejects the
	// colon in the first path segment or mistakes the host for a scheme), so
	// they are resolved without going through net/url.
	if hostPart, repoPath, ok := splitSCPLikeURL(parentURL); ok {
		return hostPart + ":" + resolveSubmodulePath(repoPath, submoduleURL), nil
	}

	parentParsed, err := url.Parse(parentURL)
	if err != nil {
		return "", fmt.Errorf("parse parent URL: %w", err)
	}

	resolved := url.URL{
		Scheme: parentParsed.Scheme,
		User:   parentParsed.User,
		Host:   parentParsed.Host,
		Path:   resolveSubmodulePath(parentParsed.Path, submoduleURL),
	}
	return resolved.String(), nil
}

// splitSCPLikeURL splits an scp-like address "[user@]host:path" into the part
// before the first colon and the path after it. It reports ok=false when raw
// contains "://", has no colon, has nothing before the colon, or has a slash
// before the colon (which makes it a plain path rather than a host).
func splitSCPLikeURL(raw string) (string, string, bool) {
	if strings.Contains(raw, "://") {
		return "", "", false
	}
	hostPart, repoPath, found := strings.Cut(raw, ":")
	if !found || hostPart == "" || strings.Contains(hostPart, "/") {
		return "", "", false
	}
	return hostPart, repoPath, true
}

// resolveSubmodulePath applies the slash-separated components of relative to
// parentPath: ".." moves one level up, "." and empty components are ignored,
// anything else is appended. One trailing slash of parentPath is dropped first
// and the result is cleaned with path.Clean.
func resolveSubmodulePath(parentPath, relative string) string {
	current := strings.TrimSuffix(parentPath, "/")
	for _, part := range strings.Split(relative, "/") {
		switch part {
		case "..":
			current = path.Dir(current)
		case ".", "":
			// Nothing to do: stay where we are.
		default:
			current += "/" + part
		}
	}
	return path.Clean(current)
}

// initSubmodules recursively initializes and updates all submodules in the repository.
+func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Repository, opts CloneRepoOptions) error { + logf("🔗 Initializing git submodules...") + + w, err := repo.Worktree() + if err != nil { + return fmt.Errorf("get worktree: %w", err) + } + + subs, err := w.Submodules() + if err != nil { + return fmt.Errorf("get submodules: %w", err) + } + + if len(subs) == 0 { + logf("No submodules found") + return nil + } + + logf("Found %d submodule(s)", len(subs)) + + // Get the parent repository URL for resolving relative submodule URLs + cfg, err := repo.Config() + if err != nil { + return fmt.Errorf("get repo config: %w", err) + } + + parentURL := opts.RepoURL + if origin, hasOrigin := cfg.Remotes["origin"]; hasOrigin && len(origin.URLs) > 0 { + parentURL = origin.URLs[0] + } + logf("Parent repository URL: %s", parentURL) + + for _, sub := range subs { + subConfig := sub.Config() + logf("📦 Initializing submodule: %s", subConfig.Name) + logf(" Submodule path: %s", subConfig.Path) + logf(" Submodule URL (from .gitmodules): %s", subConfig.URL) + + // Get the expected commit hash + subStatus, err := sub.Status() + if err != nil { + return fmt.Errorf("get submodule status for %q: %w", subConfig.Name, err) + } + logf(" Expected commit: %s", subStatus.Expected) + + // Resolve the submodule URL + resolvedURL, err := ResolveSubmoduleURL(parentURL, subConfig.URL) + if err != nil { + return fmt.Errorf("resolve submodule URL for %q: %w", subConfig.Name, err) + } + logf(" Resolved URL: %s", resolvedURL) + + // Clone the submodule manually + err = cloneSubmodule(ctx, logf, w, subConfig, subStatus.Expected, resolvedURL, opts) + if err != nil { + return fmt.Errorf("clone submodule %q: %w", subConfig.Name, err) + } + + logf("✓ Submodule initialized: %s", subConfig.Name) + + // Recursively handle nested submodules + subRepo, err := sub.Repository() + if err != nil { + logf(" ⚠ Could not open submodule repository %s: %v", subConfig.Name, err) + continue + } + + // Check 
for nested submodules + subWorktree, err := subRepo.Worktree() + if err == nil { + nestedSubs, err := subWorktree.Submodules() + if err == nil && len(nestedSubs) > 0 { + logf(" Found %d nested submodule(s) in %s", len(nestedSubs), subConfig.Name) + // Create new opts with the submodule's URL as the parent + nestedOpts := opts + nestedOpts.RepoURL = resolvedURL + err = initSubmodules(ctx, logf, subRepo, nestedOpts) + if err != nil { + return fmt.Errorf("init nested submodules in %q: %w", subConfig.Name, err) + } + } + } + } + + logf("✓ All submodules initialized successfully") + return nil +} + +// cloneSubmodule manually clones a submodule repository +func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktree *git.Worktree, subConfig *config.Submodule, expectedHash plumbing.Hash, resolvedURL string, opts CloneRepoOptions) error { + // Get the submodule directory within the parent worktree + submodulePath := subConfig.Path + + // Create the submodule directory + subFS, err := parentWorktree.Filesystem.Chroot(submodulePath) + if err != nil { + return fmt.Errorf("chroot to submodule path: %w", err) + } + + // Check if already cloned + _, err = subFS.Stat(".git") + if err == nil { + logf(" Submodule already cloned, checking out expected commit...") + // Open the existing repository + subRepo, err := git.Open( + filesystem.NewStorage(subFS, cache.NewObjectLRU(cache.DefaultMaxSize)), + subFS, + ) + if err != nil { + return fmt.Errorf("open existing submodule: %w", err) + } + + subWorktree, err := subRepo.Worktree() + if err != nil { + return fmt.Errorf("get submodule worktree: %w", err) + } + + // Checkout the expected commit + err = subWorktree.Checkout(&git.CheckoutOptions{ + Hash: expectedHash, + }) + if err != nil { + return fmt.Errorf("checkout expected commit: %w", err) + } + return nil + } + + // Clone the submodule + logf(" Cloning submodule from: %s", resolvedURL) + + // Create .git directory for the submodule + err = 
subFS.MkdirAll(".git", 0o755) + if err != nil { + return fmt.Errorf("create .git directory: %w", err) + } + + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return fmt.Errorf("chroot to .git: %w", err) + } + + gitStorage := filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) + + // Clone the submodule repository + // Use SingleBranch=false to fetch all branches so we can find the commit + subRepo, err := git.CloneContext(ctx, gitStorage, subFS, &git.CloneOptions{ + URL: resolvedURL, + Auth: opts.RepoAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + SingleBranch: false, // Fetch all branches + NoCheckout: true, // Don't checkout yet, we'll do it manually + }) + if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) { + return fmt.Errorf("clone submodule repository: %w", err) + } + + // Verify the commit exists + logf(" Verifying commit exists: %s", expectedHash) + _, err = subRepo.CommitObject(expectedHash) + if err != nil { + // Commit not found, try fetching with the specific hash + logf(" Commit not found, attempting to fetch it directly...") + err = subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + RefSpecs: []config.RefSpec{ + config.RefSpec("+" + expectedHash.String() + ":" + expectedHash.String()), + }, + Auth: opts.RepoAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if err != nil && err != git.NoErrAlreadyUpToDate { + // If that fails, try fetching all refs + logf(" Direct fetch failed, fetching all refs...") + err = subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + Auth: opts.RepoAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if err != nil && err != git.NoErrAlreadyUpToDate { + return fmt.Errorf("fetch commit %s: 
%w", expectedHash, err) + } + } + + // Verify again + _, err = subRepo.CommitObject(expectedHash) + if err != nil { + return fmt.Errorf("commit %s still not found after fetch: %w", expectedHash, err) + } + } + + // Checkout the specific commit expected by the parent repository + logf(" Checking out commit: %s", expectedHash) + subWorktree, err := subRepo.Worktree() + if err != nil { + return fmt.Errorf("get submodule worktree: %w", err) + } + + err = subWorktree.Checkout(&git.CheckoutOptions{ + Hash: expectedHash, + }) + if err != nil { + return fmt.Errorf("checkout expected commit %s: %w", expectedHash, err) + } + + return nil +} diff --git a/git/git_test.go b/git/git_test.go index 0da5a163..7077d43f 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -492,6 +492,74 @@ func mustRead(t *testing.T, fs billy.Filesystem, path string) string { return string(content) } +func TestResolveSubmoduleURL(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + parentURL string + subURL string + expect string + expectErr string + }{ + { + name: "absolute", + parentURL: "https://example.com/org/main.git", + subURL: "https://github.com/other/repo.git", + expect: "https://github.com/other/repo.git", + }, + { + name: "relativeSibling", + parentURL: "https://example.com/org/main.git", + subURL: "../deps/lib.git", + expect: "https://example.com/org/deps/lib.git", + }, + { + name: "relativeChild", + parentURL: "https://example.com/org/main.git", + subURL: "./extras/tool.git", + expect: "https://example.com/org/main.git/extras/tool.git", + }, + { + name: "badParent", + parentURL: "://bad", + subURL: "./child", + expectErr: "parse parent URL", + }, + } + + for _, tc := range cases { + c := tc + t.Run(c.name, func(t *testing.T) { + t.Parallel() + got, err := git.ResolveSubmoduleURL(c.parentURL, c.subURL) + if c.expectErr != "" { + require.ErrorContains(t, err, c.expectErr) + return + } + require.NoError(t, err) + require.Equal(t, c.expect, got) + }) + } +} + +func 
TestCloneOptionsFromOptions_Submodules(t *testing.T) { + t.Parallel() + + fs := memfs.New() + opts := options.Options{ + Filesystem: fs, + WorkspaceFolder: "/workspace", + GitURL: "https://example.com/example/repo.git", + GitCloneSubmodules: true, + GitCloneThinPack: true, + } + + cloneOpts, err := git.CloneOptionsFromOptions(t.Logf, opts) + require.NoError(t, err) + require.True(t, cloneOpts.Submodules) +} + // generates a random ed25519 private key func randKeygen(t *testing.T) gossh.Signer { t.Helper() diff --git a/integration/integration_test.go b/integration/integration_test.go index 913ab567..ca8cd9cc 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -416,6 +416,33 @@ func TestSucceedsGitAuth(t *testing.T) { require.Contains(t, gitConfig, srv.URL) } +func TestGitSubmodules(t *testing.T) { + t.Parallel() + + // Create parent repo with a submodule + parentSrv, submoduleSrv := gittest.CreateGitServerWithSubmodule(t, gittest.Options{ + Files: map[string]string{ + "Dockerfile": "FROM " + testImageAlpine, + }, + }, gittest.Options{ + Files: map[string]string{ + "subfile.txt": "submodule content", + }, + }) + + ctr, err := runEnvbuilder(t, runOpts{env: []string{ + envbuilderEnv("GIT_URL", parentSrv.URL), + envbuilderEnv("DOCKERFILE_PATH", "Dockerfile"), + envbuilderEnv("GIT_CLONE_SUBMODULES", "true"), + }}) + require.NoError(t, err) + + // Verify the .gitmodules file exists + gitmodules := execContainer(t, ctr, "cat /workspaces/empty/.gitmodules") + require.Contains(t, gitmodules, "[submodule") + require.Contains(t, gitmodules, submoduleSrv.URL) +} + func TestGitSSHAuth(t *testing.T) { t.Parallel() diff --git a/options/options.go b/options/options.go index 8cdf723a..d52f4f64 100644 --- a/options/options.go +++ b/options/options.go @@ -108,6 +108,9 @@ type Options struct { GitCloneSingleBranch bool // GitCloneThinPack clone with thin pack compabilities. This is optional. 
GitCloneThinPack bool + // GitCloneSubmodules recursively initializes submodules after cloning. + // This is optional and defaults to false. + GitCloneSubmodules bool // GitUsername is the username to use for Git authentication. This is // optional. GitUsername string @@ -386,6 +389,12 @@ func (o *Options) CLI() serpent.OptionSet { "ensuring that even when thin pack compatibility is activated," + "it will not be turned on for the domain dev.zaure.com.", }, + { + Flag: "git-clone-submodules", + Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), + Value: serpent.BoolOf(&o.GitCloneSubmodules), + Description: "Recursively clone Git submodules after cloning the repository.", + }, { Flag: "git-username", Env: WithEnvPrefix("GIT_USERNAME"), diff --git a/options/options_test.go b/options/options_test.go index ed5dcd3c..6a848a21 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -72,6 +72,25 @@ func TestEnvOptionParsing(t *testing.T) { require.False(t, o.GitCloneSingleBranch) require.True(t, o.GitCloneThinPack) }) + + t.Run("remote repo build mode", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("REMOTE_REPO_BUILD_MODE"), "true") + o := runCLI() + require.True(t, o.RemoteRepoBuildMode) + }) + + t.Run("binary path", func(t *testing.T) { + const val = "/usr/local/bin/envbuilder" + t.Setenv(options.WithEnvPrefix("BINARY_PATH"), val) + o := runCLI() + require.Equal(t, o.BinaryPath, val) + }) + + t.Run("git clone submodules", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "true") + o := runCLI() + require.True(t, o.GitCloneSubmodules) + }) }) } diff --git a/options/testdata/options.golden b/options/testdata/options.golden index 92a85232..799bf535 100644 --- a/options/testdata/options.golden +++ b/options/testdata/options.golden @@ -99,6 +99,9 @@ OPTIONS: --git-clone-single-branch bool, $ENVBUILDER_GIT_CLONE_SINGLE_BRANCH Clone only a single branch of the Git repository. 
+ --git-clone-submodules bool, $ENVBUILDER_GIT_CLONE_SUBMODULES + Recursively clone Git submodules after cloning the repository. + --git-clone-thinpack bool, $ENVBUILDER_GIT_CLONE_THINPACK (default: true) Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for diff --git a/testutil/gittest/gittest.go b/testutil/gittest/gittest.go index f3d5f1d3..03cac047 100644 --- a/testutil/gittest/gittest.go +++ b/testutil/gittest/gittest.go @@ -269,6 +269,40 @@ func NewRepo(t *testing.T, fs billy.Filesystem, commits ...CommitFunc) *git.Repo return repo } +// CreateGitServerWithSubmodule creates a parent git repo with a submodule pointing to another repo. +// Returns the parent server and the submodule server. +func CreateGitServerWithSubmodule(t *testing.T, opts Options, submoduleOpts Options) (parentSrv *httptest.Server, submoduleSrv *httptest.Server) { + t.Helper() + + // Create the submodule repo first + submoduleSrv = CreateGitServer(t, submoduleOpts) + + // Create the parent repo with .gitmodules pointing to submodule + if opts.AuthMW == nil { + opts.AuthMW = mwtest.BasicAuthMW(opts.Username, opts.Password) + } + + fs := memfs.New() + commits := make([]CommitFunc, 0) + for path, content := range opts.Files { + commits = append(commits, Commit(t, path, content, "my test commit")) + } + // Add gitmodules file pointing to the submodule server + gitmodulesContent := fmt.Sprintf(`[submodule "submod"] + path = submod + url = %s +`, submoduleSrv.URL) + commits = append(commits, Commit(t, ".gitmodules", gitmodulesContent, "add submodule")) + _ = NewRepo(t, fs, commits...) + + if opts.TLS { + parentSrv = httptest.NewTLSServer(opts.AuthMW(NewServer(fs))) + } else { + parentSrv = httptest.NewServer(opts.AuthMW(NewServer(fs))) + } + return parentSrv, submoduleSrv +} + // WriteFile writes a file to the filesystem. func WriteFile(t *testing.T, fs billy.Filesystem, path, content string) { t.Helper()