Skip to content

Commit da5f8d8

Browse files
authored
Merge pull request #624 from rajbos/rajbos-patch-1
Add repository statistics calculation script
2 parents 51c39fb + b2902b9 commit da5f8d8

1 file changed

Lines changed: 201 additions & 0 deletions

File tree

scripts/repo-stats.ps1

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
<#
2+
.SYNOPSIS
3+
Calculates repository statistics: file counts, lines of code, file sizes, and disk usage.
4+
5+
.DESCRIPTION
6+
Scans the repository (excluding common build/dependency directories) and reports:
7+
- Total number of scanned files (everything under the root that is not excluded)
8+
- Number of code files vs documentation files
9+
- Lines of actual code (docs excluded)
10+
- Average and max code file size in lines
11+
- Size on disk (total and code-only)
12+
13+
Documentation files (markdown, txt, license files) are counted separately from code.
14+
15+
.PARAMETER Path
16+
Root path of the repository. Defaults to the parent of the script's directory.
17+
18+
.EXAMPLE
19+
./scripts/repo-stats.ps1
20+
#>
21+
[CmdletBinding()]
param(
    # Root of the repository to scan. Defaults to the parent of the script's directory.
    [string]$Path = (Resolve-Path (Join-Path $PSScriptRoot '..')).Path
)

# Fail fast: promote non-terminating cmdlet errors to terminating ones.
$ErrorActionPreference = 'Stop'

# Normalize a caller-supplied -Path (which may be relative, e.g. '.') to an absolute
# provider path. The script derives repo-relative paths by cutting $Path.Length
# characters off FileInfo.FullName, which only works when $Path is the same
# absolute form that FullName starts with. A non-existent path fails here.
$Path = (Resolve-Path -LiteralPath $Path).ProviderPath
27+
28+
# Directory names that are never scanned: build output, dependencies, VCS internals.
$excludeDirs = @(
    'node_modules'
    'dist'
    'out'
    'bin'
    'obj'
    '.git'
    '.vs'
    '.vscode-test'
    'coverage'
    '.nyc_output'
    'packages'
    '.next'
    '.turbo'
    'TestResults'
)

# Repo-relative path prefixes to skip (generated/bundled webview output that is
# committed to the repo). Listed with both separator styles.
$excludePathFragments = @(
    'visualstudio-extension/src/CopilotTokenTracker/webview'
    'visualstudio-extension\src\CopilotTokenTracker\webview'
)

# Individual file names that are not hand-written code (lockfiles and similar).
$excludeFileNames = @(
    'package-lock.json'
    'yarn.lock'
    'pnpm-lock.yaml'
)

# Extensions classified as documentation; these never count towards code stats.
$docExtensions = @(
    '.md'
    '.markdown'
    '.txt'
    '.rst'
    '.adoc'
)

# Well-known documentation file names (often extension-less).
$docFileNames = @(
    'LICENSE'
    'NOTICE'
    'COPYING'
    'AUTHORS'
    'CHANGELOG'
)

# Directory names whose contents are always classified as documentation.
$docDirs = @(
    'docs'
    'doc'
    'documentation'
)

# Extensions that count as code when tallying lines of code.
$codeExtensions = @(
    # JavaScript / TypeScript
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'
    # .NET
    '.cs', '.csx', '.vb', '.fs'
    # PowerShell
    '.ps1', '.psm1', '.psd1'
    # Other general-purpose languages
    '.py', '.rb', '.go', '.rs', '.java', '.kt'
    # C / C++
    '.c', '.cpp', '.cc', '.h', '.hpp'
    # Web markup and styles
    '.html', '.css', '.scss', '.less'
    # Structured data / config
    '.json', '.yaml', '.yml', '.xml'
    # Shell
    '.sh', '.bash'
    # SQL
    '.sql'
)
64+
65+
# Decides whether a file lies in an excluded part of the repository.
#   -FullPath  absolute path of the file
#   -Root      absolute repository root that FullPath starts with
# Returns $true when any segment of the root-relative path is listed in
# $excludeDirs, or when the relative path begins with one of
# $excludePathFragments (compared with '/' separators); otherwise $false.
function Test-ExcludedPath {
    param([string]$FullPath, [string]$Root)

    # Path relative to the root, without a leading separator.
    $relative = $FullPath.Substring($Root.Length).TrimStart('\','/')

    # Segment-wise check against the excluded directory names.
    $blockedSegments = @($relative -split '[\\/]' | Where-Object { $excludeDirs -contains $_ })
    if ($blockedSegments.Count -gt 0) { return $true }

    # Prefix check against the excluded path fragments, separator-insensitive.
    $relativeForward = $relative -replace '\\','/'
    foreach ($fragment in $excludePathFragments) {
        $prefixPattern = ($fragment -replace '\\','/') + '*'
        if ($relativeForward -like $prefixPattern) { return $true }
    }

    return $false
}
79+
80+
# Heuristic detection of minified / bundled files.
# A file is reported as minified when its name ends in .min.js/.min.css/.min.mjs,
# or when it is a .js/.mjs/.cjs/.css file of at least 10KB whose average line
# length (total characters divided by newline-separated lines) exceeds 500.
# Files that cannot be read are reported as not minified.
function Test-IsMinified {
    param([System.IO.FileInfo]$File)

    # Explicit ".min." naming is conclusive on its own.
    if ($File.Name -match '\.min\.(js|css|mjs)$') { return $true }

    # Small files are never inspected.
    if ($File.Length -lt 10KB) { return $false }

    # Only script and stylesheet extensions are candidates for the heuristic.
    $candidateExtensions = '.js', '.mjs', '.cjs', '.css'
    if ($candidateExtensions -notcontains $File.Extension.ToLowerInvariant()) { return $false }

    try {
        $text = Get-Content -LiteralPath $File.FullName -Raw -ErrorAction Stop
        $totalLines = ($text -split "`n").Count
        if ($totalLines -le 0) { return $false }

        $meanLineLength = $text.Length / $totalLines
        return ($meanLineLength -gt 500)
    } catch {
        return $false
    }
}
98+
99+
# Classifies a file as 'doc', 'code' or 'other'.
#   -File  the file to classify
#   -Root  absolute repository root that File.FullName starts with
# Checks run in this order; the first match wins:
#   1. first path segment below the root is in $docDirs   -> 'doc'
#   2. name is in $excludeFileNames, or file is minified  -> 'other'
#   3. extension in $docExtensions, or base name / full
#      name in $docFileNames                              -> 'doc'
#   4. extension in $codeExtensions                       -> 'code'
#   5. anything else                                      -> 'other'
function Get-FileCategory {
    param([System.IO.FileInfo]$File, [string]$Root)

    $extension = $File.Extension.ToLowerInvariant()
    $stemUpper = $File.BaseName.ToUpperInvariant()
    $fileUpper = $File.Name.ToUpperInvariant()

    # First segment of the root-relative path.
    $relative = $File.FullName.Substring($Root.Length).TrimStart('\','/')
    $topLevel = ($relative -split '[\\/]')[0].ToLowerInvariant()

    if ($docDirs -contains $topLevel) { return 'doc' }

    # -or short-circuits, so the minification check only runs for non-lockfiles.
    if (($excludeFileNames -contains $File.Name) -or (Test-IsMinified -File $File)) {
        return 'other'
    }

    $looksLikeDoc = ($docExtensions -contains $extension) -or
                    ($docFileNames -contains $stemUpper) -or
                    ($docFileNames -contains $fileUpper)
    if ($looksLikeDoc) { return 'doc' }

    if ($codeExtensions -contains $extension) { 'code' } else { 'other' }
}
118+
119+
Write-Host ("Scanning: {0}" -f $Path) -ForegroundColor Cyan

# Enumerate every file under the root (hidden ones included via -Force) and keep
# only those that Test-ExcludedPath does not reject.
$keepFile = { -not (Test-ExcludedPath -FullPath $_.FullName -Root $Path) }
$allFiles = Get-ChildItem -Path $Path -Recurse -File -Force |
    Where-Object -FilterScript $keepFile
124+
# Buckets for the classified files. Generic lists are used instead of arrays
# because '+=' on an array copies the whole array on every append, which is
# quadratic in the number of files. Lists still support .Count, foreach and
# pipeline enumeration.
$codeFiles  = New-Object 'System.Collections.Generic.List[object]'
$docFiles   = New-Object 'System.Collections.Generic.List[object]'
$otherFiles = New-Object 'System.Collections.Generic.List[object]'

# Route each scanned file into its bucket according to Get-FileCategory.
foreach ($f in $allFiles) {
    switch (Get-FileCategory -File $f -Root $Path) {
        'code'  { $codeFiles.Add($f) }
        'doc'   { $docFiles.Add($f) }
        default { $otherFiles.Add($f) }
    }
}
135+
136+
# Count lines of code per file (non-empty lines).
# The result is forced into an array with @(...) so that .Count is reliable:
# a single [PSCustomObject] has no usable Count in Windows PowerShell 5.1, and
# an empty result would otherwise be $null.
$codeStats = @(
    foreach ($f in $codeFiles) {
        $lines = 0
        try {
            # [int] turns a missing measurement into 0 instead of $null.
            $lines = [int](Get-Content -LiteralPath $f.FullName -ErrorAction Stop | Measure-Object -Line).Lines
        } catch {
            # Best effort: an unreadable file counts as 0 lines, but say so under -Verbose.
            Write-Verbose ("Could not read '{0}': {1}" -f $f.FullName, $_.Exception.Message)
            $lines = 0
        }
        [PSCustomObject]@{
            Path  = $f.FullName
            Lines = $lines
            Size  = $f.Length
        }
    }
)

# Totals are summed explicitly so that empty input yields 0 rather than $null.
$totalLoc = [long]0
foreach ($stat in $codeStats) { $totalLoc += $stat.Lines }

# Average lines per code file, rounded to one decimal; 0 when there are no code files.
$avgLoc = if ($codeStats.Count -gt 0) { [math]::Round($totalLoc / $codeStats.Count, 1) } else { 0 }

# Code file with the most lines ($null when there are no code files).
$maxFile = $codeStats | Sort-Object Lines -Descending | Select-Object -First 1

# Byte totals for all scanned files and for code files only.
$sizeBytes = [long]0
foreach ($f in $allFiles) { $sizeBytes += $f.Length }

$codeBytes = [long]0
foreach ($f in $codeFiles) { $codeBytes += $f.Length }
157+
# Renders a byte count as a human-readable string.
# Values of at least 1KB are shown in the largest fitting unit (KB, MB or GB)
# with two decimals; smaller values are shown as "<n> B".
function Format-Size {
    param([long]$Bytes)

    # Units from largest to smallest; the first one that fits is used.
    $units = @(
        @{ Threshold = 1GB; Suffix = 'GB' }
        @{ Threshold = 1MB; Suffix = 'MB' }
        @{ Threshold = 1KB; Suffix = 'KB' }
    )

    foreach ($unit in $units) {
        if ($Bytes -ge $unit.Threshold) {
            return ('{0:N2} {1}' -f ($Bytes / $unit.Threshold), $unit.Suffix)
        }
    }

    return "$Bytes B"
}
164+
165+
# ---- Console report ----------------------------------------------------------

# Header and file counts.
Write-Host ""
Write-Host "=== Repository Stats ===" -ForegroundColor Green
Write-Host "Root: $Path"
Write-Host "Total files: $($allFiles.Count)"
Write-Host " Code files: $($codeFiles.Count)"
Write-Host " Doc files: $($docFiles.Count)"
Write-Host " Other files: $($otherFiles.Count)"

# Line counts. The -f operator is kept here so numbers follow the current culture.
Write-Host ""
Write-Host ("Lines of code: {0:N0}" -f $totalLoc)
Write-Host ("Avg lines/file: {0}" -f $avgLoc)
if ($maxFile) {
    # Show the largest file relative to the repository root.
    $largestRelative = $maxFile.Path.Substring($Path.Length).TrimStart('\','/')
    Write-Host ("Largest file: {0} ({1:N0} lines)" -f $largestRelative, $maxFile.Lines)
}

# Sizes on disk.
Write-Host ""
Write-Host ("Size on disk: {0}" -f (Format-Size -Bytes $sizeBytes))
Write-Host ("Code-only size: {0}" -f (Format-Size -Bytes $codeBytes))

# The five code files with the most lines, largest first.
Write-Host ""
Write-Host "Top 5 largest code files by lines:" -ForegroundColor Yellow
$topFive = $codeStats | Sort-Object -Property Lines -Descending | Select-Object -First 5
foreach ($entry in $topFive) {
    $entryRelative = $entry.Path.Substring($Path.Length).TrimStart('\','/')
    Write-Host (" {0,6:N0} {1}" -f $entry.Lines, $entryRelative)
}
188+
189+
# Collect the reported figures in a single object for scripting scenarios.
# NOTE(review): the object is discarded (assigned to $null), so the script emits
# nothing on the pipeline and callers cannot consume it. Remove the discard if
# the summary is meant to be returned - TODO confirm intent.
$null = [PSCustomObject]@{
    TotalFiles          = $allFiles.Count
    CodeFiles           = $codeFiles.Count
    DocFiles            = $docFiles.Count
    OtherFiles          = $otherFiles.Count
    LinesOfCode         = $totalLoc
    AvgLinesPerCodeFile = $avgLoc
    # Largest code file; 0 / $null when there are no code files.
    MaxFileLines        = if ($maxFile) { $maxFile.Lines } else { 0 }
    MaxFilePath         = if ($maxFile) { $maxFile.Path } else { $null }
    SizeOnDiskBytes     = $sizeBytes
    CodeSizeBytes       = $codeBytes
}

0 commit comments

Comments
 (0)