Skip to content

Commit b2902b9

Browse files
authored
Add repository statistics calculation script
This script calculates various statistics about the repository, including file counts, lines of code, and disk usage, while excluding specified directories and file types.
1 parent 51c39fb commit b2902b9

1 file changed

Lines changed: 201 additions & 0 deletions

File tree

scripts/repo-stats.ps1

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
<#
.SYNOPSIS
    Calculates repository statistics: file counts, lines of code, file sizes, and disk usage.

.DESCRIPTION
    Walks the directory tree under -Path (skipping common build/dependency
    directories) and reports:
      - Total number of files scanned
      - Number of code files vs documentation files
      - Lines of actual code (documentation excluded)
      - Average and maximum code file size in lines
      - Size on disk (all scanned files, and code files only)

    Documentation files (markdown, txt, license files) are counted separately from code.

.PARAMETER Path
    Root path of the repository. Defaults to the parent of the script's directory.

.EXAMPLE
    ./scripts/repo-stats.ps1
#>
[CmdletBinding()]
param(
    # Repository root; by default one level above the folder containing this script.
    [string]$Path = (Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath '..')).Path
)
25+
26+
# Treat every error as terminating so a failed scan never produces partial numbers.
$ErrorActionPreference = 'Stop'

# Directory names that are skipped wherever they appear in the tree
# (build artifacts, dependencies, VCS internals).
$excludeDirs = @(
    'node_modules'
    'dist'
    'out'
    'bin'
    'obj'
    '.git'
    '.vs'
    '.vscode-test'
    'coverage'
    '.nyc_output'
    'packages'
    '.next'
    '.turbo'
    'TestResults'
)

# Repository-relative path prefixes to skip (generated/bundled webview output
# that is committed to the repo). Both separator styles are listed.
$excludePathFragments = @(
    'visualstudio-extension/src/CopilotTokenTracker/webview'
    'visualstudio-extension\src\CopilotTokenTracker\webview'
)

# File names that are never counted as code (lockfiles etc. are not hand-written).
$excludeFileNames = @(
    'package-lock.json'
    'yarn.lock'
    'pnpm-lock.yaml'
)

# Extensions classified as documentation (kept out of the code statistics).
$docExtensions = @(
    '.md'
    '.markdown'
    '.txt'
    '.rst'
    '.adoc'
)

# Upper-cased file names classified as documentation regardless of extension.
$docFileNames = @(
    'LICENSE'
    'NOTICE'
    'COPYING'
    'AUTHORS'
    'CHANGELOG'
)

# Directory names whose contents are always classified as documentation.
$docDirs = @(
    'docs'
    'doc'
    'documentation'
)

# Extensions classified as code for line counting, grouped by ecosystem.
$codeExtensions = @(
    # JavaScript / TypeScript
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'
    # .NET
    '.cs', '.csx', '.vb', '.fs'
    # PowerShell
    '.ps1', '.psm1', '.psd1'
    # Other general-purpose languages
    '.py', '.rb', '.go', '.rs', '.java', '.kt'
    # C / C++
    '.c', '.cpp', '.cc', '.h', '.hpp'
    # Web markup and styles
    '.html', '.css', '.scss', '.less'
    # Structured data / configuration
    '.json', '.yaml', '.yml', '.xml'
    # Shell
    '.sh', '.bash'
    # SQL
    '.sql'
)
64+
65+
function Test-ExcludedPath {
    <#
    .SYNOPSIS
        Returns $true when a file should be left out of the scan.

    .DESCRIPTION
        A file is excluded when either:
          - any DIRECTORY segment of its root-relative path is listed in the
            script-level $excludeDirs, or
          - its root-relative path lies inside (or is exactly) one of the
            script-level $excludePathFragments.
        All comparisons are case-insensitive and accept both '\' and '/'.

    .PARAMETER FullPath
        Absolute path of the file. Must start with $Root.

    .PARAMETER Root
        Absolute path of the scan root; it is stripped from $FullPath by length.

    .OUTPUTS
        System.Boolean
    #>
    param([string]$FullPath, [string]$Root)

    $rel = $FullPath.Substring($Root.Length).TrimStart('\', '/')
    $segments = @($rel -split '[\\/]')

    # The last segment is the file's own name. Only the directory segments are
    # checked, so a file that merely happens to be called "bin" or "out" is kept.
    for ($i = 0; $i -lt $segments.Count - 1; $i++) {
        if ($excludeDirs -contains $segments[$i]) { return $true }
    }

    $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase
    $relNorm = $rel -replace '\\', '/'
    foreach ($frag in $excludePathFragments) {
        $fragNorm = ($frag -replace '\\', '/').TrimEnd('/')

        # An empty fragment would otherwise match every path.
        if (-not $fragNorm) { continue }

        # Match on a path-segment boundary: "a/webview" excludes "a/webview/x.js"
        # but not the siblings "a/webviewHost.cs" or "a/webview-src/x.ts".
        # A literal comparison also keeps '[' / ']' in a fragment from acting as wildcards.
        if ($relNorm.Equals($fragNorm, $ignoreCase) -or
            $relNorm.StartsWith("$fragNorm/", $ignoreCase)) {
            return $true
        }
    }
    return $false
}
79+
80+
# Heuristic detector for minified / bundled assets.
# A ".min.js" / ".min.css" / ".min.mjs" name is conclusive. Otherwise, JS/CSS
# files of 10KB or more are read and flagged when their average line length
# exceeds 500 characters (minified output has very few line breaks).
function Test-IsMinified {
    param([System.IO.FileInfo]$File)

    # Naming convention wins regardless of size or content.
    if ($File.Name -match '\.min\.(js|css|mjs)$') { return $true }

    # Small files and non-JS/CSS files are never sampled.
    $sampledExtensions = '.js', '.mjs', '.cjs', '.css'
    $bigEnough = $File.Length -ge 10KB
    if (-not ($bigEnough -and ($sampledExtensions -contains $File.Extension.ToLowerInvariant()))) {
        return $false
    }

    try {
        $text = Get-Content -LiteralPath $File.FullName -Raw -ErrorAction Stop
        $lineTotal = ($text -split "`n").Count
        if ($lineTotal -gt 0) {
            return (($text.Length / $lineTotal) -gt 500)
        }
        return $false
    }
    catch {
        # Unreadable files are treated as "not minified" (best effort).
        return $false
    }
}
98+
99+
function Get-FileCategory {
    <#
    .SYNOPSIS
        Classifies a file as 'doc', 'code' or 'other'.

    .DESCRIPTION
        Rules are applied in this order, first match wins:
          1. First path segment under $Root is listed in $docDirs      -> 'doc'
          2. File name is listed in $excludeFileNames                  -> 'other'
          3. Test-IsMinified reports the file as minified              -> 'other'
          4. Extension is listed in $docExtensions                     -> 'doc'
          5. Upper-cased base name or full name is in $docFileNames    -> 'doc'
          6. Extension is listed in $codeExtensions                    -> 'code'
          7. Anything else                                             -> 'other'

    .PARAMETER File
        The file to classify.

    .PARAMETER Root
        Absolute scan root; stripped from the file's FullName by length.
    #>
    param([System.IO.FileInfo]$File, [string]$Root)

    # Everything under a top-level documentation directory is documentation.
    $relativePath = $File.FullName.Substring($Root.Length).TrimStart('\','/')
    $topSegment = ($relativePath -split '[\\/]')[0].ToLowerInvariant()
    if ($docDirs -contains $topSegment) { return 'doc' }

    # Lockfiles and minified bundles are neither docs nor hand-written code.
    if (($excludeFileNames -contains $File.Name) -or (Test-IsMinified -File $File)) {
        return 'other'
    }

    $extension = $File.Extension.ToLowerInvariant()
    $upperBase = $File.BaseName.ToUpperInvariant()
    $upperName = $File.Name.ToUpperInvariant()

    $looksLikeDoc = ($docExtensions -contains $extension) -or
                    ($docFileNames -contains $upperBase) -or
                    ($docFileNames -contains $upperName)
    if ($looksLikeDoc) { return 'doc' }

    if ($codeExtensions -contains $extension) { return 'code' }
    return 'other'
}
118+
119+
# ---------------------------------------------------------------------------
# Scan the tree, classify every file, and compute the aggregate numbers.
# Produces: $allFiles, $codeFiles, $docFiles, $otherFiles (arrays of FileInfo),
#           $codeStats (one Path/Lines/Size object per code file),
#           $totalLoc, $avgLoc, $maxFile, $sizeBytes, $codeBytes.
# ---------------------------------------------------------------------------

# Normalise the root to an absolute file-system path. Every relative path in
# this script is derived with FullName.Substring($Path.Length), which is only
# correct when $Path is the same absolute prefix that Get-ChildItem reports in
# FullName; a relative or PSDrive-qualified -Path would slice at the wrong offset.
$Path = (Resolve-Path -LiteralPath $Path).ProviderPath

Write-Host "Scanning: $Path" -ForegroundColor Cyan

# @(...) guarantees an array even for zero or one result, so .Count is reliable.
# -LiteralPath keeps '[' and ']' in the root path from being treated as wildcards.
$allFiles = @(
    Get-ChildItem -LiteralPath $Path -Recurse -File -Force |
        Where-Object { -not (Test-ExcludedPath -FullPath $_.FullName -Root $Path) }
)

# Growable lists avoid the O(n^2) copying of "$array += $item" on large trees.
$codeList  = New-Object 'System.Collections.Generic.List[System.IO.FileInfo]'
$docList   = New-Object 'System.Collections.Generic.List[System.IO.FileInfo]'
$otherList = New-Object 'System.Collections.Generic.List[System.IO.FileInfo]'

[long]$sizeBytes = 0
foreach ($f in $allFiles) {
    $sizeBytes += $f.Length
    switch (Get-FileCategory -File $f -Root $Path) {
        'code'  { $codeList.Add($f) }
        'doc'   { $docList.Add($f) }
        default { $otherList.Add($f) }
    }
}

# Expose plain arrays, as the rest of the script expects.
$codeFiles  = $codeList.ToArray()
$docFiles   = $docList.ToArray()
$otherFiles = $otherList.ToArray()

# Count lines of code per file (Measure-Object -Line), accumulating the totals
# in the same pass so empty input yields 0 rather than $null.
[long]$totalLoc = 0
[long]$codeBytes = 0
$codeStats = @(
    foreach ($f in $codeFiles) {
        $lines = 0
        try {
            # [int] turns a missing result (e.g. an empty file) into 0.
            $lines = [int]((Get-Content -LiteralPath $f.FullName -ErrorAction Stop | Measure-Object -Line).Lines)
        } catch {
            # Best effort: an unreadable file counts as 0 lines, but say so under -Verbose.
            Write-Verbose ("Could not read '{0}': {1}" -f $f.FullName, $_.Exception.Message)
            $lines = 0
        }
        $totalLoc += $lines
        $codeBytes += $f.Length
        [PSCustomObject]@{
            Path  = $f.FullName
            Lines = $lines
            Size  = $f.Length
        }
    }
)

# $codeStats is always an array here, so .Count is correct for exactly one
# code file too (a lone PSCustomObject has no Count on Windows PowerShell 5.1).
$avgLoc = if ($codeStats.Count -gt 0) { [math]::Round([double]$totalLoc / $codeStats.Count, 1) } else { 0 }
$maxFile = $codeStats | Sort-Object Lines -Descending | Select-Object -First 1
157+
function Format-Size {
    <#
    .SYNOPSIS
        Renders a byte count as a human-readable string.

    .DESCRIPTION
        Uses the largest binary unit (GB, MB, KB) that the value reaches, with
        two decimals; values below 1KB are shown as whole bytes ("<n> B").

    .PARAMETER Bytes
        The size in bytes.
    #>
    param([long]$Bytes)

    # Ordered from largest to smallest so the first tier reached wins.
    $tiers = @(
        @{ Divisor = 1GB; Format = "{0:N2} GB" },
        @{ Divisor = 1MB; Format = "{0:N2} MB" },
        @{ Divisor = 1KB; Format = "{0:N2} KB" }
    )

    foreach ($tier in $tiers) {
        if ($Bytes -ge $tier.Divisor) {
            return $tier.Format -f ($Bytes / $tier.Divisor)
        }
    }

    return "$Bytes B"
}
164+
165+
# ---------------------------------------------------------------------------
# Console report. Reads the values computed earlier in the script ($allFiles,
# $codeFiles, $docFiles, $otherFiles, $totalLoc, $avgLoc, $maxFile, $sizeBytes,
# $codeBytes, $codeStats) and prints them with Write-Host.
# ---------------------------------------------------------------------------
"" | Write-Host
Write-Host -Object "=== Repository Stats ===" -ForegroundColor Green

# File counts.
"Root: {0}" -f $Path | Write-Host
"Total files: {0}" -f $allFiles.Count | Write-Host
" Code files: {0}" -f $codeFiles.Count | Write-Host
" Doc files: {0}" -f $docFiles.Count | Write-Host
" Other files: {0}" -f $otherFiles.Count | Write-Host
"" | Write-Host

# Line counts; the largest file is shown relative to the scan root.
"Lines of code: {0:N0}" -f $totalLoc | Write-Host
"Avg lines/file: {0}" -f $avgLoc | Write-Host
if ($maxFile) {
    $largestRelative = $maxFile.Path.Substring($Path.Length).TrimStart('\','/')
    "Largest file: {0} ({1:N0} lines)" -f $largestRelative, $maxFile.Lines | Write-Host
}
"" | Write-Host

# Disk usage.
"Size on disk: {0}" -f (Format-Size $sizeBytes) | Write-Host
"Code-only size: {0}" -f (Format-Size $codeBytes) | Write-Host
"" | Write-Host

# Leaderboard of the five longest code files.
Write-Host -Object "Top 5 largest code files by lines:" -ForegroundColor Yellow
$codeStats |
    Sort-Object -Property Lines -Descending |
    Select-Object -First 5 |
    ForEach-Object -Process {
        $entryRelative = $_.Path.Substring($Path.Length).TrimStart('\','/')
        " {0,6:N0} {1}" -f $_.Lines, $entryRelative | Write-Host
    }
188+
189+
# Assemble a machine-readable summary of the run for scripting scenarios.
# NOTE: the object is piped to Out-Null, so it is built but NOT emitted to the
# caller; only the Write-Host report above is visible.
if ($maxFile) {
    $maxLines = $maxFile.Lines
    $maxPath = $maxFile.Path
} else {
    $maxLines = 0
    $maxPath = $null
}

$summary = [ordered]@{
    TotalFiles          = $allFiles.Count
    CodeFiles           = $codeFiles.Count
    DocFiles            = $docFiles.Count
    OtherFiles          = $otherFiles.Count
    LinesOfCode         = $totalLoc
    AvgLinesPerCodeFile = $avgLoc
    MaxFileLines        = $maxLines
    MaxFilePath         = $maxPath
    SizeOnDiskBytes     = $sizeBytes
    CodeSizeBytes       = $codeBytes
}

[PSCustomObject]$summary | Out-Null

0 commit comments

Comments
 (0)