opentelemetry.io/scripts/normalize-cspell-front-matt...

174 lines
5.1 KiB
Perl
Executable File

#!/usr/bin/perl -w
#
# cSpell:ignore textlintrc
use strict;
use warnings;
use FileHandle;
# A generated `cSpell:ignore` line whose length (counting its trailing
# newline) exceeds this limit is preceded by a `# prettier-ignore` line.
my $lineLenLimit = 79;

# Words known site-wide; they are dropped from every per-file ignore list.
my %dictionary = getSiteWideDictWords('.cspell/en-words.txt', '.textlintrc.yml');

# Rewrite, in place, every file named on the command line.
#
# For a file whose first line is `---`, only the front matter (up to the next
# `---` line) is processed and all collected words are written as a single
# `cSpell:ignore:` line just before the closing `---`. For any other file the
# whole file is processed and each run of ignore lines is replaced in place.
foreach my $current_file (@ARGV) {
  my @words;                               # ignore-words collected so far
  my $has_front_matter = 0;                # first line of the file is `---`
  my $in_front_matter = 0;                 # between the opening and closing `---`
  my $last_line_contained_dict_words = 0;  # allows indented continuation lines

  # Read the entire file content into an array
  open my $fh, '<', $current_file or die "Cannot open $current_file: $!";
  my @file_content = <$fh>;
  close $fh;

  my @output_content;
  foreach my $index (0 .. $#file_content) {
    my $line = $file_content[$index];

    if ($index == 0 && $line =~ /^---$/) {
      $has_front_matter = 1;
      $in_front_matter = 1;
      push @output_content, $line;
      next;
    }

    # Everything after the closing `---` is copied through unchanged.
    if ($has_front_matter && !$in_front_matter) {
      push @output_content, $line;
      next;
    }

    # A second `---` closes the front matter; the line itself still goes
    # through the steps below so that pending words are emitted before it.
    $in_front_matter = 0 if $in_front_matter && $line =~ /^---$/;

    # Collect words from a `cSpell:ignore` line (optionally prefixed with
    # `spelling: ` or a list dash), or from an indented line that directly
    # follows such a line. These lines are not copied to the output.
    my $ignore_words;
    if ($line =~ /^\s*(?:spelling: |-\s*)?cSpell:ignore:?\s*(.*)$/) {
      $ignore_words = $1;
    } elsif ($last_line_contained_dict_words && $line =~ /^\s+(\S.*)$/) {
      $ignore_words = $1;
    }
    if (defined $ignore_words) {
      push @words, split /[,\s]+/, $ignore_words;
      $last_line_contained_dict_words = 1;
      next;
    }
    $last_line_contained_dict_words = 0;

    # Emit the normalized ignore line once we are outside the front matter
    # (or the file has none). The list is cleared even when no word survived
    # so that it is not filtered again for every following line.
    if (@words && (!$has_front_matter || !$in_front_matter)) {
      push @output_content,
        buildIgnoreLines(\@words, \@file_content, \%dictionary, $lineLenLimit);
      @words = ();
    }

    # NOTE: every `# prettier-ignore` line and every bare `spelling:` key in
    # the processed region is dropped here, not only those that belong to a
    # cSpell:ignore entry.
    push @output_content, $line
      unless $line =~ /^# prettier-ignore$/ || $line =~ /^spelling:\s*[|>-]*$/;
  }

  # Words can still be pending at end of file (the ignore line was the last
  # line, or the front matter was never closed). Emit them instead of
  # silently deleting the directive.
  push @output_content,
    buildIgnoreLines(\@words, \@file_content, \%dictionary, $lineLenLimit)
    if @words;

  # Write the modified content back to the file. Buffered write errors may
  # only surface at close, so both print and close are checked.
  open my $out_fh, '>', $current_file or die "Cannot open $current_file: $!";
  print {$out_fh} @output_content or die "Cannot write to $current_file: $!";
  close $out_fh or die "Cannot close $current_file: $!";
}

# Turn the collected ignore-words into the output lines that replace them.
#
# Parameters:
#   $words_ref      - array ref of collected words
#   $content_ref    - array ref of all lines of the file being processed
#   $dictionary_ref - hash ref of site-wide known words (word => true)
#   $line_len_limit - length above which `# prettier-ignore` is added
#
# Returns an empty list when no word survives, otherwise the optional
# "# prettier-ignore\n" line followed by the "cSpell:ignore: ...\n" line.
sub buildIgnoreLines {
  my ($words_ref, $content_ref, $dictionary_ref, $line_len_limit) = @_;

  # Drop empty strings (an empty pattern would not mean "match nothing"),
  # stray key names and words already in the site-wide dictionary.
  my @candidates = grep {
    length($_)
      && !/^\s*(?:cSpell:ignore|spelling):?\s*$/
      && !$dictionary_ref->{$_}
  } @$words_ref;

  # Ensure all words are unique (case-insensitive), keeping the first one.
  my %seen;
  @candidates = grep { !$seen{lc $_}++ } @candidates;
  # Words that had duplicates (usually differing only in capitalization) are
  # lowercased, since a lowercase entry matches all capitalized forms.
  @candidates = map { $seen{lc $_} > 1 ? lc $_ : $_ } @candidates;

  # Keep only words that occur (case-insensitive substring match) in some
  # line of the file. The first line and any line mentioning cSpell are not
  # searched.
  my @searchable = grep { !/cSpell/i } @{$content_ref}[1 .. $#{$content_ref}];
  my @used;
  foreach my $word (@candidates) {
    my $word_re = qr/\Q$word\E/i;
    foreach my $text (@searchable) {
      if ($text =~ $word_re) {
        push @used, $word;
        last;
      }
    }
  }
  return unless @used;

  my $line = "cSpell:ignore: " . join(' ', sort { lc($a) cmp lc($b) } @used) . "\n";
  # Only add `# prettier-ignore` if line is too long
  return length($line) > $line_len_limit ? ("# prettier-ignore\n", $line) : ($line);
}
# Build the site-wide dictionary from two sources.
#
# Parameters:
#   $dictionary_file - plain word list file
#   $textlintrc_file - YAML file whose `terms` list supplies further words
#
# Returns a hash containing the keys of both word lists.
sub getSiteWideDictWords {
  my ($dictionary_file, $textlintrc_file) = @_;

  my %plain_words = readYmlOrPlainListOfWords('', $dictionary_file);
  my %textlint_terms = readYmlOrPlainListOfWords('terms', $textlintrc_file);

  # Later pairs win on duplicate keys, so textlint entries override.
  my %merged = (%plain_words, %textlint_terms);
  return %merged;
}
# Read a list of words from a file and return it as a hash (word => 1).
#
# Parameters:
#   $wordsFieldName - name of the key whose list items hold the words; pass
#                     '' to read the file as a plain list, one word per line.
#   $file_path      - path of the file to read.
#
# Lines that are blank or start with `#` (after optional whitespace) are
# skipped. A word must start with a word character and contain no
# whitespace; lines or list items of any other shape are ignored.
#
# When a field name is given, list items are recognized only when they are
# written as the key's own indentation, one extra space, then "- " and the
# word. Collection stops at the next line that does not start with a space.
#
# Dies if the file cannot be opened or if no word was collected.
sub readYmlOrPlainListOfWords {
  my $wordsFieldName = shift;
  my $file_path = shift;

  my $fh = FileHandle->new($file_path, "r") or die "Could not open file '$file_path': $!";
  my @lines = $fh->getlines();
  $fh->close();

  my %dictionary;
  my $indentation = '';
  # A plain list has no key to wait for: every line is a candidate word.
  my $in_terms = $wordsFieldName eq '' ? 1 : 0;

  foreach my $line (@lines) {
    chomp $line;
    next if $line =~ /^\s*#|^\s*$/;

    # The field name and the indentation are matched literally (\Q...\E) so
    # that characters such as `.` in a field name cannot match other keys.
    if ($wordsFieldName && $line =~ /^(\s*)\Q$wordsFieldName\E:/) {
      $indentation = "$1 - ";
      $in_terms = 1;
    } elsif ($in_terms && $line =~ /^\Q$indentation\E(\w[^\s]*)$/) {
      # The pattern guarantees a non-empty term, so store it unconditionally;
      # a truthiness test would wrongly skip the term "0".
      $dictionary{$1} = 1;
    } elsif ($wordsFieldName && $in_terms && $line !~ /^ /) {
      $in_terms = 0;
    }
  }

  die "ERROR: no words read from '$file_path'!" unless %dictionary; # sanity check
  return %dictionary;
}
# Collect the words listed under a `terms:` key of the given file.
#
# Parameter:
#   $file_path - path of the file to read.
#
# A list item counts only when it is written as the key's indentation, one
# extra space, then "- " and a term that starts with a word character and
# contains no whitespace. The list ends at the first other line that does
# not begin with a space.
#
# Returns a hash (term => 1). Dies if the file cannot be opened or if no
# term was collected.
sub processTextlintRc {
  my ($file_path) = @_;

  my $fh = FileHandle->new($file_path, "r")
    or die "Could not open file '$file_path': $!";

  my %dictionary;
  my $indentation = '';
  my $in_terms = 0;

  while (defined(my $line = $fh->getline())) {
    chomp $line;

    # A `terms:` key (re)starts collection and fixes the expected indent.
    if ($line =~ /^(\s*)terms:/) {
      $indentation = $1;
      $in_terms = 1;
      next;
    }

    # Nothing else matters until a `terms:` key has been seen.
    next unless $in_terms;

    if ($line =~ /^$indentation - (\w[^\s]*)$/) {
      my $term = $1;
      $dictionary{$term} = 1 if $term;
      next;
    }

    # A line that does not start with a space ends the list.
    $in_terms = 0 if $line !~ /^ /;
  }
  $fh->close();

  die "ERROR: no words read from '$file_path'!" unless %dictionary; # sanity check
  return %dictionary;
}