Skip to content

Commit

Permalink
Merge pull request #2 from eprints/unidecode
Browse files Browse the repository at this point in the history
#1 Nicer representation of titles with non-ASCII in the path
  • Loading branch information
fatchild authored Nov 30, 2023
2 parents 5864ab0 + 847a735 commit 8af2e18
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions plugins/EPrints/DataObj/Page.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package EPrints::DataObj::Page;
use EPrints;
use EPrints::DataObj;
use EPrints::DataObj::RichDataObj;
use Text::Unidecode;

@ISA = ( 'EPrints::DataObj::RichDataObj' );

Expand Down Expand Up @@ -64,8 +65,17 @@ sub tidy_path
# Turn a free-text path (e.g. a page title) into a lower-case, dash-separated
# ASCII string.
#
# Takes:   $path - the raw string to tidy.
# Returns: the tidied string, or '' when there is nothing usable in $path.
my( $path ) = @_;

# Only the first 100 characters are considered. The match yields nothing when
# $path is empty or begins with a newline ('.' does not match "\n"), and an
# undefined $path is treated the same way.
my ( $tidy ) = defined $path ? ( $path =~ /(^.{1,100})/ ) : ();
return '' if !defined $tidy;

# converts non-ASCII characters into their nearest equivalent, e.g. stripping accents
# NOTE(review): transliteration may expand one character into several, so the
# result can be longer than 100 characters - confirm no caller relies on a cap.
$tidy = unidecode( $tidy );

# Keep only spaces, ASCII letters, digits and dashes, then turn each space
# into a dash.
$tidy =~ s/[^ a-zA-Z0-9-]+//g;
$tidy =~ s/ /-/g;

# unidecode can leave us with some extra dashes - tidy them up.
# Collapse a run of ANY length into one dash: a plain s/--/-/g only halves a
# run, because matches do not overlap ("a---b" would become "a--b").
$tidy =~ s/-{2,}/-/g;

# After collapsing, at most one trailing dash can remain.
$tidy =~ s/-\z//;

$tidy = lc( $tidy );

return $tidy;
Expand Down

0 comments on commit 8af2e18

Please sign in to comment.