Skip to content

Commit

Permalink
* Merged all trunk changes
Browse files Browse the repository at this point in the history
 * Subjects editing has been re-worked to use a workflow file
 * document.mime_type added


git-svn-id: https://svn.eprints.org/eprints/branches/3.3/system@7179 9491667e-5006-0410-a446-efbe8990b998
  • Loading branch information
Tim Brody committed Oct 24, 2011
1 parent 4d59ac2 commit 70fef50
Show file tree
Hide file tree
Showing 70 changed files with 2,295 additions and 1,165 deletions.
130 changes: 130 additions & 0 deletions bin/epadmin
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Where I<command> is one of:
=item reorder
=item redo_mime_type
=item redo_thumbnails
=item refresh_abstracts
Expand Down Expand Up @@ -166,6 +168,12 @@ Tell the webserver that all views pages must be regenerated. The webserver will
Tell the webserver that all abstract summary pages must be regenerated. The webserver will update them the next time they are requested, but won't update them again unless something on the EPrint changes or you re-run refresh_abstracts. Also causes config to be reloaded.
=item B<epadmin> redo_mime_type I<repository_id> dataset [ objectid, ... ]
Re-run the file format identification. Dataset must be one of 'document' or
'file'. If 'document', only the main file of each document is
re-identified.
=item B<epadmin> redo_thumbnails I<repository_id> [ I<eprintid>, ... ]
Regenerate all the thumbnail and image-preview files and any other things which
Expand Down Expand Up @@ -321,6 +329,7 @@ else
elsif( $action eq "reload_metafields" ) { reload_metafields( $repoid ); }
elsif( $action eq "refresh_abstracts" ) { refresh_abstracts( $repoid ); }
elsif( $action eq "refresh_views" ) { refresh_views( $repoid ); }
elsif( $action eq "redo_mime_type" ) { redo_mime_type( $repoid, @ARGV ); }
elsif( $action eq "redo_thumbnails" ) { redo_thumbnails( $repoid, @ARGV ); }
elsif( $action eq "set_developer_mode" ) { set_developer_mode( $repoid, @ARGV ); }
elsif( $action eq "upgrade" ) { upgrade( $repoid ); }
Expand Down Expand Up @@ -1200,6 +1209,78 @@ sub create_user
}
}

# Re-run the media-info triggers (file format identification) for a set of
# objects and store the resulting mime_type.
#
#   $repoid    - repository id, passed to repository()
#   $datasetid - must be exactly 'document' or 'file'
#   @ids       - optional object ids; when empty the whole dataset is searched
#
# For 'document' only the main file of each document is examined; every
# returned media-info value that is also a document field is copied onto the
# document, and mime_type is copied onto the file. For 'file' only the file's
# mime_type is updated.
#
# Dies if the dataset argument is missing or is not 'document' or 'file'.
sub redo_mime_type
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	die "Missing dataset argument\n" if !defined $datasetid;
	# The alternation must be grouped: /^document|file$/ parses as
	# (^document)|(file$) and would accept e.g. "documentation" or "profile".
	die "Dataset must be one of 'document' or 'file'\n"
		if $datasetid !~ /^(?:document|file)$/;

	my $dataset = $repo->dataset( $datasetid );
	my $list = @ids ? $dataset->list( \@ids ) : $dataset->search;

	# The count does not change while mapping, so look it up once. An empty
	# list never invokes the callback, so the division below is safe.
	my $total = $list->count;
	my $i = 0;
	my $progress = sub {
		print STDERR sprintf("%.0f%%\r",
			100 * $i++ / $total
		);
	};

	# Run the MEDIA_INFO triggers against a local copy of $file. Returns the
	# populated epdata hash ref, or undef if no local copy could be obtained.
	my $identify = sub {
		my( $file ) = @_;

		# $fh must stay in scope until the triggers have run: it may be a
		# temporary file object that is removed when it is destroyed.
		my $fh = $file->get_local_copy;
		return undef if !defined $fh;

		# NOTE(review): the local copy's path is passed as the filename too, so
		# extension-based triggers see that path rather than the stored
		# filename - confirm this is intended.
		my $media_info = {};
		$repo->run_trigger( EPrints::Const::EP_TRIGGER_MEDIA_INFO,
			filename => "$fh",
			filepath => "$fh",
			epdata => $media_info,
		);

		return $media_info;
	};

	my $f;
	if( $datasetid eq "document" )
	{
		$f = sub {
			(undef, undef, my $doc) = @_;

			$progress->();

			my( $file ) = $doc->stored_file( $doc->value( "main" ) );
			return if !defined $file;

			my $media_info = $identify->( $file );
			return if !defined $media_info;

			# only copy values the document dataset actually has a field for
			foreach my $fieldid (keys %$media_info)
			{
				next if !$dataset->has_field( $fieldid );
				$doc->set_value( $fieldid, $media_info->{$fieldid} );
			}
			$file->set_value( "mime_type", $media_info->{mime_type} );

			$file->commit;
			$doc->commit;
		};
	}
	else
	{
		$f = sub {
			(undef, undef, my $file) = @_;

			$progress->();

			my $media_info = $identify->( $file );
			return if !defined $media_info;

			$file->set_value( "mime_type", $media_info->{mime_type} );
			$file->commit;
		};
	}

	$list->map( $f );
}

sub redo_thumbnails
{
Expand Down Expand Up @@ -2636,6 +2717,18 @@ sub upgrade_mysql_charset_table
return $rc;
}

# Return the MD5 digest of the file at $filepath as a lower-case hex string,
# or undef if the file cannot be opened.
sub checksum
{
	my( $filepath ) = @_;

	use Digest::MD5;

	# Explicit undef (rather than a bare return) is kept so callers that use
	# the result in list context still receive exactly one value.
	open(my $fh, "<", $filepath) or return undef;

	# Digest the raw bytes: without binmode, platforms that translate line
	# endings would produce a different checksum for the same file.
	binmode($fh);

	my $ctx = Digest::MD5->new;
	$ctx->addfile( $fh );
	close($fh);

	return $ctx->hexdigest;
}

sub upgrade
{
my( $repoid ) = @_;
Expand Down Expand Up @@ -2666,6 +2759,7 @@ sub upgrade
3.2.4
3.3.0
3.3.1
3.3.2
);

for(my $i = 0; $i < $#versions; ++$i)
Expand Down Expand Up @@ -2938,6 +3032,42 @@ sub upgrade_3_3_0_to_3_3_1
}
}

# Upgrade step 3.3.1 -> 3.3.2: introduces document.mime_type.
#
# Brings the datasets and counters up to date, then, for every file object
# whose datasetid is "document", writes that file's mime_type into the
# document row identified by the file's objectid. Progress goes to STDERR and
# a follow-up hint for the administrator is printed at the end.
sub upgrade_3_3_1_to_3_3_2
{
	my( $repo, $db ) = @_;

	# document.mime_type
	update_datasets( $repo, $db );
	update_counters( $repo, $db );

	# every file record that is attached to a document
	my $file_dataset = $repo->dataset( "file" );
	my $doc_files = $file_dataset->search(
		filters => [
			{ meta_fields => [ "datasetid" ], value => "document" },
		],
	);

	my $sql = "UPDATE document SET mime_type=? WHERE docid=?";
	my $progress_format = "Set document.mime_type %.0f%%\r";
	my $processed = 0;

	my $copy_mime_type = sub {
		my $file = $_[2];

		my $docid = $file->value( "objectid" );

		$db->{dbh}->do( $sql, {},
			$file->value( "mime_type" ),
			$docid,
		);

		print STDERR sprintf( $progress_format,
			100 * $processed++ / $doc_files->count
		);
	};

	$doc_files->map( $copy_mime_type );

	print <<'EOH';
You may want to update cfg.d/document_upload.pl and namedsets/document then
./bin/epadmin redo_mime_type [REPOID] document
EOH
}

# End of UPGRADE CODE

=head1 COPYRIGHT
Expand Down
92 changes: 75 additions & 17 deletions lib/cfg.d/media_info.pl
Original file line number Diff line number Diff line change
@@ -1,15 +1,55 @@
# Default document-type guesser (only installed if the repository has not
# already configured its own guess_doc_type).
#
#   $repo     - repository object; supplies the valid "document" types and
#               the "mimemap" configuration
#   $filename - name of the uploaded file (may be undef)
#   $mimetype - optional MIME type already identified for the file
#
# Returns one of the repository's document types, or "other" if nothing
# matched. A candidate is only ever returned if it is a valid document type.
$c->{guess_doc_type} ||= sub {
	my( $repo, $filename, $mimetype ) = @_;

	# avoid "uninitialized" warnings in the pattern matches below
	$filename = "" if !defined $filename;

	my %valid = map { $_ => 1 } $repo->get_types( "document" );

	if( $mimetype )
	{
		my( $major ) = split '/', $mimetype, 2;
		# The alternation must be grouped: /^video|audio|image|text$/ parses
		# as (^video)|audio|image|(text$) and is not anchored at both ends.
		if( $major =~ /^(?:video|audio|image|text)$/ && $valid{$major} )
		{
			return $major;
		}
	}

	# Extension rules are case-insensitive, consistent with the generic
	# fallback below which lower-cases the suffix before looking it up.
	if( $filename =~ /\.(?:pdf|doc|docx)$/i && $valid{text} )
	{
		return "text";
	}
	elsif( $filename =~ /\.(?:ppt|pptx)$/i && $valid{slideshow} )
	{
		return "slideshow";
	}
	elsif( $filename =~ /\.(?:zip|tgz|gz)$/i && $valid{archive} )
	{
		return "archive";
	}
	elsif( $filename =~ /\.([^.]+)$/ )
	{
		# last resort: whatever the mimemap config holds for this suffix,
		# accepted only if it happens to be a valid document type
		my $suffix = "\L$1";
		my $format = $repo->config( "mimemap", $suffix );
		return $format if defined $format && $valid{$format};
	}

	return "other";
};

# GNU file
$c->add_trigger( EP_TRIGGER_MEDIA_INFO, sub {
my( %params ) = @_;

my $epdata = $params{epdata};
my $repo = $params{repository};
my $filename = $params{filename};
my $filepath = $params{filepath};

return 0 if defined $epdata->{format};
return 0 if defined $epdata->{mime_type};
return 0 if !defined $filepath;
return 0 if !defined $repo->config( "executables", "file" );

my $filename = $params{filename};
my $filepath = $params{filepath};
# file thinks OpenXML office types are x-zip
return 0 if $filename =~ /.(docx|pptx|xlsx)$/i;
return 0 if $filename =~ /\.bib$/; # BibTeX

if( open(my $fh, "file -b -i ".quotemeta($filepath)."|") )
{
Expand All @@ -19,8 +59,12 @@
my( $mime_type, $opts ) = split /;\s*/, $output, 2;
$opts = "" if !defined $opts;
return 0 if !defined $mime_type;
return 0 if $mime_type =~ /^ERROR:/;
return 0 if $mime_type eq "application/octet-stream";
$epdata->{format} = $mime_type;
# more file fubar
return 0 if $mime_type =~ /^very short file/;
# doc = "application/msword application/msword" ?!
($epdata->{mime_type}) = split /\s+/, $mime_type;
my( $charset ) = $opts =~ s/charset=(\S+)//;
$epdata->{charset} = $charset if defined $charset;
}
Expand All @@ -33,17 +77,22 @@
return 0;
}, priority => 1000);

# ffmpeg
# ffmpeg media info
$c->add_trigger( EP_TRIGGER_MEDIA_INFO, sub {
my( %params ) = @_;

my $epdata = $params{epdata};
my $repo = $params{repository};
my $filename = $params{filename};
my $filepath = $params{filepath};

return 0 if !defined $filepath;
return 0 if !defined $repo->config( "executables", "ffmpeg" );

my $filename = $params{filename};
my $filepath = $params{filepath};
if( $epdata->{mime_type} && $epdata->{mime_type} !~ /^audio|video/ )
{
return 0;
}

my $ffmpeg_log = File::Temp->new;

Expand Down Expand Up @@ -81,34 +130,43 @@
}

return 0;
}, priority => 1000);
}, priority => 7000);

# other
# by file extension
$c->add_trigger( EP_TRIGGER_MEDIA_INFO, sub {
my( %params ) = @_;

my $epdata = $params{epdata};
my $filename = $params{filename};
my $repo = $params{repository};

return 0 if defined $epdata->{format};
return 0 if defined $epdata->{mime_type};

$epdata->{format} = "other";
if( $filename=~m/\.([^.]+)$/ )
{
my $suffix = "\L$1";
$epdata->{mime_type} = $repo->config( "mimemap", $suffix );
}

return 0;
}, priority => 10000);
}, priority => 5000);

# guess_doc_type
# defaults
$c->add_trigger( EP_TRIGGER_MEDIA_INFO, sub {
my( %params ) = @_;

my $epdata = $params{epdata};
my $filename = $params{filename};
my $repo = $params{repository};

return 0 if defined $epdata->{format};
$epdata->{mime_type} = "application/octet-stream"
if !defined $epdata->{mime_type};

my $format = $repo->call( "guess_doc_type", $repo, $filename );
$epdata->{format} = $format if $format ne "other";
$epdata->{format} = $repo->call( "guess_doc_type",
$repo,
$filename,
$epdata->{mime_type},
) if !defined $epdata->{format};

return 0;
});
}, priority => 10000);
20 changes: 20 additions & 0 deletions lib/cfg.d/mime_types.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Load the file extension => MIME type map ($c->{mimemap}) from mime.types
# files. The files are read in the order listed, so an entry in
# /etc/mime.types replaces the same extension from lib/mime.types.

foreach my $mime_types (
	$c->{base_path} . "/lib/mime.types",
	"/etc/mime.types",
)
{
	# a file that is missing or unreadable is silently skipped
	open(my $fh, "<", $mime_types) or next;

	while(defined(my $line = <$fh>))
	{
		next if $line =~ /^\s*#/;

		# split ' ' discards leading whitespace and the trailing newline;
		# split /\s+/ would yield an empty first field for an indented line
		# and so map every extension on it to the wrong value.
		my( $mt, @ext ) = split ' ', $line;
		next if !defined $mt; # blank line

		foreach my $ext (@ext)
		{
			$c->{mimemap}->{$ext} = $mt;

			# Lookups elsewhere (e.g. media_info.pl) use the lower-cased
			# suffix, so make mixed-case entries reachable too - without
			# replacing an entry already present for the lower-case form.
			my $lc_ext = lc $ext;
			$c->{mimemap}->{$lc_ext} = $mt
				if !exists $c->{mimemap}->{$lc_ext};
		}
	}
	close($fh);
}
Loading

0 comments on commit 70fef50

Please sign in to comment.