Notez que la classe ContentType d'Apache traite le SVG comme application/svg+xml
plutôt que comme le type image/svg+xml défini par l'IANA,
ce qui semble être une catégorisation incorrecte.
Bien que cette réponse ne réponde pas directement à la question, elle offre une alternative à l'utilisation du client HTTP d'Apache en utilisant le client HTTP de Java. De plus, le code d'exemple :
- effectue une requête
HEAD
plutôt qu'une requête GET,
ce qui est une opération plus légère ;
- introduit un délai d'attente de connexion de 5 secondes pour les requêtes HTTP, que vous pouvez allonger selon votre scénario ;
- tente d'ajouter un typage fort au type de média en convertissant les types de contenu en une valeur de l'énumération
MediaType
au lieu d'une chaîne de caractères ;
- évite les frais généraux liés à l'utilisation d'une expression régulière pour analyser une simple chaîne de caractères ; et
- définit de nombreux autres types de médias, en particulier les images, qui ne sont pas définis par la classe Apache
ContentType
.
Sans plus attendre, voici quelques fichiers sources Java qui peuvent s'avérer utiles.
MediaType
L'énumération de base encode les types de médias IANA. Si vous ajoutez d'autres encodages officiels, veuillez mettre à jour cette réponse afin que tous puissent en bénéficier. Notez que R Markdown, R XML et YAML ne sont pas définis officiellement, vous pouvez donc les supprimer.
import static org.apache.commons.io.FilenameUtils.getExtension;
public enum MediaType {
APP_JAVA_OBJECT(
APPLICATION, "x-java-serialized-object"
),
FONT_OTF( "otf" ),
FONT_TTF( "ttf" ),
IMAGE_APNG( "apng" ),
IMAGE_ACES( "aces" ),
IMAGE_AVCI( "avci" ),
IMAGE_AVCS( "avcs" ),
IMAGE_BMP( "bmp" ),
IMAGE_CGM( "cgm" ),
IMAGE_DICOM_RLE( "dicom_rle" ),
IMAGE_EMF( "emf" ),
IMAGE_EXAMPLE( "example" ),
IMAGE_FITS( "fits" ),
IMAGE_G3FAX( "g3fax" ),
IMAGE_GIF( "gif" ),
IMAGE_HEIC( "heic" ),
IMAGE_HEIF( "heif" ),
IMAGE_HEJ2K( "hej2k" ),
IMAGE_HSJ2( "hsj2" ),
IMAGE_X_ICON( "x-icon" ),
IMAGE_JLS( "jls" ),
IMAGE_JP2( "jp2" ),
IMAGE_JPEG( "jpeg" ),
IMAGE_JPH( "jph" ),
IMAGE_JPHC( "jphc" ),
IMAGE_JPM( "jpm" ),
IMAGE_JPX( "jpx" ),
IMAGE_JXR( "jxr" ),
IMAGE_JXRA( "jxrA" ),
IMAGE_JXRS( "jxrS" ),
IMAGE_JXS( "jxs" ),
IMAGE_JXSC( "jxsc" ),
IMAGE_JXSI( "jxsi" ),
IMAGE_JXSS( "jxss" ),
IMAGE_KTX( "ktx" ),
IMAGE_KTX2( "ktx2" ),
IMAGE_NAPLPS( "naplps" ),
IMAGE_PNG( "png" ),
IMAGE_SVG_XML( "svg+xml" ),
IMAGE_T38( "t38" ),
IMAGE_TIFF( "tiff" ),
IMAGE_WEBP( "webp" ),
IMAGE_WMF( "wmf" ),
TEXT_HTML( TEXT, "html" ),
TEXT_MARKDOWN( TEXT, "markdown" ),
TEXT_PLAIN( TEXT, "plain" ),
TEXT_R_MARKDOWN( TEXT, "R+markdown" ),
TEXT_R_XML( TEXT, "R+xml" ),
TEXT_YAML( TEXT, "yaml" ),
UNDEFINED( TypeName.UNDEFINED, "undefined" );
/**
* The IANA-defined types.
*/
public enum TypeName {
APPLICATION,
IMAGE,
TEXT,
UNDEFINED
}
/**
* The fully qualified IANA-defined media type.
*/
private final String mMediaType;
/**
* The IANA-defined type name.
*/
private final TypeName mTypeName;
/**
* The IANA-defined subtype name.
*/
private final String mSubtype;
/**
* Constructs an instance using the default type name of "image".
*
* @param subtype The image subtype name.
*/
MediaType( final String subtype ) {
this( IMAGE, subtype );
}
/**
* Constructs an instance using an IANA-defined type and subtype pair.
*
* @param typeName The media type's type name.
* @param subtype The media type's subtype name.
*/
MediaType( final TypeName typeName, final String subtype ) {
mTypeName = typeName;
mSubtype = subtype;
mMediaType = typeName.toString().toLowerCase() + '/' + subtype;
}
/**
* Returns the {@link MediaType} associated with the given file.
*
* @param file Has a file name that may contain an extension associated with
* a known {@link MediaType}.
* @return {@link MediaType#UNDEFINED} if the extension has not been
* assigned, otherwise the {@link MediaType} associated with this
* {@link File}'s file name extension.
*/
public static MediaType valueFrom( final File file ) {
return valueFrom( file.getName() );
}
/**
* Returns the {@link MediaType} associated with the given file name.
*
* @param filename The file name that may contain an extension associated
* with a known {@link MediaType}.
* @return {@link MediaType#UNDEFINED} if the extension has not been
* assigned, otherwise the {@link MediaType} associated with this
* URL's file name extension.
*/
public static MediaType valueFrom( final String filename ) {
return getMediaType( getExtension( filename ) );
}
/**
* Returns the {@link MediaType} for the given type and subtype names.
*
* @param type The IANA-defined type name.
* @param subtype The IANA-defined subtype name.
* @return {@link MediaType#UNDEFINED} if there is no {@link MediaType} that
* matches the given type and subtype names.
*/
public static MediaType valueFrom(
final String type, final String subtype ) {
for( final var mediaType : MediaType.values() ) {
if( mediaType.equals( type, subtype ) ) {
return mediaType;
}
}
return UNDEFINED;
}
/**
* Answers whether the given type and subtype names equal this enumerated
* value. This performs a case-insensitive comparison.
*
* @param type The type name to compare against this {@link MediaType}.
* @param subtype The subtype name to compare against this {@link MediaType}.
* @return {@code true} when the type and subtype name match.
*/
public boolean equals( final String type, final String subtype ) {
return mTypeName.name().equalsIgnoreCase( type ) &&
mSubtype.equalsIgnoreCase( subtype );
}
/**
* Answers whether the given {@link TypeName} matches this type name.
*
* @param typeName The {@link TypeName} to compare against the internal value.
* @return {@code true} if the given value is the same IANA-defined type name.
*/
public boolean isType( final TypeName typeName ) {
return mTypeName == typeName;
}
/**
* Returns the IANA-defined type and sub-type.
*
* @return The unique media type identifier.
*/
public String toString() {
return mMediaType;
}
/**
* Used by {@link MediaTypeExtensions} to initialize associations where the
* subtype name and the file name extension have a 1:1 mapping.
*
* @return The IANA subtype value.
*/
String getSubtype() {
return mSubtype;
}
}
MediaTypeExtensions
Différentes extensions de nom de fichier correspondent à différents types de médias. La correspondance entre les extensions et les MediaType
ne signifie pas nécessairement que le contenu correspond au type de média attendu. Les applications doivent prendre soin de lire les en-têtes des fichiers pour déterminer le type de média réel.
/**
 * Associates each supported {@link MediaType} with the set of file name
 * extensions that map onto it. Lookups compare extensions without regard
 * to letter case.
 */
enum MediaTypeExtensions {
  MEDIA_FONT_OTF( FONT_OTF ),
  MEDIA_FONT_TTF( FONT_TTF ),

  MEDIA_IMAGE_APNG( IMAGE_APNG ),
  MEDIA_IMAGE_BMP( IMAGE_BMP ),
  MEDIA_IMAGE_GIF( IMAGE_GIF ),
  MEDIA_IMAGE_ICO( IMAGE_X_ICON, of( "ico", "cur" ) ),
  MEDIA_IMAGE_JPEG( IMAGE_JPEG, of( "jpg", "jpeg", "jfif", "pjpeg", "pjp" ) ),
  MEDIA_IMAGE_PNG( IMAGE_PNG ),
  MEDIA_IMAGE_SVG( IMAGE_SVG_XML, of( "svg" ) ),
  MEDIA_IMAGE_TIFF( IMAGE_TIFF, of( "tif", "tiff" ) ),
  MEDIA_IMAGE_WEBP( IMAGE_WEBP ),

  MEDIA_TEXT_MARKDOWN( TEXT_MARKDOWN, of(
    "md", "markdown", "mdown", "mdtxt", "mdtext", "mdwn", "mkd", "mkdown",
    "mkdn" ) ),
  MEDIA_TEXT_PLAIN( TEXT_PLAIN, of( "asc", "ascii", "txt", "text", "utxt" ) ),
  MEDIA_TEXT_R_MARKDOWN( TEXT_R_MARKDOWN, of( "Rmd" ) ),
  MEDIA_TEXT_R_XML( TEXT_R_XML, of( "Rxml" ) ),
  MEDIA_TEXT_YAML( TEXT_YAML, of( "yaml", "yml" ) );

  /**
   * The media type that the extensions resolve to.
   */
  private final MediaType mMediaType;

  /**
   * The file name extensions (without a leading dot) for the media type.
   */
  private final Set<String> mExtensions;

  /**
   * Creates an association whose only extension is the media type's own
   * subtype name.
   *
   * @param mediaType The media type whose subtype doubles as its extension.
   */
  MediaTypeExtensions( final MediaType mediaType ) {
    this( mediaType, of( mediaType.getSubtype() ) );
  }

  /**
   * Creates an association between a media type and an explicit set of
   * file name extensions.
   *
   * @param mediaType  The media type to associate; asserted non-null.
   * @param extensions The extensions that map to the media type; asserted
   *                   non-null and non-empty.
   */
  MediaTypeExtensions(
    final MediaType mediaType, final Set<String> extensions ) {
    assert mediaType != null;
    assert extensions != null;
    assert !extensions.isEmpty();

    mExtensions = extensions;
    mMediaType = mediaType;
  }

  /**
   * Finds the media type associated with a file name extension.
   *
   * @param extension The extension to look up; {@code null} is treated as
   *                  an empty string.
   * @return The media type of the first association that lists the
   * extension, or {@code UNDEFINED} when none does.
   */
  static MediaType getMediaType( final String extension ) {
    final var candidate = sanitize( extension );

    for( final var association : MediaTypeExtensions.values() ) {
      if( !association.isType( candidate ) ) {
        continue;
      }

      return association.getMediaType();
    }

    return UNDEFINED;
  }

  /**
   * Returns the media type held by this association.
   */
  private MediaType getMediaType() {
    return mMediaType;
  }

  /**
   * Answers whether this association lists the given extension, ignoring
   * letter case.
   *
   * @param sanitized The non-null extension to search for.
   * @return {@code true} when any listed extension matches.
   */
  private boolean isType( final String sanitized ) {
    return mExtensions
      .stream()
      .anyMatch( listed -> listed.equalsIgnoreCase( sanitized ) );
  }

  /**
   * Converts the extension to lower case, substituting an empty string for
   * {@code null}.
   */
  private static String sanitize( final String extension ) {
    if( extension == null ) {
      return "";
    }

    return extension.toLowerCase();
  }
}
HttpMediaType
Enfin, nous pouvons écrire un minuscule analyseur syntaxique qui convertit l'en-tête content-type en une valeur
MediaType. Notez que le HttpClient
effectue lui-même une comparaison sensible à la casse sur le nom de l'en-tête, de sorte que nous ne pouvons pas utiliser des méthodes telles que firstValue
ou allValues,
car nous ne savons pas si le serveur renverra "Content-Type" ou "content-type". À proprement parler, cela semble être un bogue, car la RFC 2616 indique que les noms des en-têtes de message ne sont pas sensibles à la casse.
public class HttpMediaType {
private final static HttpClient HTTP_CLIENT = HttpClient
.newBuilder()
.connectTimeout( ofSeconds( 5 ) )
.followRedirects( NORMAL )
.build();
/**
* Performs an HTTP HEAD request to determine the media type based on the
* Content-Type header returned from the server.
*
* @param uri Determine the media type for this resource.
* @return The data type for the resource or {@link MediaType#UNDEFINED} if
* unmapped.
* @throws MalformedURLException The {@link URI} could not be converted to
* a {@link URL}.
*/
public static MediaType valueFrom( final URI uri )
throws MalformedURLException {
final var mediaType = new MediaType[]{UNDEFINED};
try {
final var request = HttpRequest
.newBuilder( uri )
.method( "HEAD", noBody() )
.build();
final var response = HTTP_CLIENT.send( request, discarding() );
final var headers = response.headers();
final var map = headers.map();
map.forEach( ( key, values ) -> {
if( "Content-Type".equalsIgnoreCase( key ) ) {
var header = values.get( 0 );
// Trim off the character encoding.
var i = header.indexOf( ';' );
header = header.substring( 0, i == -1 ? header.length() : i );
// Split the type and subtype.
i = header.indexOf( '/' );
i = i == -1 ? header.length() : i;
final var type = header.substring( 0, i );
final var subtype = header.substring( i + 1 );
mediaType[ 0 ] = MediaType.valueFrom( type, subtype );
}
} );
} catch( final Exception ex ) {
// TODO: Inform the user?
}
return mediaType[ 0 ];
}
}