/* -*- mode: C; c-file-style: "gnu" -*- */ /* xdgmimemagic.: Private file. Datastructure for storing magic files. * * More info can be found at http://www.freedesktop.org/standards/ * * Copyright (C) 2003 Red Hat, Inc. * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu> * * Licensed under the Academic Free License version 2.0 * Or under the following terms: * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <assert.h> #include "xdgmimemagic.h" #include "xdgmimeint.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <errno.h> #include <limits.h> #ifndef FALSE #define FALSE (0) #endif #ifndef TRUE #define TRUE (!FALSE) #endif extern int errno; typedef struct XdgMimeMagicMatch XdgMimeMagicMatch; typedef struct XdgMimeMagicMatchlet XdgMimeMagicMatchlet; typedef enum { XDG_MIME_MAGIC_SECTION, XDG_MIME_MAGIC_MAGIC, XDG_MIME_MAGIC_ERROR, XDG_MIME_MAGIC_EOF } XdgMimeMagicState; struct XdgMimeMagicMatch { const char *mime_type; int priority; XdgMimeMagicMatchlet *matchlet; XdgMimeMagicMatch *next; }; struct XdgMimeMagicMatchlet { int indent; int offset; unsigned int value_length; unsigned char *value; unsigned char *mask; unsigned int range_length; unsigned int word_size; XdgMimeMagicMatchlet *next; }; struct XdgMimeMagic { XdgMimeMagicMatch *match_list; int max_extent; }; static XdgMimeMagicMatch * _xdg_mime_magic_match_new(void) { return (XdgMimeMagicMatch *)calloc(1, sizeof(XdgMimeMagicMatch)); } static XdgMimeMagicMatchlet * _xdg_mime_magic_matchlet_new(void) { XdgMimeMagicMatchlet *matchlet; matchlet = (XdgMimeMagicMatchlet *)malloc(sizeof(XdgMimeMagicMatchlet)); matchlet->indent = 0; matchlet->offset = 0; matchlet->value_length = 0; matchlet->value = NULL; matchlet->mask = NULL; matchlet->range_length = 1; matchlet->word_size = 1; matchlet->next = NULL; return matchlet; } static void _xdg_mime_magic_matchlet_free(XdgMimeMagicMatchlet *mime_magic_matchlet) { if (mime_magic_matchlet) { if (mime_magic_matchlet->next) _xdg_mime_magic_matchlet_free(mime_magic_matchlet->next); if (mime_magic_matchlet->value) free(mime_magic_matchlet->value); if (mime_magic_matchlet->mask) free(mime_magic_matchlet->mask); free(mime_magic_matchlet); } } /* Frees mime_magic_match and the remainder of its list */ static void _xdg_mime_magic_match_free(XdgMimeMagicMatch *mime_magic_match) { XdgMimeMagicMatch *ptr, *next; ptr = mime_magic_match; while (ptr) { next = ptr->next; if (ptr->mime_type) free((void *) ptr->mime_type); if (ptr->matchlet) _xdg_mime_magic_matchlet_free(ptr->matchlet); free(ptr); ptr = next; } } /* Reads in a hunk of data until a newline character or a '\000' is hit. The * returned string is null terminated, and doesn't include the newline. */ static char * _xdg_mime_magic_read_to_newline(FILE *magic_file, int *end_of_file) { char *retval; int c; int len, pos; len = 128; pos = 0; retval = (char *)malloc(len); *end_of_file = FALSE; while (TRUE) { c = getc_unlocked(magic_file); if (c == EOF) { *end_of_file = TRUE; break; } if (c == '\n' || c == '\000') break; retval[pos++] = (char) c; if (pos % 128 == 127) { len = len + 128; char *tmp = (char *)realloc(retval, len); if (tmp == NULL) { free(retval); return NULL; } retval = tmp; } } retval[pos] = '\000'; return retval; } /* Returns the number read from the file, or -1 if no number could be read. */ static int _xdg_mime_magic_read_a_number(FILE *magic_file, int *end_of_file) { /* LONG_MAX is about 20 characters on my system */ #define MAX_NUMBER_SIZE 30 char number_string[MAX_NUMBER_SIZE + 1]; int pos = 0; int c; long retval = -1; while (TRUE) { c = getc_unlocked(magic_file); if (c == EOF) { *end_of_file = TRUE; break; } if (! isdigit(c)) { ungetc(c, magic_file); break; } number_string[pos] = (char) c; pos++; if (pos == MAX_NUMBER_SIZE) break; } if (pos > 0) { number_string[pos] = '\000'; errno = 0; retval = strtol(number_string, NULL, 10); if ((retval < INT_MIN) || (retval > INT_MAX) || (errno != 0)) return -1; } return retval; } /* Headers are of the format: * [<priority>:<mime-type>] */ static XdgMimeMagicState _xdg_mime_magic_parse_header(FILE *magic_file, XdgMimeMagicMatch *match) { int c; char *buffer; char *end_ptr; int end_of_file = 0; assert(magic_file != NULL); assert(match != NULL); c = getc_unlocked(magic_file); if (c == EOF) return XDG_MIME_MAGIC_EOF; if (c != '[') return XDG_MIME_MAGIC_ERROR; match->priority = _xdg_mime_magic_read_a_number(magic_file, &end_of_file); if (end_of_file) return XDG_MIME_MAGIC_EOF; if (match->priority == -1) return XDG_MIME_MAGIC_ERROR; c = getc_unlocked(magic_file); if (c == EOF) return XDG_MIME_MAGIC_EOF; if (c != ':') return XDG_MIME_MAGIC_ERROR; buffer = _xdg_mime_magic_read_to_newline(magic_file, &end_of_file); if (end_of_file) { free(buffer); return XDG_MIME_MAGIC_EOF; } end_ptr = buffer; while (*end_ptr != ']' && *end_ptr != '\000' && *end_ptr != '\n') end_ptr++; if (*end_ptr != ']') { free(buffer); return XDG_MIME_MAGIC_ERROR; } *end_ptr = '\000'; match->mime_type = strdup(buffer); free(buffer); return XDG_MIME_MAGIC_MAGIC; } static XdgMimeMagicState _xdg_mime_magic_parse_error(FILE *magic_file) { int c; while (1) { c = getc_unlocked(magic_file); if (c == EOF) return XDG_MIME_MAGIC_EOF; if (c == '\n') return XDG_MIME_MAGIC_SECTION; } } /* Headers are of the format: * [ indent ] ">" start-offset "=" value * [ "&" mask ] [ "~" word-size ] [ "+" range-length ] "\n" */ static XdgMimeMagicState _xdg_mime_magic_parse_magic_line(FILE *magic_file, XdgMimeMagicMatch *match) { XdgMimeMagicMatchlet *matchlet; int c; int end_of_file; int indent = 0; size_t bytes_read; assert(magic_file != NULL); /* Sniff the buffer to make sure it's a valid line */ c = getc_unlocked(magic_file); if (c == EOF) return XDG_MIME_MAGIC_EOF; else if (c == '[') { ungetc(c, magic_file); return XDG_MIME_MAGIC_SECTION; } else if (c == '\n') return XDG_MIME_MAGIC_MAGIC; /* At this point, it must be a digit or a '>' */ end_of_file = FALSE; if (isdigit(c)) { ungetc(c, magic_file); indent = _xdg_mime_magic_read_a_number(magic_file, &end_of_file); if (end_of_file) return XDG_MIME_MAGIC_EOF; if (indent == -1) return XDG_MIME_MAGIC_ERROR; c = getc_unlocked(magic_file); if (c == EOF) return XDG_MIME_MAGIC_EOF; } if (c != '>') return XDG_MIME_MAGIC_ERROR; matchlet = _xdg_mime_magic_matchlet_new(); matchlet->indent = indent; matchlet->offset = _xdg_mime_magic_read_a_number(magic_file, &end_of_file); if (end_of_file) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } if (matchlet->offset == -1) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } c = getc_unlocked(magic_file); if (c == EOF) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } else if (c != '=') { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } /* Next two bytes determine how long the value is */ matchlet->value_length = 0; c = getc_unlocked(magic_file); if (c == EOF) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } matchlet->value_length = c & 0xFF; matchlet->value_length = matchlet->value_length << 8; c = getc_unlocked(magic_file); if (c == EOF) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } matchlet->value_length = matchlet->value_length + (c & 0xFF); matchlet->value = (unsigned char *)malloc(matchlet->value_length); /* OOM */ if (matchlet->value == NULL) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } bytes_read = fread(matchlet->value, 1, matchlet->value_length, magic_file); if (bytes_read != matchlet->value_length) { _xdg_mime_magic_matchlet_free(matchlet); if (feof(magic_file)) return XDG_MIME_MAGIC_EOF; else return XDG_MIME_MAGIC_ERROR; } c = getc_unlocked(magic_file); if (c == '&') { matchlet->mask = (unsigned char *)malloc(matchlet->value_length); /* OOM */ if (matchlet->mask == NULL) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } bytes_read = fread(matchlet->mask, 1, matchlet->value_length, magic_file); if (bytes_read != matchlet->value_length) { _xdg_mime_magic_matchlet_free(matchlet); if (feof(magic_file)) return XDG_MIME_MAGIC_EOF; else return XDG_MIME_MAGIC_ERROR; } c = getc_unlocked(magic_file); } if (c == '~') { matchlet->word_size = _xdg_mime_magic_read_a_number(magic_file, &end_of_file); if (end_of_file) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } if (matchlet->word_size != 0 && matchlet->word_size != 1 && matchlet->word_size != 2 && matchlet->word_size != 4) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } c = getc_unlocked(magic_file); } if (c == '+') { matchlet->range_length = _xdg_mime_magic_read_a_number(magic_file, &end_of_file); if (end_of_file) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_EOF; } if (matchlet->range_length == (unsigned int)-1) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } c = getc_unlocked(magic_file); } if (c == '\n') { /* We clean up the matchlet, byte swapping if needed */ if (matchlet->word_size > 1) { size_t i; if (matchlet->value_length % matchlet->word_size != 0) { _xdg_mime_magic_matchlet_free(matchlet); return XDG_MIME_MAGIC_ERROR; } /* FIXME: need to get this defined in a <config.h> style file */ #if LITTLE_ENDIAN for (i = 0; i < matchlet->value_length; i = i + matchlet->word_size) { if (matchlet->word_size == 2) *((xdg_uint16_t *) matchlet->value + i) = SWAP_BE16_TO_LE16(*((xdg_uint16_t *)(matchlet->value + i))); else if (matchlet->word_size == 4) *((xdg_uint32_t *) matchlet->value + i) = SWAP_BE32_TO_LE32(*((xdg_uint32_t *)(matchlet->value + i))); if (matchlet->mask) { if (matchlet->word_size == 2) *((xdg_uint16_t *) matchlet->mask + i) = SWAP_BE16_TO_LE16(*((xdg_uint16_t *)(matchlet->mask + i))); else if (matchlet->word_size == 4) *((xdg_uint32_t *) matchlet->mask + i) = SWAP_BE32_TO_LE32(*((xdg_uint32_t *)(matchlet->mask + i))); } } #endif } matchlet->next = match->matchlet; match->matchlet = matchlet; return XDG_MIME_MAGIC_MAGIC; } _xdg_mime_magic_matchlet_free(matchlet); if (c == EOF) return XDG_MIME_MAGIC_EOF; return XDG_MIME_MAGIC_ERROR; } static int _xdg_mime_magic_matchlet_compare_to_data(XdgMimeMagicMatchlet *matchlet, const void *data, size_t len) { size_t i, j; for (i = matchlet->offset; i <= matchlet->offset + matchlet->range_length; i++) { int valid_matchlet = TRUE; if (i + matchlet->value_length > len) return FALSE; if (matchlet->mask) { for (j = 0; j < matchlet->value_length; j++) { if ((matchlet->value[j] & matchlet->mask[j]) != ((((unsigned char *) data)[j + i]) & matchlet->mask[j])) { valid_matchlet = FALSE; break; } } } else { for (j = 0; j < matchlet->value_length; j++) { if (matchlet->value[j] != ((unsigned char *) data)[j + i]) { valid_matchlet = FALSE; break; } } } if (valid_matchlet) return TRUE; } return FALSE; } static int _xdg_mime_magic_matchlet_compare_level(XdgMimeMagicMatchlet *matchlet, const void *data, size_t len, int indent) { while ((matchlet != NULL) && (matchlet->indent == indent)) { if (_xdg_mime_magic_matchlet_compare_to_data(matchlet, data, len)) { if ((matchlet->next == NULL) || (matchlet->next->indent <= indent)) return TRUE; if (_xdg_mime_magic_matchlet_compare_level(matchlet->next, data, len, indent + 1)) return TRUE; } do { matchlet = matchlet->next; } while (matchlet && matchlet->indent > indent); } return FALSE; } static int _xdg_mime_magic_match_compare_to_data(XdgMimeMagicMatch *match, const void *data, size_t len) { return _xdg_mime_magic_matchlet_compare_level(match->matchlet, data, len, 0); } static void _xdg_mime_magic_insert_match(XdgMimeMagic *mime_magic, XdgMimeMagicMatch *match) { XdgMimeMagicMatch *list; if (mime_magic->match_list == NULL) { mime_magic->match_list = match; return; } if (match->priority > mime_magic->match_list->priority) { match->next = mime_magic->match_list; mime_magic->match_list = match; return; } list = mime_magic->match_list; while (list->next != NULL) { if (list->next->priority < match->priority) { match->next = list->next; list->next = match; return; } list = list->next; } list->next = match; match->next = NULL; } XdgMimeMagic * _xdg_mime_magic_new(void) { return (XdgMimeMagic *)calloc(1, sizeof(XdgMimeMagic)); } void _xdg_mime_magic_free(XdgMimeMagic *mime_magic) { if (mime_magic) { _xdg_mime_magic_match_free(mime_magic->match_list); free(mime_magic); } } int _xdg_mime_magic_get_buffer_extents(XdgMimeMagic *mime_magic) { return mime_magic->max_extent; } const char * _xdg_mime_magic_lookup_data(XdgMimeMagic *mime_magic, const void *data, size_t len) { XdgMimeMagicMatch *match; for (match = mime_magic->match_list; match; match = match->next) { if (_xdg_mime_magic_match_compare_to_data(match, data, len)) { return match->mime_type; } } return NULL; } static void _xdg_mime_update_mime_magic_extents(XdgMimeMagic *mime_magic) { XdgMimeMagicMatch *match; int max_extent = 0; for (match = mime_magic->match_list; match; match = match->next) { XdgMimeMagicMatchlet *matchlet; for (matchlet = match->matchlet; matchlet; matchlet = matchlet->next) { int extent; extent = matchlet->value_length + matchlet->offset + matchlet->range_length; if (max_extent < extent) max_extent = extent; } } mime_magic->max_extent = max_extent; } static XdgMimeMagicMatchlet * _xdg_mime_magic_matchlet_mirror(XdgMimeMagicMatchlet *matchlets) { XdgMimeMagicMatchlet *new_list; XdgMimeMagicMatchlet *tmp; if ((matchlets == NULL) || (matchlets->next == NULL)) return matchlets; new_list = NULL; tmp = matchlets; while (tmp != NULL) { XdgMimeMagicMatchlet *matchlet; matchlet = tmp; tmp = tmp->next; matchlet->next = new_list; new_list = matchlet; } return new_list; } static void _xdg_mime_magic_read_magic_file(XdgMimeMagic *mime_magic, FILE *magic_file) { XdgMimeMagicState state; XdgMimeMagicMatch *match = NULL; /* Quiet compiler */ state = XDG_MIME_MAGIC_SECTION; while (state != XDG_MIME_MAGIC_EOF) { switch (state) { case XDG_MIME_MAGIC_SECTION: match = _xdg_mime_magic_match_new(); state = _xdg_mime_magic_parse_header(magic_file, match); if (state == XDG_MIME_MAGIC_EOF || state == XDG_MIME_MAGIC_ERROR) _xdg_mime_magic_match_free(match); break; case XDG_MIME_MAGIC_MAGIC: state = _xdg_mime_magic_parse_magic_line(magic_file, match); if (state == XDG_MIME_MAGIC_SECTION || (state == XDG_MIME_MAGIC_EOF && match->mime_type)) { match->matchlet = _xdg_mime_magic_matchlet_mirror(match->matchlet); _xdg_mime_magic_insert_match(mime_magic, match); } else if (state == XDG_MIME_MAGIC_EOF || state == XDG_MIME_MAGIC_ERROR) _xdg_mime_magic_match_free(match); break; case XDG_MIME_MAGIC_ERROR: state = _xdg_mime_magic_parse_error(magic_file); break; case XDG_MIME_MAGIC_EOF: default: /* Make the compiler happy */ assert(0); } } _xdg_mime_update_mime_magic_extents(mime_magic); } void _xdg_mime_magic_read_from_file(XdgMimeMagic *mime_magic, const char *file_name) { FILE *magic_file; char header[12]; /* OK to not use CLO_EXEC here because mimedb is single threaded */ magic_file = fopen(file_name, "r"); if (magic_file == NULL) return; if (fread(header, 1, 12, magic_file) == 12) { if (memcmp("MIME-Magic\0\n", header, 12) == 0) _xdg_mime_magic_read_magic_file(mime_magic, magic_file); } fclose(magic_file); }