Viewing File: /usr/local/cpanel/3rdparty/perl/536/cpanel-lib/x86_64-linux/Text/VCardFast.pm

package Text::VCardFast;

use strict;
use warnings;

use Encode qw(decode_utf8 encode_utf8);
use MIME::Base64 qw(decode_base64 encode_base64);

require Exporter;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration	use Text::VCardFast ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(
	vcard2hash
	hash2vcard
) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(
	vcard2hash
	hash2vcard
);

our $VERSION = '0.11';

require XSLoader;
XSLoader::load('Text::VCardFast', $VERSION);

# public API

sub vcard2hash { &vcard2hash_c }
sub hash2vcard { &hash2vcard_pp }

# Implementation

sub vcard2hash_c {
    my $vcard = shift;
    my %params = @_;
    if (utf8::is_utf8($vcard)) {
        utf8::encode($vcard);
        $params{is_utf8} = 1;
    }
    unless ($vcard =~ m/\n\S/) {
        # cruddy card with \r as line separator?
        $vcard =~ tr/\r/\n/;
    }
    my $hash = Text::VCardFast::_vcard2hash($vcard, \%params);
    return $hash;
}

# pureperl version

# VCard parsing and formatting {{{

my %RFC6868Map = ("n" => "\n", "^" => "^", "'" => "\"");
my %RFC6868RevMap = reverse %RFC6868Map;
my %UnescapeMap = ("n" => "\n", "N" => "\n");

my $Pos = 1;
my @PropOutputOrder = qw(version fn n nickname lang gender org title role bday anniversary email tel adr url impp);
my %PropOutputOrder = map { $_ => $Pos++ } @PropOutputOrder;
my @ParamOutputOrder = qw(type pref);
my %ParamOutputOrder = map { $_ => $Pos++ } @ParamOutputOrder;

sub vcard2hash_pp {
  my $vcard = shift;
  unless ($vcard =~ m/\n\S/) {
    # cruddy card with \r as line separator?
    $vcard =~ tr/\r/\n/;
  }
  my %params = @_;
  return vcardlines2hash_pp(\%params, (split /\r?\n/, $vcard));
}

sub vcardlines2hash_pp {
  my $args = shift;
  local $_;

  my %MultiFieldMap;
  my %MultiParamMap;
  if ($args->{multival}) {
    %MultiFieldMap = map { $_ => 1 } @{$args->{multival}};
  }
  if ($args->{multiparam}) {
    %MultiParamMap = map { $_ => 1 } @{$args->{multiparam}};
  }

  # rfc2425, rfc2426, rfc6350, rfc6868

  my @Path;
  my $Current;
  while ($_ = shift @_) {
    # Strip EOL
    s/\r?\n$//;

    # 5.8.1 - Unfold lines if next line starts with space or tab
    if (@_ && $_[0] =~ s/^[ \t]//) {
      $_ .= shift @_;
      redo;
    }

    # Ignore empty lines
    next if /^\s*$/;

    if (/^BEGIN:(.*)/i) {
      push @Path, $Current;
      $Current = { type => lc $1 };
      push @{ $Path[-1]{objects} }, $Current;
      next;
    }
    if (/^END:(.*)/i) {
      die "END $1 in $Current->{type}"
        unless $Current->{type} eq lc $1;
      $Current = pop @Path;
      return $Current if ($args->{only_one} and not @Path);
      next;
    }

    # 5.8.2 - Parse '[group "."] name *(";" param) ":" value'
    #  In v2.1, params may not have "=value" part
    #  In v4, "," is allowed in non-quoted param value
    my ($Name) = /^([^;:]*)/gc;
    my @Params = /\G;(?:([\w\-]+)=)?("[^"]*"|[^";:=]*)/gc;
    my ($Value) = /\G:(.*)$/g;

    # 5.8.2 - Type names and parameter names are case insensitive
    my $LName = lc $Name;

    my %Props;

    # Remove group from each property name and add as attribute
    #  (in v4, group names are case insensitive as well)
    if ($LName =~ s/^(.+)\.(.*?)$/$2/) {
      $Props{group} = $1;
    }

    $Props{name} = $LName;

    # Parse out parameters
    my %Params;
    while (@Params) {
      # Parsed into param => param-value pairs
      my ($PName, $PValue) = splice @Params, 0, 2;
      if (not defined $PName) {
        if ($args->{barekeys}) {
          $PName = $PValue;
          $PValue = undef;
        }
        else {
          $PName = 'type';
        }
      }

      # 5.8.2 - parameter names are case insensitive
      my $LPName = lc $PName;

      my @PValue = (undef);
      if (defined $PValue) {
        $PValue =~ s/^"(.*)"$/$1/;
        # \n needed for label, but assume any \; is meant to be ; as well
        $PValue =~ s#\\(.)#$UnescapeMap{$1} // $1#ge;
        # And RFC6868 recoding
        $PValue =~ s/\^([n^'])/$RFC6868Map{$1}/g;
        if ($MultiParamMap{$LPName}) {
          @PValue = split /,/, $PValue;
        }
        else {
          @PValue = ($PValue);
        }
      }

      if (exists $Params{$LPName}) {
        push @{$Params{$LPName}}, @PValue;
      } else {
        $Params{$LPName} = \@PValue;
      }
    }
    $Props{params} = \%Params if keys %Params;

    my $Encoding = $Params{encoding};

    if ($MultiFieldMap{$LName}) {
      # use negative 'limit' to force trailing fields
      $Value = [ split /(?<!\\);/, $Value, -1 ];
      s#\\(.)#$UnescapeMap{$1} // $1#ge for @$Value;
      $Props{values} = $Value;
    } elsif ($Encoding && lc $Encoding eq 'b') {
      # Don't bother unescaping base64 value

      $Props{value} = $Value;
    } else {
      $Value =~ s#\\(.)#$UnescapeMap{$1} // $1#ge;
      $Props{value} = $Value;
    }

    push @{$Current->{properties}->{$LName}}, \%Props;
  }

  # something did a BEGIN but no END - TODO, unwind this nicely as
  # it may be more than one level
  die "BEGIN $Current->{type} without matching END"
    if @Path;

  return $Current;
}

sub hash2vcard_pp {
  return join "", map { $_ . ($_[1] // "\n") } hash2vcardlines_pp($_[0]);
}

sub hash2vcardlines_pp {
  my $Objects = shift->{objects} // [];

  my @Lines;
  for my $Card (@$Objects) {
    # We group properties in the same group together, track if we've
    #  already output a property
    my %DoneProps;

    my $Props = $Card->{properties};

    # Order the properties
    my @PropKeys = sort {
      ($PropOutputOrder{$a} // 1000) <=> ($PropOutputOrder{$b} // 1000)
        || $a cmp $b
    } keys %$Props;

    # Make sure items in the same group are output together
    my $Groups = $Card->{groups} || do {
      my %Groups;
      for (map { @$_ } values %$Props) {
	push @{$Groups{$_->{group}}}, $_ if $_->{group};
      }
      \%Groups;
    };

    # Generate output list
    my @OutputProps;
    for my $PropKey (@PropKeys) {
      my @PropVals = @{$Props->{$PropKey}};
      for my $PropVal (@PropVals) {
        next if $DoneProps{"$PropVal"}++;

        push @OutputProps, $PropVal;

        # If it has a group, output all values in that group together
        if (my $Group = $PropVal->{group}) {
          push @OutputProps, grep { !$DoneProps{"$_"}++ } @{$Groups->{$Group}};
        }
      }
    }

    my $Type = uc $Card->{type};
    push @Lines, ("BEGIN:" . $Type);

    for (@OutputProps) {
      # Skip deleted or synthetic properties
      next if $_->{deleted} || $_->{name} eq 'online';

      my $Binary = $_->{binary};
      if ($Binary) {
        my $Encoding = ($_->{params}->{encoding} //= []);
        push @$Encoding, "b" if !@$Encoding;
      }

      my $LName = $_->{name};
      my $Group = $_->{group};

      # rfc6350 3.3 - it is RECOMMENDED that property and parameter names be upper-case on output.
      my $Line = ($Group ? (uc $Group . ".") : "") . uc $LName;

      while (my ($Param, $ParamVals) = each %{$_->{params} // {}}) {
        if (!defined $ParamVals) {
          $Line .= ";" . uc($Param);
        }
        for (ref($ParamVals) ? @$ParamVals : $ParamVals) {
          my $PV = $_ // next; # Modify copy
          $PV =~ s/\n/\\N/g if $Param eq 'label';
          $PV =~ s/([\n^"])/'^' . $RFC6868RevMap{$1}/ge;
          $PV = '"' . $PV . '"' if $PV =~ /\W/;
          $Line .= ";" . uc($Param) . "=" . $PV;
        }
      }
      $Line .= ":";

      my $Value = $_->{values} || $_->{value};

      if ($_->{binary}) {
        $Value = encode_base64($Value, '');

      } else {
        my @Values = map {
          my $V = ref($_) ? $$_ : $_; # Modify copy
          $V //= '';
          # rfc6350 3.4 (v4, assume clarifies many v3 semantics)
          # - a SEMICOLON in a field of such a "compound" property MUST be
          #   escaped with a BACKSLASH character
          # - a COMMA character in one of a field's values MUST be escaped
          #   with a BACKSLASH character
          # - BACKSLASH characters in values MUST be escaped with a BACKSLASH
          #   character.
          $V =~ s/([\,\;\\])/\\$1/g;
          # - NEWLINE (U+000A) characters in values MUST be encoded
          #   by two characters: a BACKSLASH followed by either an 'n' (U+006E)
          #   or an 'N' (U+004E).
          $V =~ s/\n/\\n/g;
          $V;
        } ref $Value ? @$Value : $Value;

        $Value = join ";", @Values;

        # Stripped v4 proto prefix, add it back
        if (my $ProtoStrip = $_->{proto_strip}) {
          $Value = $ProtoStrip . $Value;
        }

        # If it's a perl unicode string, make it utf-8 bytes
        #if (utf8::is_utf8($Value)) {
          #$Value = encode_utf8($Value);
        #}
      }

      $Line .= $Value;

      push @Lines, foldline($Line);
    }

    push @Lines, hash2vcardlines_pp($Card);

    push @Lines, "END:" . $Type;
  }

  return @Lines;
}

sub foldline {
  local $_ = shift;

  # Fold at every \n, regardless of position
  # Try folding on at whitespace boundaries after 60 chars first
  # Otherwise fold to 75 chars, but don't split utf-8 unicode char or end with a \
  my @Out;
  while (/\G(.{0,75}?\\n)/gc || /\G(.{60,75})(?<=[^\n\t ])(?=[\n\t ])/gc || /\G(.{0,74}[^\\])(?![\x80-\xbf])/gc) {
    push @Out, (@Out ? " " . $1 : $1);
  }
  push @Out, " " . substr($_, pos($_)) if pos $_ != length $_;

  return @Out;
}

# }}}

1;

1;
__END__
=head1 NAME

Text::VCardFast - Perl extension for very fast parsing of VCards

=head1 SYNOPSIS

  use Text::VCardFast;

  my $hash = Text::VCard::vcard2hash($card, multival => ['adr', 'org', 'n']);
  my $card = Text::VCard::hash2vcard($hash, "\r\n");

=head1 DESCRIPTION

Text::VCardFast is designed to parse VCards very quickly compared to
pure-perl solutions.  It has a perl and an XS version of the same API,
accessible as vcard2hash_pp and vcard2hash_c, with the XS version being
preferred.

Why would you care?  We were writing the calendaring code for fastmail.fm,
and it was taking over 6 seconds to draw respond to a request for calendar
data, and the bulk was going to the perl middleware layer - and THAT
profiled down to the vcard parser.

Two of us independently wrote better pure perl implementations, leading to
about a 5 times speedup in each case.  I figured it was worth checking if
XS would be much better.  Here's the benchmark on the v4 example from
Wikipedia:

    Benchmark: timing 10000 iterations of fastxs, pureperl, vcardasdata...
        fastxs:  0 wallclock secs ( 0.16 usr +  0.01 sys =  0.17 CPU) @ 58823.53/s (n=10000)
                (warning: too few iterations for a reliable count)
      pureperl:  1 wallclock secs ( 1.04 usr +  0.00 sys =  1.04 CPU) @ 9615.38/s (n=10000)
    vcardasdata:  8 wallclock secs ( 7.35 usr +  0.00 sys =  7.35 CPU) @ 1360.54/s (n=10000)

(see bench.pl in the source tarball for the code)

=head2 EXPORT

  vcard2hash
  hash2vcard

=head2 API

=over

=item Text::VCard::vcard2hash($card, %options);

  Options:

  * only_one - A flag which, if true, means parsing will stop after
    extracting a single VCard from the buffer.  This is very useful
    in cases where, for example, a disclaimer has been added after
    a calendar event in an email.

  * multival - A list of entry names which will be considered to have
    multiple values.  Instead of having a 'value' field in the hash,
    entries with this key will have a 'values' field containing an
    arrayref of values - even if there is only one value.
    The value is split on semicolon, with escaped semicolons decoded
    correctly within each item.

    Default is the empty list.

  * multiparam - As with values - multiparam is a list of entry names
    which can have multiple values.  To see the difference here you
    must consider something like this:

    EMAIL;TYPE="INTERNET,HOME";TYPE=PREF:example@example.com

    If 'multiparam' includes 'TYPE' then the result will be:
    ['INTERNET', 'HOME', 'PREF'], otherwise it will be:
    ['INTERNET,HOME', 'PREF'].

    Default is the empty list.

  * barekeys - if set, then a bare parameter will be considered to be
    a parameter name with an undefined value, rather than a being a
    value for the parameter type.

    Consider:

    EMAIL;INTERNET;HOME:example@example.com

    barekeys off:

    {
      name => 'email',
      params => { type => ['INTERNET', 'HOME'] },
      value => 'example@example.com',
    }

    barekeys on:

    {
      name => 'email',
      params => { internet => [undef], home => [undef] },
      value => 'example@example.com',
    }

    default is barekeys off.

  The input is a scalar containing VFILE text, as per RFC 6350 or the various
  earlier RFCs it replaces.  If the perl unicode flag is set on the scalar,
  then it will be propagated to the output values.

  The output is a hash reference containing a single key 'objects', which is
  an array of all the cards within the source text.

  Each object can have the following keys:
  * type - the text after BEGIN: and END: of the card (lower cased)
  * properties - a hash from name to array of instances within the card.
  * objects - an array of sub cards within the card.

  Properties are a hash with the following keys:
  * group - optional - if the propery name as 'foo.bar', this will be foo.
  * name - a copy of the hash key that pointed to this property, so that
    this hash can be used without keeping the key around too
  * params - a hash of the parameters on the entry.  This is everything from
    the ; to the :
  * value - either a scalar (if not a multival field) or an array of values.
    This is everything after the :

  Decoding is done where possible, including RFC 6868 handling of ^.

  All names, both entry names and parameter names, are lowercased where the
  RFC says they are not case significant.  This means that all hash keys are
  lowercase within this API, as are card types.

  Values, on the other hand, are left in their original case even where the
  RFC says they are case insignificant - due to the increased complexity of
  tracking which version what parameters are in effect.

=item Text::VCard::hash2vcard($hash, $eol)

  The inverse operation (as much as possible!)

  Given a hash with an 'objects' key in it, output a scalar string containing
  the VCARD representation.  Lines are separated with the $eol string given,
  or the default "\n".  Use "\r\n" for files going to caldav/carddav servers.

  In the inverse of the above case, where names are case insignificant, they
  are generated in UPPERCASE in the card, for maximum compatibility with
  other implementations.

=back

=head1 EXAMPLES

  For more examples see the t/cases directory in the tarball, which contains
  some sample VCARDs and JSON dumps of the hash representation.

  BEGIN:VCARD
  KEY;PKEY=PVALUE:VALUE
  KEY2:VALUE2
  END:VCARD

  {
  'objects' => [
    {
      'type' => 'vcard',
      'properties' => {
        'key2' => [
          {
            'value' => 'VALUE2',
            'name' => 'key2'
          }
        ],
        'key' => [
          {
            'params' => {
              'pkey' => [
                'PVALUE'
              ]
            },
            'value' => 'VALUE',
            'name' => 'key'
          }
        ]
      }
    }
  ]
  }

  BEGIN:VCARD
  BEGIN:SUBCARD
  KEY:VALUE
  END:SUBCARD
  END:VCARD

  {
  'objects' => [
    {
      'objects' => [
        {
          'type' => 'subcard',
          'properties' => {
            'key' => [
              {
                'value' => 'VALUE',
                'name' => 'key'
              }
            ]
          }
        }
      ],
      'type' => 'vcard',
      'properties' => {}
    }
  ]
  }

  BEGIN:VCARD
  GROUP1.KEY:VALUE
  GROUP1.KEY2:VALUE2
  GROUP2.KEY:VALUE
  END:VCARD

  {
  'objects' => [
    {
      'type' => 'vcard',
      'properties' => {
        'key2' => [
          {
            'group' => 'group1',
            'value' => 'VALUE2',
            'name' => 'key2'
          }
        ],
        'key' => [
          {
            'group' => 'group1',
            'value' => 'VALUE',
            'name' => 'key'
          },
          {
            'group' => 'group2',
            'value' => 'VALUE',
            'name' => 'key'
          }
        ]
      }
    }
  ]
  }


=head1 SEE ALSO

There is a similar module Text::VFile::asData on CPAN, but it is much
slower and doesn't do as much decoding.

Code is stored on github at

https://github.com/brong/Text-VCardFast/

=head1 AUTHOR

Bron Gondwana, E<lt>brong@fastmail.fm<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2014 by Bron Gondwana

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.14.2 or,
at your option, any later version of Perl 5 you may have available.


=cut
Back to Directory File Manager