Fossil

Check-in [93bb3231]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the "reconstruct" command so that sets the correct hash policy for artifacts read from disk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:93bb323192ab7db60478f85777e205e0f3a496fb263b1b863868cd0f343112bb
User & Date: drh 2019-02-05 15:43:01
Context
2019-02-05
15:49
Fix the unified diff output so that it conforms with established conventions. check-in: 7fd2a365 user: drh tags: trunk
15:43
Enhance the "reconstruct" command so that sets the correct hash policy for artifacts read from disk. check-in: 93bb3231 user: drh tags: trunk
2019-02-01
15:55
Update the built-in SQLite to the latest 3.27.0 alpha. check-in: 6ba52d79 user: drh tags: trunk
2019-01-29
14:29
Add a test command to infer the hash policy from the length of path names on reconstruct (disabled by preprocessor directive). Closed-Leaf check-in: 8d1ed47c user: florian tags: reconstruct-sha3
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/rebuild.c.

180
181
182
183
184
185
186


187

188
189
190
191
192
193
194
...
272
273
274
275
276
277
278











279
280
281
282
283
284
285
...
929
930
931
932
933
934
935



936




































937
938
939
940
941
942
943
...
951
952
953
954
955
956
957
958
959
960
961
962
963
964

965

966
967
968
969
970
971
972
973
974
975
976
977
978


979





































































































980
981
982
983
984
985
986
987
988
989
990




991
992
993
994

995
996
997
998
999
1000
1001
....
1039
1040
1041
1042
1043
1044
1045
1046


1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058

1059
1060
1061
1062
1063
1064
1065
....
1090
1091
1092
1093
1094
1095
1096

1097
1098
1099
1100
1101
1102
1103
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */

static char *zFNameFormat;  /* Format string for filenames on deconstruct */


static int prefixLength;    /* Length of directory prefix for deconstruct */



/*
** Draw the percent-complete message.
** The input is actually the permill complete.
*/
static void percent_complete(int permill){
................................................................................
      manifest_crosslink(rid, pUse, MC_NONE);
    }else{
      /* We are doing "fossil deconstruct" */
      char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
      char *zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                            zUuid, zUuid+prefixLength);
      blob_write_to_file(pUse,zFile);











      free(zFile);
      free(zUuid);
      blob_reset(pUse);
    }
    assert( blob_is_reset(pUse) );
    rebuild_step_done(rid);

................................................................................
void recon_read_dir(char *zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;








































  zUnicodePath = fossil_utf8_to_path(zPath, 1);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

................................................................................
      if( (pEntry->d_type==DT_UNKNOWN || pEntry->d_type==DT_LNK)
          ? (file_isdir(zSubpath, ExtFILE)==1) : (pEntry->d_type==DT_DIR) )
#else
      if( file_isdir(zSubpath, ExtFILE)==1 )
#endif
      {
        recon_read_dir(zSubpath);
      }else{
        blob_init(&path, 0, 0);
        blob_appendf(&path, "%s", zSubpath);
        if( blob_read_from_file(&aContent, blob_str(&path), ExtFILE)==-1 ){
          fossil_fatal("some unknown error occurred while reading \"%s\"",
                       blob_str(&path));
        }

        content_put(&aContent);

        blob_reset(&path);
        blob_reset(&aContent);
        fossil_print("\r%d", ++nFileRead);
        fflush(stdout);
      }
      free(zSubpath);
    }
    closedir(d);
  }else {
    fossil_fatal("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
  fossil_path_free(zUnicodePath);


}






































































































/*
** COMMAND: reconstruct*
**
** Usage: %fossil reconstruct FILENAME DIRECTORY
**
** This command studies the artifacts (files) in DIRECTORY and
** reconstructs the fossil record from them. It places the new
** fossil repository in FILENAME. Subdirectories are read, files
** with leading '.' in the filename are ignored.
**




** See also: deconstruct, rebuild
*/
void reconstruct_cmd(void) {
  char *zPassword;

  if( g.argc!=4 ){
    usage("FILENAME DIRECTORY");
  }
  if( file_isdir(g.argv[3], ExtFILE)!=1 ){
    fossil_print("\"%s\" is not a directory\n\n", g.argv[3]);
    usage("FILENAME DIRECTORY");
  }
................................................................................
** writes all artifacts to the file system. The DESTINATION directory
** will be populated with subdirectories AA and files AA/BBBBBBBBB.., where
** AABBBBBBBBB.. is the 40+ character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
**
** Options:
**   -R|--repository REPOSITORY  deconstruct given REPOSITORY


**   -L|--prefixlength N         set the length of the names of the DESTINATION
**                               subdirectories to N
**   --private                   Include private artifacts.
**
** See also: rebuild, reconstruct
*/
void deconstruct_cmd(void){
  const char *zDestDir;
  const char *zPrefixOpt;
  Stmt        s;
  int privateFlag;


  /* get and check prefix length argument and build format string */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');
................................................................................
  */
#endif
  if( prefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDestDir,prefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDestDir);
  }


  bag_init(&bagDone);
  ttyOutput = 1;
  processCnt = 0;
  if (!g.fQuiet) {
    fossil_print("0 (0%%)...\r");
    fflush(stdout);







>
>

>







 







>
>
>
>
>
>
>
>
>
>
>







 







>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|






>

>













>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|






>
>
>
>




>







 







|
>
>
|
|





<




>







 







>







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
...
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
...
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
....
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
....
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
....
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */

static char *zFNameFormat;  /* Format string for filenames on deconstruct */
static int cchFNamePrefix;  /* Length of directory prefix in zFNameFormat */
static char *zDestDir;      /* Destination directory on deconstruct */
static int prefixLength;    /* Length of directory prefix for deconstruct */
static int fKeepRid1;       /* Flag to preserve RID=1 on de- and reconstruct */


/*
** Draw the percent-complete message.
** The input is actually the permill complete.
*/
static void percent_complete(int permill){
................................................................................
      manifest_crosslink(rid, pUse, MC_NONE);
    }else{
      /* We are doing "fossil deconstruct" */
      char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
      char *zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                            zUuid, zUuid+prefixLength);
      blob_write_to_file(pUse,zFile);
      if( rid==1 && fKeepRid1!=0 ){
        char *zFnDotRid1 = mprintf("%s/.rid1", zDestDir);
        char *zFnRid1 = zFile + cchFNamePrefix + 1; /* Skip directory slash */
        Blob bFileContents = empty_blob;
        blob_appendf(&bFileContents,
          "# The file holding the artifact with RID=1\n"
          "%s\n", zFnRid1);
        blob_write_to_file(&bFileContents, zFnDotRid1);
        blob_reset(&bFileContents);
        free(zFnDotRid1);
      }
      free(zFile);
      free(zUuid);
      blob_reset(pUse);
    }
    assert( blob_is_reset(pUse) );
    rebuild_step_done(rid);

................................................................................
void recon_read_dir(char *zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;
  static int recursionLevel = 0;  /* Bookkeeping about the recursion level */
  static char *zFnRid1 = 0;       /* The file holding the artifact with RID=1 */
  static int cchPathInitial = 0;  /* The length of zPath on first recursion */

  recursionLevel++;
  if( recursionLevel==1 ){
    cchPathInitial = strlen(zPath);
    if( fKeepRid1!=0 ){
      char *zFnDotRid1 = mprintf("%s/.rid1", zPath);
      Blob bFileContents;
      if( blob_read_from_file(&bFileContents, zFnDotRid1, ExtFILE)!=-1 ){
        Blob line, value;
        while( blob_line(&bFileContents, &line)>0 ){
          if( blob_token(&line, &value)==0 ) continue;  /* Empty line */
          if( blob_buffer(&value)[0]=='#' ) continue;   /* Comment */
          blob_trim(&value);
          zFnRid1 = mprintf("%s/%s", zPath, blob_str(&value));
          break;
        }
        blob_reset(&bFileContents);
        if( zFnRid1 ){
          if( blob_read_from_file(&aContent, zFnRid1, ExtFILE)==-1 ){
            fossil_fatal("some unknown error occurred while reading \"%s\"",
                         zFnRid1);
          }else{
            recon_set_hash_policy(0, zFnRid1);
            content_put(&aContent);
            recon_restore_hash_policy();
            blob_reset(&aContent);
            fossil_print("\r%d", ++nFileRead);
            fflush(stdout);
          }
        }else{
          fossil_fatal("an error occurred while reading or parsing \"%s\"",
                       zFnDotRid1);
        }
      }
      free(zFnDotRid1);
    }
  }
  zUnicodePath = fossil_utf8_to_path(zPath, 1);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

................................................................................
      if( (pEntry->d_type==DT_UNKNOWN || pEntry->d_type==DT_LNK)
          ? (file_isdir(zSubpath, ExtFILE)==1) : (pEntry->d_type==DT_DIR) )
#else
      if( file_isdir(zSubpath, ExtFILE)==1 )
#endif
      {
        recon_read_dir(zSubpath);
      }else if( fossil_strcmp(zSubpath, zFnRid1)!=0 ){
        blob_init(&path, 0, 0);
        blob_appendf(&path, "%s", zSubpath);
        if( blob_read_from_file(&aContent, blob_str(&path), ExtFILE)==-1 ){
          fossil_fatal("some unknown error occurred while reading \"%s\"",
                       blob_str(&path));
        }
        recon_set_hash_policy(cchPathInitial, blob_str(&path));
        content_put(&aContent);
        recon_restore_hash_policy();
        blob_reset(&path);
        blob_reset(&aContent);
        fossil_print("\r%d", ++nFileRead);
        fflush(stdout);
      }
      free(zSubpath);
    }
    closedir(d);
  }else {
    fossil_fatal("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
  fossil_path_free(zUnicodePath);
  if( recursionLevel==1 && zFnRid1!=0 ) free(zFnRid1);
  recursionLevel--;
}

/*
** Helper functions called from recon_read_dir() to set and restore the correct
** hash policy for an artifact read from disk, inferred from the length of the
** path name.
*/
static int saved_eHashPolicy = -1;

void recon_set_hash_policy(
  const int cchPathPrefix,    /* Directory prefix length for zUuidAsFilePath */
  const char *zUuidAsFilePath /* Relative, well-formed, from recon_read_dir() */
){
  int cchUuidAsFilePath;
  const char *zHashPart;
  int cchHashPart = 0;
  int new_eHashPolicy = -1;
  assert( HNAME_COUNT==2 ); /* Review function if new hashes are implemented. */
  if( zUuidAsFilePath==0 ) return;
  cchUuidAsFilePath = strlen(zUuidAsFilePath);
  if( cchUuidAsFilePath==0 ) return;
  if( cchPathPrefix>=cchUuidAsFilePath ) return;
  for( zHashPart = zUuidAsFilePath + cchPathPrefix; *zHashPart; zHashPart++ ){
    if( *zHashPart!='/' ) cchHashPart++;
  }
  if( cchHashPart>=HNAME_LEN_K256 ){
    new_eHashPolicy = HPOLICY_SHA3;
  }else if( cchHashPart>=HNAME_LEN_SHA1 ){
    new_eHashPolicy = HPOLICY_SHA1;
  }
  if( new_eHashPolicy!=-1 ){
    saved_eHashPolicy = g.eHashPolicy;
    g.eHashPolicy = new_eHashPolicy;
  }
}

void recon_restore_hash_policy(){
  if( saved_eHashPolicy!=-1 ){
    g.eHashPolicy = saved_eHashPolicy;
    saved_eHashPolicy = -1;
  }
}

#if 0
/*
** COMMAND: test-hash-from-path*
**
** Usage: %fossil test-hash-from-path ?OPTIONS? DESTINATION UUID
**
** Generate a sample path name from DESTINATION and UUID, as the `deconstruct'
** command would do.  Then try to guess the hash policy from the path name, as
** the `reconstruct' command would do.
**
** No files or directories will be created.
**
** Options:
**   -L|--prefixlength N     Set the length of the names of the DESTINATION
**                           subdirectories to N.
*/
void test_hash_from_path_cmd(void) {
  char *zDest;
  char *zUuid;
  char *zFile;
  const char *zHashPolicy = "unknown";
  const char *zPrefixOpt = find_option("prefixlength","L",1);
  int iPrefixLength;
  if( !zPrefixOpt ){
    iPrefixLength = 2;
  }else{
    iPrefixLength = atoi(zPrefixOpt);
    if( iPrefixLength<0 || iPrefixLength>9 ){
      fossil_fatal("N(%s) is not a valid prefix length!",zPrefixOpt);
    }
  }
  if( g.argc!=4 ){
    usage ("?OPTIONS? DESTINATION UUID");
  }
  zDest = g.argv[2];
  zUuid = g.argv[3];
  if( iPrefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDest,iPrefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDest);
  }
  cchFNamePrefix = strlen(zDest);
  zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
                  zUuid, zUuid+iPrefixLength);
  recon_set_hash_policy(cchFNamePrefix,zFile);
  if( saved_eHashPolicy!=-1 ){
    zHashPolicy = hpolicy_name();
  }
  recon_restore_hash_policy();
  fossil_print(
    "\nPath Name:   %s"
    "\nHash Policy: %s\n",
    zFile,zHashPolicy);
  free(zFile);
  free(zFNameFormat);
  zFNameFormat = 0;
  cchFNamePrefix = 0;
}
#endif

/*
** COMMAND: reconstruct*
**
** Usage: %fossil reconstruct ?OPTIONS? FILENAME DIRECTORY
**
** This command studies the artifacts (files) in DIRECTORY and
** reconstructs the fossil record from them. It places the new
** fossil repository in FILENAME. Subdirectories are read, files
** with leading '.' in the filename are ignored.
**
** Options:
**    -K|--keep-rid1    Read the filename of the artifact with
**                      RID=1 from the file .rid in DIRECTORY.
**
** See also: deconstruct, rebuild
*/
void reconstruct_cmd(void) {
  char *zPassword;
  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
  if( g.argc!=4 ){
    usage("FILENAME DIRECTORY");
  }
  if( file_isdir(g.argv[3], ExtFILE)!=1 ){
    fossil_print("\"%s\" is not a directory\n\n", g.argv[3]);
    usage("FILENAME DIRECTORY");
  }
................................................................................
** writes all artifacts to the file system. The DESTINATION directory
** will be populated with subdirectories AA and files AA/BBBBBBBBB.., where
** AABBBBBBBBB.. is the 40+ character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
**
** Options:
**   -R|--repository REPOSITORY  Deconstruct given REPOSITORY.
**   -K|--keep-rid1              Save the filename of the artifact with RID=1 to
**                               the file .rid1 in the DESTINATION directory.
**   -L|--prefixlength N         Set the length of the names of the DESTINATION
**                               subdirectories to N.
**   --private                   Include private artifacts.
**
** See also: rebuild, reconstruct
*/
void deconstruct_cmd(void){

  const char *zPrefixOpt;
  Stmt        s;
  int privateFlag;

  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
  /* get and check prefix length argument and build format string */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');
................................................................................
  */
#endif
  if( prefixLength ){
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDestDir,prefixLength);
  }else{
    zFNameFormat = mprintf("%s/%%s",zDestDir);
  }
  cchFNamePrefix = strlen(zDestDir);

  bag_init(&bagDone);
  ttyOutput = 1;
  processCnt = 0;
  if (!g.fQuiet) {
    fossil_print("0 (0%%)...\r");
    fflush(stdout);